def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
    self.inpt = inpt.reshape((mini_batch_size, self.n_in))
    self.output = softmax((1 - self.p_dropout) * T.dot(self.inpt, self.w) + self.b)
    self.y_out = T.argmax(self.output, axis=1)
    self.inpt_dropout = dropout_layer(
        inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
    self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)
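# A quick NumPy sanity check of the (1 - p_dropout) scaling convention used
# in set_inpt above: at test time the full weights are used, but the
# pre-softmax activation is scaled by the keep probability. A minimal sketch
# with illustrative names and shapes, not part of the original layer.
import numpy as np

def np_softmax(z):
    # Numerically stable row-wise softmax.
    z = z - z.max(axis=-1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)

rng = np.random.RandomState(0)
w, b, p_dropout = rng.randn(4, 3), np.zeros(3), 0.5
x = rng.randn(2, 4)
probs = np_softmax((1 - p_dropout) * x.dot(w) + b)
assert np.allclose(probs.sum(axis=1), 1.0)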
def recurrence(xp_t, xp_t1, xq_t1, h_t_pre1, cx, ch):
    # context_x
    # Fixed-size window: append xp_t at the bottom and drop the first row,
    # so the matrix shape stays the same.
    cx = T.concatenate((cx[1:], xp_t.reshape((1, n_in))))  # shape=(winx, 20)
    ex = T.dot(tanh(T.dot(cx, qx)), rx)                    # shape=(winx, 1)
    ax = softmax(ex.T)                                     # shape=(1, winx)
    xc = (T.dot(cx.T, ax.T)).reshape((n_in, ))             # shape=(20, )
    # gru_unit
    z_r = sigmoid(T.dot(ui[:2], xp_t) +
                  T.dot(vc[:2], xc) +
                  T.dot(wh[:2], h_t_pre1) + bi[:2])
    z, r = z_r[0], z_r[1]
    c = tanh(T.dot(ui[2], xp_t) +
             T.dot(vc[2], xc) +
             T.dot(wh[2], (r * h_t_pre1)) + bi[2])
    h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c          # shape=(20, )
    # context_h
    # Fixed-size window: append h_t at the bottom and drop the first row,
    # so the matrix shape stays the same.
    ch = T.concatenate((ch[1:], h_t.reshape((1, n_hidden))))  # the winh most recent hidden states
    eh = T.dot(tanh(T.dot(ch, qh)), rh)                    # shape=(winh, 1)
    ah = softmax(eh.T)                                     # shape=(1, winh)
    hc = (T.dot(ch.T, ah.T)).reshape((n_hidden, ))
    hw = tanh(T.dot(e, h_t) + T.dot(f, hc))
    # loss
    upq_t = T.dot(hw, xp_t1 - xq_t1)  # positive/negative pair training: h(t) * (xp(t+1) - xq(t+1))
    loss_t = T.log(sigmoid(upq_t))
    return [h_t, cx, ch, loss_t]
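# A minimal NumPy sketch of the sliding-window attention that produces the
# context vector xc in the recurrence above (shapes follow the comments in
# the snippet; all names here are illustrative, assuming winx=5, n_in=20).
import numpy as np

winx, n_in = 5, 20
rng = np.random.RandomState(0)
cx = rng.randn(winx, n_in)        # window holding the last winx inputs
qx = rng.randn(n_in, n_in)
rx = rng.randn(n_in, 1)

ex = np.tanh(cx.dot(qx)).dot(rx)  # (winx, 1) attention scores
ax = np.exp(ex - ex.max())
ax = ax / ax.sum()                # (winx, 1) attention weights, sum to 1
xc = cx.T.dot(ax).reshape(n_in)   # (n_in,) attention-weighted context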
def test_optimize_xent_vector2(self):
    verbose = 0
    mode = theano.compile.mode.get_default_mode()
    if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
        mode = 'FAST_RUN'
    rng = numpy.random.RandomState(utt.fetch_seed())
    x_val = rng.randn(5)
    b_val = rng.randn(5)
    y_val = numpy.asarray([2])
    x = T.dvector('x')
    b = T.dvector('b')
    y = T.lvector('y')

    def print_graph(func):
        for i, node in enumerate(func.maker.fgraph.toposort()):
            print i, node
        # Last node should be the output
        print i, printing.pprint(node.outputs[0])
        print

    ## Test that a biased softmax is optimized correctly
    bias_expressions = [
        T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
        T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]

    for expr in bias_expressions:
        f = theano.function([x, b, y], expr, mode=mode)
        if verbose:
            print_graph(f)
        try:
            prev, last = f.maker.fgraph.toposort()[-2:]
            assert len(f.maker.fgraph.toposort()) == 3  # [big_op, sum, dim_shuffle]
            f(x_val, b_val, y_val)
        except Exception:
            theano.printing.debugprint(f)
            raise

        backup = config.warn.sum_div_dimshuffle_bug
        config.warn.sum_div_dimshuffle_bug = False
        try:
            g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
        finally:
            config.warn.sum_div_dimshuffle_bug = backup
        if verbose:
            print_graph(g)
        try:
            ops = [node.op for node in g.maker.fgraph.toposort()]
            assert len(ops) <= 6
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_with_bias in ops
            assert softmax_grad not in ops
            g(x_val, b_val, y_val)
        except Exception:
            theano.printing.debugprint(g)
            raise
def test_optimize_xent_vector3(self):
    # Same as test_optimize_xent_vector2, but y is the result of
    # a "flatten", and it used to make the constant-folding
    # of arange(y.shape[0]) happen before the xent optimization
    verbose = 0
    mode = theano.compile.mode.get_default_mode()
    if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
        mode = 'FAST_RUN'
    rng = numpy.random.RandomState(utt.fetch_seed())
    x_val = rng.randn(5).astype(config.floatX)
    b_val = rng.randn(5).astype(config.floatX)
    y_val = numpy.asarray([2])
    x = T.vector('x')
    b = T.vector('b')
    y_ = T.lvector('y_')
    y = y_.flatten()

    ## Test that a biased softmax is optimized correctly
    bias_expressions = [
        T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
        T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]

    for expr in bias_expressions:
        f = theano.function([x, b, y_], expr, mode=mode)
        if verbose:
            printing.debugprint(f)
        try:
            ops = [node.op for node in f.maker.fgraph.toposort()]
            # [big_op, sum, dim_shuffle, flatten]
            assert len(ops) <= 4
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops
                        if isinstance(o, T.AdvancedSubtensor)]
            f(x_val, b_val, y_val)
        except Exception:
            theano.printing.debugprint(f)
            raise

        backup = config.warn.sum_div_dimshuffle_bug
        config.warn.sum_div_dimshuffle_bug = False
        try:
            # The function input must be the root variable y_, not the
            # computed y = y_.flatten().
            g = theano.function([x, b, y_], T.grad(expr, x), mode=mode)
        finally:
            config.warn.sum_div_dimshuffle_bug = backup
        if verbose:
            printing.debugprint(g)
        try:
            ops = [node.op for node in g.maker.fgraph.toposort()]
            assert len(ops) <= 6
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_with_bias in ops
            assert softmax_grad not in ops
            g(x_val, b_val, y_val)
        except Exception:
            theano.printing.debugprint(g)
            raise
def bench_ConvLarge(batchsize, variant=True):
    # Set the batch size before building the name so the name reflects
    # this run's batch size.
    GlobalBenchReporter.batch_size = batchsize
    name = "ConvLarge_b" + str(GlobalBenchReporter.batch_size)
    name += "_" + config.linker

    # Image shape 256x256
    data_x.set_value(randn(n_examples, 1, 256, 256))
    w0 = shared(rand(6, 1, 7, 7) * numpy.sqrt(6 / (25.)))
    b0 = shared(zeros(6))
    w1 = shared(rand(16, 6, 7, 7) * numpy.sqrt(6 / (25.)))
    b1 = shared(zeros(16))
    vv = shared(rand(16 * 11 * 11, 120) * numpy.sqrt(6.0 / 16. / 25))
    cc = shared(zeros(120))
    v = shared(zeros(120, outputs))
    c = shared(zeros(outputs))
    params = [w0, b0, w1, b1, v, c, vv, cc]

    c0 = tanh(conv2d(sx, w0, image_shape=(batchsize, 1, 256, 256),
                     filter_shape=(6, 1, 7, 7)) + b0.dimshuffle(0, 'x', 'x'))
    # this is not the correct leNet5 model, but it's closer to
    s0 = tanh(max_pool_2d(c0, (5, 5)))
    c1 = tanh(conv2d(s0, w1, image_shape=(batchsize, 6, 50, 50),
                     filter_shape=(16, 6, 7, 7)) + b1.dimshuffle(0, 'x', 'x'))
    s1 = tanh(max_pool_2d(c1, (4, 4)))

    p_y_given_x = softmax(dot(tanh(dot(s1.flatten(2), vv) + cc), v) + c)
    nll = -log(p_y_given_x)[arange(sy.shape[0]), sy]
    cost = nll.mean()
    gparams = grad(cost, params)
    train = function([si, nsi], cost,
                     updates=[(p, p - lr * gp)
                              for p, gp in zip(params, gparams)],
                     name=name)
    GlobalBenchReporter.eval_model(train, name)
    if not variant:
        return

    # Versions with no inputs
    snsi.set_value(GlobalBenchReporter.batch_size)
    c0 = tanh(conv2d(ssx, w0, image_shape=(batchsize, 1, 256, 256),
                     filter_shape=(6, 1, 7, 7)) + b0.dimshuffle(0, 'x', 'x'))
    # this is not the correct leNet5 model, but it's closer to
    s0 = tanh(max_pool_2d(c0, (5, 5)))
    c1 = tanh(conv2d(s0, w1, image_shape=(batchsize, 6, 50, 50),
                     filter_shape=(16, 6, 7, 7)) + b1.dimshuffle(0, 'x', 'x'))
    s1 = tanh(max_pool_2d(c1, (4, 4)))

    p_y_given_x = softmax(dot(tanh(dot(s1.flatten(2), vv) + cc), v) + c)
    nll = -log(p_y_given_x)[arange(ssy.shape[0]), ssy]
    cost = nll.mean()
    gparams = grad(cost, params)
    train2 = function([], cost,
                      updates=[(p, p - lr * gp)
                               for p, gp in zip(params, gparams)]
                              + [(ssi, ssi + snsi)],
                      name=name)
    GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
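# Shape bookkeeping behind the 16 * 11 * 11 flattened size used for vv in
# bench_ConvLarge: each 'valid' convolution shrinks the side by filter - 1,
# and each pooling divides it by the pool size.
size = 256
size = size - 7 + 1   # conv w0, 7x7  -> 250
size = size // 5      # max_pool 5x5  -> 50
size = size - 7 + 1   # conv w1, 7x7  -> 44
size = size // 4      # max_pool 4x4  -> 11
assert size == 11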
def test_optimize_xent_vector2(self):
    verbose = 0
    mode = theano.compile.mode.get_default_mode()
    if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
        mode = 'FAST_RUN'
    rng = numpy.random.RandomState(utt.fetch_seed())
    x_val = rng.randn(5)
    b_val = rng.randn(5)
    y_val = numpy.asarray([2])
    x = T.dvector('x')
    b = T.dvector('b')
    y = T.lvector('y')

    def print_graph(func):
        for i, node in enumerate(func.maker.env.toposort()):
            print i, node
        # Last node should be the output
        print i, printing.pprint(node.outputs[0])
        print

    ## Test that a biased softmax is optimized correctly
    bias_expressions = [
        T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
        T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]

    for expr in bias_expressions:
        f = theano.function([x, b, y], expr, mode=mode)
        if verbose:
            print_graph(f)
        try:
            prev, last = f.maker.env.toposort()[-2:]
            assert len(f.maker.env.toposort()) == 3  # [big_op, sum, dim_shuffle]
            f(x_val, b_val, y_val)
        except:
            theano.printing.debugprint(f)
            raise

        backup = config.warn.sum_div_dimshuffle_bug
        config.warn.sum_div_dimshuffle_bug = False
        try:
            g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
        finally:
            config.warn.sum_div_dimshuffle_bug = backup
        if verbose:
            print_graph(g)
        try:
            ops = [node.op for node in g.maker.env.toposort()]
            assert len(ops) <= 6
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_with_bias in ops
            assert softmax_grad not in ops
            g(x_val, b_val, y_val)
        except:
            theano.printing.debugprint(g)
            raise
def setInputOutput(self, inp, inpDropout, mbSize):
    self.inp = inp.reshape((mbSize, self.nInp))
    self.out = softmax((1 - self.pDropout) * T.dot(self.inp, self.w) + self.b)
    self.yOut = T.argmax(self.out, axis=1)
    self.inpDropout = dropoutLayer(
        inpDropout.reshape((mbSize, self.nInp)), self.pDropout)
    self.outDropout = softmax(T.dot(self.inpDropout, self.w) + self.b)
def bench_mlp_500(variant=True):
    name = "mlp_784_500_10_b" + str(GlobalBenchReporter.batch_size)
    name += "_" + config.linker
    HUs = 500

    # Float division so the Glorot scale is not truncated to zero under
    # Python 2 integer division.
    w = shared(rand(HUs, inputs) * numpy.sqrt(6. / (inputs + HUs)), name='w')
    b = shared(zeros(HUs), name='b')
    v = shared(zeros(outputs, HUs), name='v')
    c = shared(zeros(outputs), name='c')

    if GlobalBenchReporter.batch_size == 1:
        sx_ = sx.flatten()
        sy_ = specify_shape(sy, [1])
        ssx_ = ssx.flatten()
        ssy_ = specify_shape(ssy, [1])
    else:
        sx_ = sx
        sy_ = sy
        ssx_ = ssx
        ssy_ = ssy

    p_y_given_x = softmax(dot(tanh(dot(sx_, w.T) + b), v.T) + c)
    nll = -log(p_y_given_x)[arange(sy_.shape[0]), sy_]
    cost = nll.mean()
    gw, gb, gv, gc = grad(cost, [w, b, v, c])
    train = function([si, nsi], cost,
                     updates={w: w - lr * gw,
                              b: b - lr * gb,
                              v: v - lr * gv,
                              c: c - lr * gc},
                     name=name)
    GlobalBenchReporter.eval_model(train, name)
    if not variant:
        return

    # Version with no inputs
    snsi.set_value(GlobalBenchReporter.batch_size)
    p_y_given_x = softmax(dot(tanh(dot(ssx_, w.T) + b), v.T) + c)
    nll = -log(p_y_given_x)[arange(ssy_.shape[0]), ssy_]
    cost = nll.mean()
    gw, gb, gv, gc = grad(cost, [w, b, v, c])
    train2 = function([], cost,
                      updates={w: w - lr * gw,
                               b: b - lr * gb,
                               v: v - lr * gv,
                               c: c - lr * gc,
                               ssi: ssi + snsi},
                      name=name)
    GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
def test_optimize_xent_vector2(self):
    verbose = 0
    mode = theano.compile.mode.get_default_mode()
    if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
        mode = 'FAST_RUN'
    rng = numpy.random.RandomState(utt.fetch_seed())
    x_val = rng.randn(5).astype(config.floatX)
    b_val = rng.randn(5).astype(config.floatX)
    y_val = numpy.asarray([2])
    x = T.vector('x')
    b = T.vector('b')
    y = T.lvector('y')

    ## Test that a biased softmax is optimized correctly
    bias_expressions = [
        T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
        T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]

    for expr in bias_expressions:
        f = theano.function([x, b, y], expr, mode=mode)
        if verbose:
            printing.debugprint(f)
        try:
            ops = [node.op for node in f.maker.fgraph.toposort()]
            # [big_op, sum, dim_shuffle]
            assert len(ops) == 3
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops
                        if isinstance(o, T.AdvancedSubtensor)]
            f(x_val, b_val, y_val)
        except Exception:
            theano.printing.debugprint(f)
            raise

        backup = config.warn.sum_div_dimshuffle_bug
        config.warn.sum_div_dimshuffle_bug = False
        try:
            g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
        finally:
            config.warn.sum_div_dimshuffle_bug = backup
        if verbose:
            printing.debugprint(g)
        try:
            ops = [node.op for node in g.maker.fgraph.toposort()]
            assert len(ops) <= 6
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_with_bias in ops
            assert softmax_grad not in ops
            g(x_val, b_val, y_val)
        except Exception:
            theano.printing.debugprint(g)
            raise
def bench_deep1000(variant=True):
    name = "mlp_784_1000_1000_1000_10_b" + str(GlobalBenchReporter.batch_size)
    name += "_" + config.linker

    # Float division so the Glorot scale is not truncated to zero under
    # Python 2 integer division.
    w0 = shared(rand(inputs, 1000) * numpy.sqrt(6. / (inputs + 1000)),
                name='w0')
    b0 = shared(zeros(1000), name='b0')
    w1 = shared(rand(1000, 1000) * numpy.sqrt(6. / (1000 + 1000)), name='w1')
    b1 = shared(zeros(1000), name='b1')
    w2 = shared(rand(1000, 1000) * numpy.sqrt(6. / (1000 + 1000)), name='w2')
    b2 = shared(zeros(1000), name='b2')
    v = shared(zeros(1000, outputs), name='v')
    c = shared(zeros(outputs), name='c')

    if GlobalBenchReporter.batch_size == 1:
        sx_ = sx.flatten()
        sy_ = specify_shape(sy, [1])
        ssx_ = ssx.flatten()
        ssy_ = specify_shape(ssy, [1])
    else:
        sx_ = sx
        sy_ = sy
        ssx_ = ssx
        ssy_ = ssy
    params = [w0, b0, w1, b1, w2, b2, v, c]

    h0 = tanh(dot(sx_, w0) + b0)
    h1 = tanh(dot(h0, w1) + b1)
    h2 = tanh(dot(h1, w2) + b2)
    p_y_given_x = softmax(dot(h2, v) + c)
    nll = -log(p_y_given_x)[arange(sy_.shape[0]), sy_]
    cost = nll.mean()
    gparams = grad(cost, params)
    train = function([si, nsi], cost,
                     updates=[(p, p - lr * gp)
                              for p, gp in zip(params, gparams)],
                     name=name)
    GlobalBenchReporter.eval_model(train, name)
    if not variant:
        return

    # Version with no inputs
    h0 = tanh(dot(ssx_, w0) + b0)
    h1 = tanh(dot(h0, w1) + b1)
    h2 = tanh(dot(h1, w2) + b2)
    p_y_given_x = softmax(dot(h2, v) + c)
    nll = -log(p_y_given_x)[arange(ssy_.shape[0]), ssy_]
    cost = nll.mean()
    gparams = grad(cost, params)
    train2 = function([], cost,
                      updates=[(p, p - lr * gp)
                               for p, gp in zip(params, gparams)]
                              + [(ssi, ssi + snsi)],
                      name=name)
    snsi.set_value(GlobalBenchReporter.batch_size)
    GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
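# The benchmarks above scale uniform weights by sqrt(6 / (fan_in + fan_out)),
# i.e. the Glorot/Xavier limit applied to U(0, 1) draws. A symmetric variant
# of that initializer as a NumPy sketch (names here are illustrative):
import numpy as np

def glorot_uniform(fan_in, fan_out, rng=np.random.RandomState(0)):
    # Uniform(-limit, limit) with limit = sqrt(6 / (fan_in + fan_out)).
    limit = np.sqrt(6.0 / (fan_in + fan_out))
    return rng.uniform(-limit, limit, size=(fan_in, fan_out))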
def bench_logreg(variant=True):
    name = "mlp_784_10_b" + str(GlobalBenchReporter.batch_size)
    name += "_" + config.linker
    v = shared(zeros(outputs, inputs), name='v')
    c = shared(zeros(outputs), name='c')

    if GlobalBenchReporter.batch_size == 1:
        sx_ = sx.flatten()
        sy_ = specify_shape(sy, [1])
        ssx_ = ssx.flatten()
        ssy_ = specify_shape(ssy, [1])
    else:
        sx_ = sx
        sy_ = sy
        ssx_ = ssx
        ssy_ = ssy

    #
    # Note on the transposed-ness of v: for some reason, this data
    # layout is faster than the non-transposed orientation.
    # The change doesn't make much difference in the deeper models,
    # but in this case it was more than twice as fast.
    #
    p_y_given_x = softmax(dot(sx_, v.T) + c)
    nll = -log(p_y_given_x)[arange(sy_.shape[0]), sy_]
    cost = nll.mean()
    gv, gc = grad(cost, [v, c])
    #theano.printing.debugprint(grad(cost, [v, c]), file=open('foo', 'wb'))

    train = function([si, nsi], [],
                     updates={v: v - lr * gv,
                              c: c - lr * gc},
                     name=name)
    # theano.printing.debugprint(train, print_type=True)
    GlobalBenchReporter.eval_model(train, name)
    if not variant:
        return

    # Version with no inputs
    snsi.set_value(GlobalBenchReporter.batch_size)
    p_y_given_x = softmax(dot(ssx_, v.T) + c)
    nll = -log(p_y_given_x)[arange(ssy_.shape[0]), ssy_]
    cost = nll.mean()
    gv, gc = grad(cost, [v, c])
    train2 = function([], [],
                      updates={v: v - lr * gv,
                               c: c - lr * gc,
                               ssi: ssi + snsi},
                      name=name)
    GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
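# The advanced-indexing trick behind the NLL above: -log(p)[arange(N), y]
# picks out, for each row i, the log-probability assigned to target y[i].
# A NumPy sketch with made-up values:
import numpy as np

rng = np.random.RandomState(0)
p = rng.dirichlet(np.ones(10), size=4)      # (batch, classes), rows sum to 1
y = np.array([3, 0, 7, 7])
nll = -np.log(p)[np.arange(y.shape[0]), y]  # (batch,) per-example losses
assert nll.shape == (4,)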
def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
    self.inpt = inpt.reshape((mini_batch_size, self.n_in))
    self.output = softmax((1 - self.p_dropout) * T.dot(self.inpt, self.w) + self.b)
    print type(self.inpt), type(self.w), type(self.output)
    candidates = theano.shared(np.asarray(xrange(0, 2),
                                          dtype=theano.config.floatX),
                               borrow=True)
    # self.y_out = T.argmax(self.output, axis=1)
    self.y_out = T.dot(self.output, candidates)
    self.inpt_dropout = dropout_layer(
        inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
    self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)
def set_inpt(self, inpt, inpt_dropout, mini_batch_size): """ Set input """ self.inpt = inpt.reshape((mini_batch_size, self.n_in)) self.output = nnet.softmax((1 - self.p_dropout) * tensor.dot(self.inpt, self.weights) + self.biases) self.y_out = tensor.argmax(self.output, axis=1) self.inpt_dropout = dropout_layer( inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout) self.output_dropout = nnet.softmax( tensor.dot(self.inpt_dropout, self.weights) + self.biases)
def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
    self.inpt = inpt.reshape((mini_batch_size, self.n_in))
    # Output is masked by 1 - the probability of the dropout layer
    self.output = softmax((1 - self.p_dropout) * T.dot(self.inpt, self.w) + self.b)
    self.y_out = T.argmax(self.output, axis=1)
    # There is dropout in the output
    self.inpt_dropout = CNN.core_layers.DropoutLayer.dropout_layer(
        inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
    self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)
def output(self, inpt, inpt_dropout, mini_batch_size):
    """
    Generate output from a particular inpt, given the weights and biases.

    An observation: inpt (without dropout) is used in the feedforward pass
    to get the result, whereas inpt_dropout is used mainly for training.
    """
    self.inpt = inpt.reshape((mini_batch_size, self.n_in))
    self.output = softmax((1 - self.dropout) * T.dot(self.inpt, self.W) + self.b)
    self.y_out = T.argmax(self.output, axis=1)
    self.inpt_dropout = dropout_layer(
        inpt_dropout.reshape((mini_batch_size, self.n_in)), self.dropout)
    self.output_dropout = softmax(T.dot(self.inpt_dropout, self.W) + self.b)
def set_connection(self, inpt, inpt_dropout, mini_batch_size):
    # from input to output
    self.inpt = inpt.reshape((mini_batch_size, self.n_in))
    self.output = softmax(
        (1 - self.p_dropout) * (T.dot(self.inpt, self.w) + self.b))
    self.y_out = T.argmax(self.output, axis=1)
    # Note: the binomial mask is drawn once with numpy at graph-construction
    # time, so the same weight mask is reused for every mini-batch.
    w = self.w * np.random.binomial(1, 1 - self.p_dropout,
                                    self.w.get_value().shape)
    self.inpt_dropout = inpt_dropout.reshape((mini_batch_size, self.n_in))
    self.output_dropout = softmax(T.dot(self.inpt_dropout, w) + self.b)
    self.y_out_dropout = T.argmax(self.output_dropout, axis=1)
def test_optimize_xent_vector(self):
    verbose = 0
    mode = theano.compile.mode.get_default_mode()
    if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
        mode = 'FAST_RUN'
    rng = numpy.random.RandomState(utt.fetch_seed())
    x_val = rng.randn(5).astype(config.floatX)
    y_val = numpy.asarray([2])
    x = T.vector('x')
    y = T.lvector('y')

    def print_graph(func):
        for i, node in enumerate(func.maker.fgraph.toposort()):
            print i, node
        # Last node should be the output
        print i, printing.pprint(node.outputs[0])
        print

    ## Test that a biased softmax is optimized correctly
    bias_expressions = [
        T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))]

    for expr in bias_expressions:
        f = theano.function([x, y], expr, mode=mode)
        if verbose:
            print_graph(f)
        try:
            ops = [node.op for node in f.maker.fgraph.toposort()]
            assert len(ops) == 5
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops
                        if isinstance(o, T.AdvancedSubtensor)]
            f(x_val, y_val)
        except Exception:
            theano.printing.debugprint(f)
            raise

        g = theano.function([x, y], T.grad(expr, x), mode=mode)
        if verbose:
            print_graph(g)
        try:
            ops = [node.op for node in g.maker.fgraph.toposort()]
            assert len(ops) == 4
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax in ops
            assert softmax_grad not in ops
            g(x_val, y_val)
        except Exception:
            theano.printing.debugprint(g)
            raise
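# The fused op these tests look for, crossentropy_softmax_argmax_1hot_with_bias,
# should compute the same value as the naive softmax-then-log-then-index graph.
# A NumPy reference for the forward value on a single row (a sketch, not the
# Theano implementation):
import numpy as np

def xent_softmax_1hot(x, y):
    z = x - x.max()
    log_p = z - np.log(np.exp(z).sum())  # numerically stable log-softmax
    return -log_p[y]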
def recurrence(xp_t, xp_t1, xq_t1, mask_t, h_t_pre1, cxs, chs):
    # Features and hidden states are both shaped (batch_size, n_hidden) = (n, 20).
    # (n, winx, 20) = T.concatenate(((n, winx-1, 20), (n, 1, 20)), axis=1)
    # context_x
    # Fixed-size window: append xp_t at the bottom and drop the first row,
    # so the matrix shape stays the same.
    cxs = T.concatenate(
        (cxs[:, 1:, :],                # shape=(n, winx-1, 20)
         xp_t.dimshuffle(0, 'x', 1)),  # shape=(n, 1, 20)
        axis=1)                        # shape=(n, winx, 20)
    exs = T.dot(tanh(T.dot(cxs, qx)), rx)  # shape=(n, winx, 1)
    exs = T.Rebroadcast((2, True))(exs)    # broadcast axis=2 so it can be dropped
    axs0 = softmax(exs.dimshuffle(0, 1))   # shape=(n, winx); drop a dim, softmax works row-wise
    axs = axs0.dimshuffle(0, 1, 'x')       # shape=(n, winx, 1); restore the dim
    axs = T.Rebroadcast((2, True))(axs)    # broadcast axis=2 so the product below works
    # (n, 20) = T.sum((n, winx, 20) * (n, winx, 1), axis=1)
    xc = T.sum(cxs * axs, axis=1)          # shape=(n, 20)
    # gru unit
    z_r = sigmoid(T.dot(ui[:2], xp_t.T) +
                  T.dot(vc[:2], xc.T) +
                  T.dot(wh[:2], h_t_pre1.T) + bi[:2])
    z, r = z_r[0].T, z_r[1].T              # shape=(n, 20)
    c = tanh(T.dot(ui[2], xp_t.T) +
             T.dot(vc[2], xc.T) +
             T.dot(wh[2], (r * h_t_pre1).T) + bi[2])
    h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c.T  # shape=(n, 20)
    # context_h
    # Fixed-size window: append h_t at the bottom and drop the first row,
    # so the matrix shape stays the same.
    chs = T.concatenate(
        (chs[:, 1:, :],                # shape=(n, winh-1, 20)
         h_t.dimshuffle(0, 'x', 1)),   # shape=(n, 1, 20)
        axis=1)                        # shape=(n, winh, 20)
    ehs = T.dot(tanh(T.dot(chs, qh)), rh)  # shape=(n, winh, 1)
    ehs = T.Rebroadcast((2, True))(ehs)    # broadcast axis=2 so it can be dropped
    ahs0 = softmax(ehs.dimshuffle(0, 1))   # shape=(n, winh); drop a dim, softmax works row-wise
    ahs = ahs0.dimshuffle(0, 1, 'x')       # shape=(n, winh, 1); restore the dim
    ahs = T.Rebroadcast((2, True))(ahs)    # broadcast axis=2 so the product below works
    hcs = T.sum(chs * ahs, axis=1)         # shape=(n, 20)
    # Overall representation hws: fuse the current h_t with the context hcs.
    hws = tanh(T.dot(h_t, e.T) + T.dot(hcs, f.T))  # shape=(n, 20)
    # loss
    upq_t = T.sum(hws * (xp_t1 - xq_t1), axis=1)
    # shape=(n, ); h(t) * (xp(t+1) - xq(t+1)), positive/negative pair training
    loss_t = T.log(sigmoid(upq_t))
    loss_t *= mask_t  # masking the loss with the 0/1 vector is all that's needed
    return [h_t, cxs, chs, loss_t]
def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
    self.inpt = inpt.reshape((mini_batch_size, self.n_in))
    # self.output uses the softmax function and returns the classifier
    # scores as probabilities
    self.output = softmax((1 - self.p_dropout) *
                          T.dot(self.inpt, self.w) + self.b)
    self.y_out = T.argmax(self.output, axis=1)
    self.inpt_dropout = dropout_layer(
        inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
    self.output_dropout = softmax(
        T.dot(self.inpt_dropout, self.w) + self.b)
def test_optimize_xent_vector(self):
    verbose = 0
    mode = theano.compile.mode.get_default_mode()
    if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
        mode = 'FAST_RUN'
    rng = numpy.random.RandomState(utt.fetch_seed())
    x_val = rng.randn(5).astype(config.floatX)
    y_val = numpy.asarray([2])
    x = T.vector('x')
    y = T.lvector('y')

    def print_graph(func):
        for i, node in enumerate(func.maker.fgraph.toposort()):
            print i, node
        # Last node should be the output
        print i, printing.pprint(node.outputs[0])
        print

    ## Test that a biased softmax is optimized correctly
    bias_expressions = [
        T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))]

    for expr in bias_expressions:
        f = theano.function([x, y], expr, mode=mode)
        if verbose:
            print_graph(f)
        try:
            prev, last = f.maker.fgraph.toposort()[-2:]
            assert len(f.maker.fgraph.toposort()) == 5
            f(x_val, y_val)
        except Exception:
            theano.printing.debugprint(f)
            raise

        g = theano.function([x, y], T.grad(expr, x), mode=mode)
        if verbose:
            print_graph(g)
        try:
            ops = [node.op for node in g.maker.fgraph.toposort()]
            assert len(ops) == 4
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax in ops
            assert softmax_grad not in ops
            g(x_val, y_val)
        except Exception:
            theano.printing.debugprint(g)
            raise
def test_xent_thing_int32(self):
    verbose = 0
    mode = theano.compile.mode.get_default_mode()
    if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
        mode = 'FAST_RUN'
    rng = numpy.random.RandomState(utt.fetch_seed())
    x_val = rng.randn(3, 5).astype(config.floatX)
    y_val = numpy.asarray([2, 4, 1], dtype='int64')
    x = T.matrix('x')
    y = T.lvector('y')
    yi = T.cast(y, 'int32')

    expressions = [
        T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
        -T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
        -T.sum(T.log(softmax(x))[T.arange(yi.shape[0]), yi]),
        T.sum(-T.log(softmax(x))[T.arange(yi.shape[0]), yi])]

    for expr in expressions:
        # Verify the optimizer worked on the expressions
        f = theano.function([x, y], expr, mode=mode)
        if verbose:
            theano.printing.debugprint(f)
        try:
            ops = [node.op for node in f.maker.fgraph.toposort()]
            assert len(ops) == 5
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops
                        if isinstance(o, T.AdvancedSubtensor)]
            f(x_val, y_val)
        except Exception:
            theano.printing.debugprint(f)
            raise

        # Also verify the gradient wrt x
        g = theano.function([x, y], T.grad(expr, x), mode=mode)
        if verbose:
            theano.printing.debugprint(g)
        try:
            ops = [node.op for node in g.maker.fgraph.toposort()]
            assert len(ops) == 5
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax in ops
            assert softmax_grad not in ops
            g(x_val, y_val)
        except Exception:
            theano.printing.debugprint(g)
            raise
def set_inpt(self, inpt, inpt_dropout, mini_batch_size): """Construct the graph to compute the softmax layer output. Args: inpt: The input var. inpt_dropout: The dropouted input var. mini_batch_size: The mini batch size. """ self.inpt = inpt.reshape((mini_batch_size, self.n_in)) self.output = softmax(T.dot(self.inpt, self.w) + self.b) self.y_out = T.argmax(self.output, axis=1) self.inpt_dropout = dropout_layer( inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout) self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)
def inner(mean, var):
    # Generate samples of the distribution.
    samples = rng.normal(size=mean.shape)
    std = T.sqrt(var)
    samples = samples * std + mean
    if axis == 1:
        result = softmax(samples)
        # XXX
        result.name = 'susp1'
    if axis == 2:
        # Flatten the leading dimensions, apply the row-wise softmax,
        # then restore the original sample shape.
        samples_flat = samples.reshape((samples.shape[0] * samples.shape[1],
                                        samples.shape[2]))
        result_flat = softmax(samples_flat)
        result = result_flat.reshape(samples.shape)
    return result, T.zeros_like(var)
def _get_output(self, layer_input):
    """Return layer's output.

    :param layer_input: Input in the format (n_batches, n_neurons).
    :return: Layer output.
    """
    return softmax(layer_input)
def __init__(self, input, n_in, n_out, activation, rng=RandomState(1234),
             layer_name="LogReg", W=None, b=None, borrow=True):
    # Weight matrix W
    if W is not None:
        self.W = shared(W, name=layer_name + "_W", borrow=borrow)
    elif activation in (relu, softplus):
        W_val = _asarray(rng.normal(loc=0, scale=0.01, size=(n_in, n_out)),
                         dtype=floatX)
        self.W = shared(W_val, name=layer_name + "_W", borrow=borrow)
    else:
        self.W = shared(zeros((n_in, n_out), dtype=floatX),
                        name=layer_name + "_W", borrow=borrow)

    # Bias vector
    if b is not None:
        self.b = shared(b, name=layer_name + "_b", borrow=borrow)
    elif activation in (relu, softplus):
        b_val = ones((n_out,), dtype=floatX)
        self.b = shared(value=b_val, borrow=True)
    else:
        self.b = shared(zeros((n_out,), dtype=floatX),
                        name=layer_name + "_b", borrow=borrow)

    # T.flatten(input, ndim=2)
    # Vector of prediction probabilities
    self.p_y_given_x = softmax(T.dot(input, self.W) + self.b)
    # Prediction
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)
    # Parameters of the model
    self.params = [self.W, self.b]
def __init__(self, input, n_in, n_out):
    '''
    >>>type input: T.TensorType
    >>>para input: input data

    >>>type n_in: int
    >>>para n_in: num of input neurons

    >>>type n_out: int
    >>>para n_out: num of output neurons
    '''
    self.w = theano.shared(
        value=np.zeros((n_in, n_out), dtype=theano.config.floatX),
        name='w',
        borrow=True
    )
    #self.b = theano.shared(
    #    value=np.zeros((n_out,), dtype=theano.config.floatX),
    #    name='b',
    #    borrow=True
    #)
    self.param = [self.w]

    self.output = softmax(T.dot(input, self.w))
    self.predict = T.argmax(self.output, axis=1)
def bench_ConvMed(batchsize):
    data_x.value = randn(n_examples, 1, 96, 96)
    w0 = shared(rand(6, 1, 7, 7) * numpy.sqrt(6 / (25.)))
    b0 = shared(zeros(6))
    w1 = shared(rand(16, 6, 7, 7) * numpy.sqrt(6 / (25.)))
    b1 = shared(zeros(16))
    vv = shared(rand(16 * 8 * 8, 120) * numpy.sqrt(6.0 / 16. / 25))
    cc = shared(zeros(120))
    v = shared(zeros(120, outputs))
    c = shared(zeros(outputs))
    params = [w0, b0, w1, b1, v, c, vv, cc]

    c0 = tanh(conv2d(sx, w0, image_shape=(batchsize, 1, 96, 96),
                     filter_shape=(6, 1, 7, 7)) + b0.dimshuffle(0, 'x', 'x'))
    s0 = tanh(max_pool_2d(c0, (3, 3)))  # this is not the correct leNet5 model, but it's closer to
    c1 = tanh(conv2d(s0, w1, image_shape=(batchsize, 6, 30, 30),
                     filter_shape=(16, 6, 7, 7)) + b1.dimshuffle(0, 'x', 'x'))
    s1 = tanh(max_pool_2d(c1, (3, 3)))

    p_y_given_x = softmax(dot(tanh(dot(s1.flatten(2), vv) + cc), v) + c)
    nll = -log(p_y_given_x)[arange(sy.shape[0]), sy]
    cost = nll.mean()
    gparams = grad(cost, params)

    train = function([si, nsi], cost,
                     updates=[(p, p - lr * gp)
                              for p, gp in zip(params, gparams)])
    eval_and_report(train, "ConvMed", [batchsize], N=120)
def bench_deep1000():
    # Float division so the Glorot scale is not truncated to zero under
    # Python 2 integer division.
    w0 = shared(rand(inputs, 1000) * numpy.sqrt(6. / (inputs + 1000)))
    b0 = shared(zeros(1000))
    w1 = shared(rand(1000, 1000) * numpy.sqrt(6. / (1000 + 1000)))
    b1 = shared(zeros(1000))
    w2 = shared(rand(1000, 1000) * numpy.sqrt(6. / (1000 + 1000)))
    b2 = shared(zeros(1000))
    v = shared(zeros(1000, outputs))
    c = shared(zeros(outputs))
    params = [w0, b0, w1, b1, w2, b2, v, c]

    h0 = tanh(dot(sx, w0) + b0)
    h1 = tanh(dot(h0, w1) + b1)
    h2 = tanh(dot(h1, w2) + b2)
    p_y_given_x = softmax(dot(h2, v) + c)
    nll = -log(p_y_given_x)[arange(sy.shape[0]), sy]
    cost = nll.mean()
    gparams = grad(cost, params)

    train = function([si, nsi], cost,
                     updates=[(p, p - lr * gp)
                              for p, gp in zip(params, gparams)])
    eval_and_report(train, "mlp_784_1000_1000_1000_10")
def __init__(self, input, n_in, n_out, rng, layer_name="LogReg", W=None, b=None, borrow=True, b_scale=0.1, W_scale=0.01): # Weigth matrix W if W != None: self.W = shared(W, name=layer_name + "_W", borrow=borrow) else: self.W = shared(zeros((n_in, n_out), dtype=floatX), name=layer_name + "_W", borrow=borrow) # Bias vector if b != None: self.b = shared(b, name=layer_name + "_b", borrow=borrow) else: self.b = shared(zeros((n_out, ), dtype=floatX), name=layer_name + "_b", borrow=borrow) # Vector of prediction probabilities self.p_y_given_x = softmax(T.dot(input, self.W) + self.b) # Prediction self.y_pred = T.argmax(self.p_y_given_x, axis=1) # Parameters of the model self.params = [self.W, self.b]
def AttMemLayer(incomings, params, linear=0, w_name=None, w=None,
                w_initializer=init.HeUniform()):
    '''
    incomings = (u, u_shape, A, A_shape, C, C_shape)
    '''
    ((u, u_shape), (A, A_shape), (C, C_shape)) = incomings
    u_repeat = T.extra_ops.repeat(u.reshape((-1, 1, u_shape[-1])),
                                  C_shape[1], 1)
    Au = T.concatenate((A, u_repeat), axis=2)
    w_name = w_name or 'AttMem_%d' % len(params)
    w_name = add_param((C_shape[-1] + u_shape[-1], 1), params, w_name, w,
                       w_initializer)
    #Aup = T.tensordot(Au, params[w_name], axes=[len(C_shape)-1, 0])
    #Aup = Aup.reshape((-1, C_shape[1]))
    #p = nnet.softmax(Aup)
    p = nnet.softmax(
        T.tensordot(Au, params[w_name], axes=[len(C_shape) - 1, 0]).reshape(
            (-1, C_shape[1])))
    p_shape = A_shape[:2]
    O = (C * p[:, :, None]).sum(axis=1)
    return ((O, u_shape), (p, p_shape))
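# A NumPy sketch of the attention in AttMemLayer: score each memory slot by a
# linear map of the concatenation [A_i ; u], softmax over slots, then take the
# weighted sum of the output memories C. All names and sizes are illustrative.
import numpy as np

rng = np.random.RandomState(0)
batch, slots, d = 2, 7, 16
A = rng.randn(batch, slots, d)   # input memories
C = rng.randn(batch, slots, d)   # output memories
u = rng.randn(batch, d)          # query
w = rng.randn(2 * d, 1)

Au = np.concatenate([A, np.repeat(u[:, None, :], slots, axis=1)], axis=2)
scores = np.tensordot(Au, w, axes=([2], [0]))[:, :, 0]  # (batch, slots)
scores = scores - scores.max(axis=1, keepdims=True)
p = np.exp(scores)
p = p / p.sum(axis=1, keepdims=True)                    # attention weights
O = (C * p[:, :, None]).sum(axis=1)                     # (batch, d)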
def build_2048_ann(self, nb, nh, nh2):
    '''
    nb  = input nodes
    nh  = first hidden layer size
    nh2 = second hidden layer size
    '''
    print("building")
    w1 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nb, nh)))
    w2 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh, nh2)))
    w3 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh2, 4)))
    input = T.dvector('input')
    target = T.wvector('target')
    x1 = T.switch(T.dot(input, w1) > 0, T.dot(input, w1), 0)
    x2 = T.switch(T.dot(x1, w2) > 0, T.dot(x1, w2), 0)
    x3 = Tann.softmax(T.dot(x2, w3))
    error = T.sum(pow((target - x3), 2))
    params = [w1, w2, w3]
    gradients = T.grad(error, params)
    backprops = [(p, p - self.lrate * g) for p, g in zip(params, gradients)]
    self.trainer = theano.function(inputs=[input, target], outputs=error,
                                   updates=backprops,
                                   allow_input_downcast=True)
    self.predictor = theano.function(inputs=[input], outputs=x3,
                                     allow_input_downcast=True)
    print("Built")
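# The T.switch(T.dot(input, w1) > 0, T.dot(input, w1), 0) pattern above is
# just a rectified linear unit; the same activation in plain NumPy:
import numpy as np

z = np.array([-1.5, 0.0, 2.0])
relu = np.maximum(z, 0)  # -> [0., 0., 2.]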
def build_rectified_linear2_ann(self, nb, nh, nh2):  # 784 # 620
    '''
    Builds a neural network, using rectified linear units 2 as the
    activation function.
    '''
    print("Building rectified linear ann")
    w1 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nb, nh)))
    w2 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh, nh2)))
    w3 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh2, 10)))
    input = T.dvector('input')
    target = T.wvector('target')
    x1 = T.switch(T.dot(input, w1) > 0, T.dot(input, w1), 0)
    x2 = T.switch(T.dot(x1, w2) > 0, T.dot(x1, w2), 0)
    x3 = Tann.softmax(T.dot(x2, w3))
    error = T.sum(pow((target - x3), 2))
    params = [w1, w2, w3]
    gradients = T.grad(error, params)
    backprops = [(p, p - self.lrate * g) for p, g in zip(params, gradients)]
    self.trainer = theano.function(inputs=[input, target], outputs=error,
                                   updates=backprops,
                                   allow_input_downcast=True)
    self.predictor = theano.function(inputs=[input], outputs=x3,
                                     allow_input_downcast=True)
def test_softmax_optimizations_w_bias(self):
    x = tensor.matrix('x')
    b = tensor.vector('b')
    one_of_n = tensor.lvector('one_of_n')
    op = crossentropy_categorical_1hot
    xe = op(x, one_of_n)
    fgraph = gof.FunctionGraph([x, b, one_of_n],
                               [op(softmax(x + b), one_of_n)])
    assert fgraph.outputs[0].owner.op == op

    #print 'BEFORE'
    #for node in fgraph.toposort():
    #    print node.op
    #print printing.pprint(node.outputs[0])
    #print '----'

    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)

    #print 'AFTER'
    #for node in fgraph.toposort():
    #    print node.op
    #print printing.pprint(node.outputs[0])
    #print '===='

    assert len(fgraph.toposort()) == 2
    assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
    assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
            crossentropy_softmax_argmax_1hot_with_bias)
def lstm_output(self, y_prev, ch_prev):
    """calculates info to pass to next time step.
    ch_prev is a vector of size 2*hdim"""
    c_prev = ch_prev[:self.hdim]  # T.vector('c_prev')
    h_prev = ch_prev[self.hdim:]  # T.vector('h_prev')
    # gates (input, forget, output)
    i_t = sigmoid(T.dot(self.Ui, h_prev))
    f_t = sigmoid(T.dot(self.Uf, h_prev))
    o_t = sigmoid(T.dot(self.Uo, h_prev))
    # new memory cell
    c_new_t = T.tanh(T.dot(self.Uc, h_prev))
    # final memory cell
    c_t = f_t * c_prev + i_t * c_new_t
    # final hidden state
    h_t = o_t * T.tanh(c_t)
    # Input vector for softmax
    theta_t = T.dot(self.U, h_t) + self.b
    # Softmax prob vector
    y_hat_t = softmax(theta_t.T).T
    # Softmax wraps output in another list, why??
    # (specifically it outputs a 2-d row, not a 1-d column)
    # y_hat_t = y_hat_t[0]
    # Compute new cost
    out_label = T.argmax(y_hat_t)
    # final joint state
    ch_t = T.concatenate([c_t, h_t])
    return (out_label, ch_t), scan_module.until(T.eq(out_label, self.out_end))
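# A NumPy sketch of the (input-less) LSTM step used in lstm_output: all three
# gates and the candidate cell depend only on the previous hidden state.
# Names mirror the snippet; the matrices here are illustrative.
import numpy as np

def np_sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(h_prev, c_prev, Ui, Uf, Uo, Uc):
    i = np_sigmoid(Ui.dot(h_prev))   # input gate
    f = np_sigmoid(Uf.dot(h_prev))   # forget gate
    o = np_sigmoid(Uo.dot(h_prev))   # output gate
    c_new = np.tanh(Uc.dot(h_prev))  # candidate memory cell
    c = f * c_prev + i * c_new       # final memory cell
    h = o * np.tanh(c)               # final hidden state
    return h, c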
def rnn_output(self, y_prev, h_prev):
    h_t = T.tanh(T.dot(self.Wh, h_prev))
    # compute new out_label
    y_hat_t = softmax((T.dot(self.U, h_t) + self.b).T).T
    out_label = T.argmax(y_hat_t)
    return (out_label, h_t), scan_module.until(T.eq(out_label, self.out_end))
def MultAttMemLayer(incomings, params, num_hid, linear=0, w_name=None,
                    w=None, w_initializer=None):
    '''
    num_hid should be a tuple with length = len(w_name) - 1
    incomings = (u, u_shape, A, A_shape, C, C_shape)
    '''
    if not w_name:
        _w_name = [None for _ in range(len(num_hid) + 1)]
    else:
        _w_name = [wn for wn in w_name]
    if not w:
        w = [None for _ in range(len(num_hid) + 1)]
    if not w_initializer:
        w_initializer = [init.HeUniform() for _ in range(len(num_hid) + 1)]
    ((u, u_shape), (A, A_shape), (C, C_shape)) = incomings
    u_repeat = T.extra_ops.repeat(u.reshape((-1, 1, u_shape[-1])),
                                  C_shape[1], 1)
    Au = T.concatenate((A, u_repeat), axis=2)
    _num_hid = (C_shape[-1] + u_shape[-1],) + num_hid + (1,)
    for i, nh in enumerate(_num_hid[:-1]):
        _w_name[i] = _w_name[i] or 'AttMem_%d' % len(params)
        _w_name[i] = add_param((nh, _num_hid[i + 1]), params, _w_name[i],
                               w[i], w_initializer[i])
        Au = T.tensordot(Au, params[_w_name[i]], axes=[len(C_shape) - 1, 0])
    p = nnet.softmax(Au.reshape((-1, C_shape[1])))
    p_shape = A_shape[:2]
    O = (C * p[:, :, None]).sum(axis=1)
    return ((O, u_shape), (p, p_shape))
def bench_ConvSmall(batchsize):
    data_x.set_value(randn(n_examples, 1, 32, 32))
    w0 = shared(rand(6, 1, 5, 5) * numpy.sqrt(6 / (25.)))
    b0 = shared(zeros(6))
    w1 = shared(rand(16, 6, 5, 5) * numpy.sqrt(6 / (25.)))
    b1 = shared(zeros(16))
    vv = shared(rand(16 * 5 * 5, 120) * numpy.sqrt(6.0 / 16. / 25))
    cc = shared(zeros(120))
    v = shared(zeros(120, outputs))
    c = shared(zeros(outputs))
    params = [w0, b0, w1, b1, v, c, vv, cc]

    c0 = tanh(conv2d(sx, w0, image_shape=(batchsize, 1, 32, 32),
                     filter_shape=(6, 1, 5, 5)) + b0.dimshuffle(0, 'x', 'x'))
    # this is not the correct leNet5 model, but it's closer to
    s0 = tanh(max_pool_2d(c0, (2, 2)))
    c1 = tanh(conv2d(s0, w1, image_shape=(batchsize, 6, 14, 14),
                     filter_shape=(16, 6, 5, 5)) + b1.dimshuffle(0, 'x', 'x'))
    s1 = tanh(max_pool_2d(c1, (2, 2)))

    p_y_given_x = softmax(dot(tanh(dot(s1.flatten(2), vv) + cc), v) + c)
    nll = -log(p_y_given_x)[arange(sy.shape[0]), sy]
    cost = nll.mean()
    gparams = grad(cost, params)

    train = function([si, nsi], cost,
                     updates=[(p, p - lr * gp)
                              for p, gp in zip(params, gparams)])
    eval_and_report(train, "ConvSmall", [batchsize], N=600)
def _step(h_tm1, y_tm1):
    h_t = sigmoid(T.dot(self.Whh[1], h_tm1) +
                  T.dot(self.Whx2, y_tm1) + self.bh[1])
    a = T.dot(self.Why, h_t) + self.b
    # softmax returns a 2-d row; reshape restores a's original shape.
    y_t = T.reshape(softmax(a), a.shape)
    return [h_t, y_t]
def test_softmax_optimizations_w_bias_vector(self):
    x = tensor.vector('x')
    b = tensor.vector('b')
    one_of_n = tensor.lvector('one_of_n')
    op = crossentropy_categorical_1hot
    fgraph = gof.FunctionGraph([x, b, one_of_n],
                               [op(softmax(x + b), one_of_n)])
    assert fgraph.outputs[0].owner.op == op

    #print 'BEFORE'
    #for node in fgraph.toposort():
    #    print node.op
    #print printing.pprint(node.outputs[0])
    #print '----'

    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)

    #print 'AFTER'
    #for node in fgraph.toposort():
    #    print node.op
    #print '===='

    assert len(fgraph.toposort()) == 3
    assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
    assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
            crossentropy_softmax_argmax_1hot_with_bias)
def recurrence(xp_t, h_t_pre1, cxs):
    # Features and hidden states are both shaped (batch_size, n_hidden) = (n, 20).
    # (n, winx, 20) = T.concatenate(((n, winx-1, 20), (n, 1, 20)), axis=1)
    # context_x
    # Fixed-size window: append xp_t at the bottom and drop the first row,
    # so the matrix shape stays the same.
    cxs = T.concatenate(
        (cxs[:, 1:, :],                # shape=(n, winx-1, 20)
         xp_t.dimshuffle(0, 'x', 1)),  # shape=(n, 1, 20)
        axis=1)                        # shape=(n, winx, 20)
    exs = T.dot(tanh(T.dot(cxs, qx)), rx)  # shape=(n, winx, 1)
    exs = T.Rebroadcast((2, True))(exs)    # broadcast axis=2 so it can be dropped
    axs0 = softmax(exs.dimshuffle(0, 1))   # shape=(n, winx); drop a dim, softmax works row-wise
    axs = axs0.dimshuffle(0, 1, 'x')       # shape=(n, winx, 1); restore the dim
    axs = T.Rebroadcast((2, True))(axs)    # broadcast axis=2 so the product below works
    # (n, 20) = T.sum((n, winx, 20) * (n, winx, 1), axis=1)
    xc = T.sum(cxs * axs, axis=1)          # shape=(n, 20)
    # gru unit
    z_r = sigmoid(T.dot(ui[:2], xp_t.T) +
                  T.dot(vc[:2], xc.T) +
                  T.dot(wh[:2], h_t_pre1.T) + bi[:2])
    z, r = z_r[0].T, z_r[1].T              # shape=(n, 20)
    c = tanh(T.dot(ui[2], xp_t.T) +
             T.dot(vc[2], xc.T) +
             T.dot(wh[2], (r * h_t_pre1).T) + bi[2])
    h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c.T  # shape=(n, 20)
    return [h_t, cxs, axs0]  # also return the per-position weights, shape=(n, winx)
def __init__(self, input, n_in, n_out, activation, rng, layer_name="LogReg", W=None, b=None, borrow=True): # Weigth matrix W if W != None: self.W = shared(W, name=layer_name+"_W", borrow=borrow) elif activation in (relu,softplus): W_val = _asarray(rng.normal(loc=0, scale=0.01, size=(n_in, n_out)), dtype=floatX) self.W = shared(W_val, name=layer_name+"_W", borrow=borrow) else: self.W = shared(zeros((n_in, n_out), dtype=floatX), name=layer_name+"_W", borrow=borrow) # Bias vector if b!=None: self.b = shared(b, name=layer_name+"_b", borrow=borrow) elif activation in (relu,softplus): b_val = ones((n_out,), dtype=floatX) self.b = shared(value=b_val, borrow=True) else: self.b = shared(zeros((n_out,), dtype=floatX), name=layer_name+"_b", borrow=borrow) # Vector of prediction probabilities self.p_y_given_x = softmax(T.dot(input, self.W) + self.b) # Prediction self.y_pred = T.argmax(self.p_y_given_x, axis=1) # Parameters of the model self.params = [self.W, self.b]
def test_softmax_optimizations_w_bias2(self):
    x = tensor.matrix('x')
    b = tensor.vector('b')
    c = tensor.vector('c')
    one_of_n = tensor.lvector('one_of_n')
    op = crossentropy_categorical_1hot

    env = gof.Env([x, b, c, one_of_n],
                  [op(softmax(T.add(x, b, c)), one_of_n)])
    assert env.outputs[0].owner.op == op

    print 'BEFORE'
    for node in env.toposort():
        print node.op
    print '----'

    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(env)

    print 'AFTER'
    for node in env.toposort():
        print node.op
    print '===='

    assert len(env.toposort()) == 3
    assert str(env.outputs[0].owner.op) == 'OutputGuard'
    assert (env.outputs[0].owner.inputs[0].owner.op ==
            crossentropy_softmax_argmax_1hot_with_bias)
def test_argmax_pushdown():
    x = tensor.dmatrix()

    # test that the max_and_argmax is pushed down if the max is not used
    out = tensor.max_and_argmax(
        softmax(tensor.exp(tensor.tanh(sigmoid(x)))),
        axis=-1)[1]
    env = gof.Env([x], [out])

    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(env)

    #print 'AFTER'
    #for node in env.toposort():
    #    print node.op
    assert len(env.toposort()) == 2  # an output_guard is second
    assert env.toposort()[0].op == tensor.basic._max_and_argmax
    assert str(env.toposort()[1].op) == 'OutputGuard'

    x = tensor.dmatrix()
    # test that the max_and_argmax is not pushed down if the max is used
    out = tensor.max_and_argmax(
        softmax(tensor.exp(tensor.tanh(sigmoid(x)))),
        axis=-1)[0]
    env = gof.Env([x], [out])

    backup = config.warn.argmax_pushdown_bug
    config.warn.argmax_pushdown_bug = False
    try:
        theano.compile.mode.optdb.query(
            theano.compile.mode.OPT_FAST_RUN).optimize(env)
    finally:
        config.warn.argmax_pushdown_bug = backup

    #print 'AFTER'
    #for node in env.toposort():
    #    print node.op
    assert len(env.toposort()) == 4  # an output_guard is last
    assert isinstance(env.toposort()[0].op, tensor.Elemwise)
    assert isinstance(env.toposort()[1].op, Softmax)
    assert isinstance(env.toposort()[2].op, tensor.CAReduce)
    assert isinstance(env.toposort()[2].op.scalar_op, theano.scalar.Maximum)
    assert str(env.toposort()[3].op) == 'OutputGuard'
def drnn_timestep(self, x_t, old_cost, h_prev, ys):
    Lx_t = self.L[:, x_t]
    # gates (update, reset)
    h_t = T.tanh(T.dot(self.Wx, Lx_t) + T.dot(self.Wh, h_prev))
    # y_hat_t has shape (classes, batch), hence the column-major indexing.
    y_hat_t = softmax((T.dot(self.U, h_t) + self.b).T).T
    cost = T.sum(-T.log(y_hat_t[ys, T.arange(ys.shape[0])]))
    return cost, h_t
def set_input(self, inpt, input_dropout, mini_batch_size):
    '''
    Sets the input for the Softmax Layer by reshaping it to a matrix of
    size 'mini_batch_size' x 'n_in', and sets the output by a forward
    pass using the 'activation_fn'. 'input_dropout' and 'output_dropout'
    are set using the dropout layer prescribed earlier.
    '''
    self.inpt = inpt.reshape((mini_batch_size, self.n_in))
    self.output = softmax((1 - self.p_dropout) *
                          Tensor.dot(self.inpt, self.weights) + self.biases)
    self.y_out = Tensor.argmax(self.output, axis=1)
    self.input_dropout = dropout_layer(
        input_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
    self.output_dropout = softmax(
        Tensor.dot(self.input_dropout, self.weights) + self.biases)
def forward_propagation_NAG(self, inpt, velocity, alph):
    z = T.dot(self.weights + alph * velocity, inpt) + self.biases.dimshuffle(0, 'x')
    if self.last_flag == False:
        active = ReLU(z)
    else:
        active = softmax(z.T)
    return active
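# forward_propagation_NAG evaluates the layer at the Nesterov "lookahead"
# parameters self.weights + alph * velocity rather than at the current
# weights. A NumPy sketch of the corresponding update rule (illustrative
# names; grad_at is assumed to return the gradient at a given parameter
# value, and is not part of the original code):
import numpy as np

def nag_update(w, velocity, grad_at, lr, alpha):
    g = grad_at(w + alpha * velocity)     # gradient at the lookahead point
    velocity = alpha * velocity - lr * g  # momentum update
    return w + velocity, velocity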
def __init__(self, x=None, targ=None, w=None, b=None, lr=None,
             regularize=False):
    super(Module_Nclass, self).__init__()  # boilerplate

    #self.x = module.Member(x) if x is not None else T.matrix('input')
    if x is not None:
        self.x = (x)
    else:
        self.x = T.matrix('input')

    #self.targ = module.Member(targ) if targ is not None else T.lvector()
    if targ is not None:
        self.targ = (targ)
    else:
        self.targ = T.lvector()

    #self.w = module.Member(w) if w is not None else module.Member(T.dmatrix())
    if w is not None:
        self.w = (w)
    else:
        self.w = (T.dmatrix())

    #self.b = module.Member(b) if b is not None else module.Member(T.dvector())
    if b is not None:
        self.b = (b)
    else:
        self.b = (T.dvector())

    #self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar())
    if lr is not None:
        self.lr = (lr)
    else:
        self.lr = (T.dscalar())

    self.params = [p for p in [self.w, self.b] if p.owner is None]

    linear_output = T.dot(self.x, self.w) + self.b

    (xent, softmax, max_pr, argmax) = NN.crossentropy_softmax_max_and_argmax_1hot(
        linear_output, self.targ)
    sum_xent = T.sum(xent)

    self.softmax = softmax
    self.argmax = argmax
    self.max_pr = max_pr
    self.sum_xent = sum_xent

    # Softmax being computed directly.
    softmax_unsupervised = NN.softmax(linear_output)
    self.softmax_unsupervised = softmax_unsupervised

    #compatibility with current implementation of stacker/daa or something
    #TODO: remove this, make a wrapper
    self.cost = self.sum_xent
    self.input = self.x
    # TODO: I want to make output = linear_output.
    self.output = self.softmax_unsupervised

    #define the apply method
    self.pred = T.argmax(linear_output, axis=1)
    #self.apply = module.Method([self.input], self.pred)
    #self.validate = module.Method([self.input, self.targ],
    #                              [self.cost, self.argmax, self.max_pr])
    #self.softmax_output = module.Method([self.input],
    #                                    self.softmax_unsupervised)

    if self.params:
        gparams = T.grad(sum_xent, self.params)
def __init__(self, input, w, b, params=[]):
    self.output = nnet.softmax(theano.dot(input, w) + b)
    self.l1 = abs(w).sum()
    self.l2_sqr = (w ** 2).sum()
    self.argmax = theano.tensor.argmax(theano.dot(input, w) + b,
                                       axis=input.ndim - 1)
    self.input = input
    self.w = w
    self.b = b
    self.params = params
def predictInstance(self, data):
    '''
    >>>classify newly arrived data

    >>>type data: T.tensor4
    >>>para data: newly arrived data
    '''
    p = softmax(T.dot(data, self.w))
    return T.argmax(p, axis=1)
def drnn_output(self, x_t, old_label, h_prev):
    Lx_t = self.L[:, x_t]
    h_t = T.tanh(T.dot(self.Wx, Lx_t) + T.dot(self.Wh, h_prev))
    print h_t.type
    y_hat_t = softmax(T.dot(self.U, h_t) + self.b)[0]
    out_label = T.argmax(y_hat_t)
    return out_label, h_t
def test_xent_thing_int32(self):
    verbose = 0
    mode = theano.compile.mode.get_default_mode()
    if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
        mode = 'FAST_RUN'
    rng = numpy.random.RandomState(utt.fetch_seed())
    x_val = rng.randn(3, 5)
    b_val = rng.randn(5)
    y_val = numpy.asarray([2, 4, 1], dtype='int64')
    x = T.dmatrix('x')
    b = T.dvector('b')
    y = T.lvector('y')
    yi = T.cast(y, 'int32')

    expressions = [
        T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
        -T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
        -T.sum(T.log(softmax(x))[T.arange(yi.shape[0]), yi]),
        T.sum(-T.log(softmax(x))[T.arange(yi.shape[0]), yi])]

    for expr in expressions:
        # Verify the optimizer worked on the expressions
        f = theano.function([x, y], expr, mode=mode)
        if verbose:
            theano.printing.debugprint(f)
        try:
            assert len(f.maker.env.toposort()) == 5
            f(x_val, y_val)
        except Exception:
            theano.printing.debugprint(f)
            raise

        # Also verify the gradient wrt x
        g = theano.function([x, y], T.grad(expr, x), mode=mode)
        if verbose:
            theano.printing.debugprint(g)
        try:
            assert len(g.maker.env.toposort()) == 5
            g(x_val, y_val)
        except Exception:
            theano.printing.debugprint(g)
            raise