def __build_theano__(self): x = ivector(name="x") y = ivector(name="y") U, V, W = self.U, self.V, self.W def forword_prop_step(x_t, s_t_prev, U, V, W): s_t = T.tanh(U[:,x_t] + V.dot(s_t_prev)) o_t = T.nnet.softmax(W.dot(s_t)) return [o_t[0], s_t] [o,s], updates = theano.scan(forword_prop_step, sequences=x, outputs_info=[None, dict(initial=T.zeros(self.hidden_dim))], non_sequences=[U,V,W], truncate_gradient=4, strict=True) prediction = T.argmax(o, axis=1) o_error = T.sum(T.nnet.categorical_crossentropy(o, y)) dU = T.grad(o_error, U) dV = T.grad(o_error, V) dW = T.grad(o_error, W) self.forward = theano.function([x], o) self.predict = theano.function([x], prediction) self.c_error = theano.function([x, y], o_error) self.bptt = theano.function([x, y], [dU, dV, dW]) learning_rate = scalar(name="learning_rate") self.sgd_step = theano.function([x, y, learning_rate], [], updates=[(self.U, self.U-learning_rate*dU), (self.V, self.V-learning_rate*dV), (self.W, self.W-learning_rate*dW)])
def test_infer_shape(self):
    rng = np.random.RandomState(3453)
    adtens4 = dtensor4()
    aivec = ivector()
    aivec_val = [3, 4, 2, 5]
    adtens4_val = rng.rand(*aivec_val)
    self._compile_and_check(
        [adtens4, aivec],
        [SpecifyShape()(adtens4, aivec)],
        [adtens4_val, aivec_val],
        SpecifyShape,
    )
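
# What test_infer_shape checks, as a standalone sketch: SpecifyShape lets
# Theano infer the output's shape symbolically, so a shape-only function
# never needs to touch the underlying data. The variable names here are
# illustrative assumptions.
import numpy as np
import theano
import theano.tensor as T

x = T.matrix("x")
y = T.specify_shape(x, (2, 3))       # annotate/assert the shape of x
f = theano.function([x], y.shape)    # compiles to a shape-only graph
print(f(np.zeros((2, 3), dtype=theano.config.floatX)))  # [2 3]
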
def test_bad_number_of_shape(self):
    # Test that the number of dimensions provided is good
    specify_shape = SpecifyShape()

    x = vector()
    shape_vec = ivector()
    xval = np.random.rand(2).astype(config.floatX)
    with pytest.raises(AssertionError):
        specify_shape(x, [])
    with pytest.raises(AssertionError):
        specify_shape(x, [2, 2])

    f = theano.function([x, shape_vec], specify_shape(x, shape_vec), mode=self.mode)
    assert isinstance(
        [
            n for n in f.maker.fgraph.toposort()
            if isinstance(n.op, SpecifyShape)
        ][0].inputs[0].type,
        self.input_type,
    )
    with pytest.raises(AssertionError):
        f(xval, [])
    with pytest.raises(AssertionError):
        f(xval, [2, 2])

    x = matrix()
    xval = np.random.rand(2, 3).astype(config.floatX)
    for shape_ in [(), (1,), (2, 3, 4)]:
        with pytest.raises(AssertionError):
            specify_shape(x, shape_)

        f = theano.function([x, shape_vec], specify_shape(x, shape_vec), mode=self.mode)
        assert isinstance(
            [
                n for n in f.maker.fgraph.toposort()
                if isinstance(n.op, SpecifyShape)
            ][0].inputs[0].type,
            self.input_type,
        )
        with pytest.raises(AssertionError):
            f(xval, shape_)
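
# Runtime counterpart to the graph-construction checks above: SpecifyShape
# also validates the concrete shape when the compiled function runs. A
# minimal sketch, with illustrative names:
import numpy as np
import theano
import theano.tensor as T

x = T.vector("x")
f = theano.function([x], T.specify_shape(x, (2,)))
print(f(np.zeros(2, dtype=theano.config.floatX)))  # matches the declared shape
try:
    f(np.zeros(3, dtype=theano.config.floatX))     # mismatched shape
except AssertionError:
    print("shape check failed as expected")
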
def make_node(self, x, weights):
    warnings.warn(
        "BinCountOp is deprecated, use the bincount function instead.",
        stacklevel=3)
    x = basic.as_tensor_variable(x)

    if x.dtype not in BinCountOp.compatible_type:
        raise TypeError("Inputs dtype must be an integer.")

    # Some dtypes are not supported by numpy's implementation of bincount.
    # Until another one is available, we should fail at graph construction
    # time, not wait for execution.
    int_bitwidth = theano.gof.python_int_bitwidth()
    if int_bitwidth == 64:
        numpy_unsupported_dtypes = ('uint64',)
    elif int_bitwidth == 32:
        numpy_unsupported_dtypes = ('uint32', 'int64', 'uint64')

    intp_bitwidth = theano.gof.local_bitwidth()
    if intp_bitwidth == 32:
        out_type = basic.ivector()
    elif intp_bitwidth == 64:
        out_type = basic.lvector()

    if x.dtype in numpy_unsupported_dtypes:
        raise TypeError(
            "Input dtype %s is not supported by numpy.bincount "
            "(unsupported dtypes: %s)." % (x.dtype, numpy_unsupported_dtypes))

    if x.ndim != 1:
        raise TypeError("Inputs must be of dimension 1.")

    if weights is None:
        weights = theano.gof.Constant(theano.gof.Generic(), None)
    else:
        weights = basic.as_tensor_variable(weights)
        out_type = basic.dvector()
        if weights.ndim != 1:
            raise TypeError("Weights cannot have a number of "
                            "dimensions different from 1.")

    return theano.Apply(self, [x, weights], [out_type])
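
# For reference, the numpy behavior this Op wraps: np.bincount counts
# occurrences of each non-negative integer, optionally weighting each
# occurrence (which is why the weighted case returns a double vector above).
import numpy as np

x = np.array([0, 1, 1, 3], dtype="int64")
print(np.bincount(x))                                # [1 2 0 1]
print(np.bincount(x, weights=[1.0, 0.5, 0.5, 2.0]))  # [1. 1. 0. 2.]
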
def __build_theano__(self): x = ivector("x") y = ivector("y") hidden_dim = self.hidden_dim word_dim = self.word_dim Wxi, Whi, Wci, Wxf, Whf, Wcf, Wxc, Whc, Wxo, Who, Wco, Wo = self.Wxi, self.Whi, self.Wci, self.Wxf, self.Whf, self.Wcf, self.Wxc, self.Whc, self.Wxo, self.Who, self.Wco, self.Wo def forward_prop(x_t, c_prev_t, h_prev_t, Wxi, Whi, Wci, Wxf, Whf, Wcf, Wxc, Whc, Wxo, Who, Wco, Wo): input_gate = T.tanh(Wxi.dot(x_t) + Whi.dot(h_prev_t) + Wci*c_prev_t) forget_gate = T.tanh(Wxf.dot(x_t) + Whf.dot(h_prev_t) + Wcf*c_prev_t) a_c_t = Wxc.dot(x_t) + Whc.dot(h_prev_t) c_t = input_gate * T.nnet.sigmoid(a_c_t) + forget_gate * c_prev_t output_gate = T.tanh(Wxo.dot(x_t) + Who.dot(h_prev_t) + Wco*c_t) h_t = output_gate * T.tanh(c_t) o_t = Wo.dot(h_t) return [o_t[0], c_t, h_t] [o, c, h], updates = theano.scan(forward_prop, sequences = x, outputs_info = [None, dict(initial=T.zeros(hidden_dim)), dict(initial=T.zeros(hidden_dim))], non_sequences = [Wxi, Whi, Wci, Wxf, Whf, Wcf, Wxc, Whc, Wxo, Who, Wco, Wo], strict = True) prediction = T.argmax(o, axis=1) c_error = T.sum(T.nnet.categorical_crossentropy(o, y)) dWxi = T.grad(c_error, Wxi) dWhi = T.grad(c_error, Whi) dWci = T.grad(c_error, Wci) dWxf = T.grad(c_error, Wxf) dWhf = T.grad(c_error, Whf) dWcf = T.grad(c_error, Wcf) dWxc = T.grad(c_error, Wxc) dWhc = T.grad(c_error, Whc) dWxo = T.grad(c_error, Wxo) dWho = T.grad(c_error, Who) dWco = T.grad(c_error, Wco) dWo = T.grad(c_error, Wo) forward = theano.function([x], o) predict = theano.function([x], prediction) learning_rate = scalar("learning_rate") sgd_step = theano.function([x,y], [], updates = [(self.Wxi, self.Wxi-learning_rate*dWxi), (self.Whi, self.Whi-learning_rate*dWhi), (self.Wci, self.Wci-learning_rate*dWci), (self.Wxf, self.Wxf-learning_rate*dWxf), (self.Whf, self.Whf-learning_rate*dWhf), (self.Wcf, self.Wcf-learning_rate*dWcf), (self.Wxo, self.Wxo-learning_rate*dWxo), (self.Who, self.Who-learning_rate*dWho), (self.Wco, self.Wco-learning_rate*dWco), (self.Wxc, self.Wxc-learning_rate*dWxc), (self.Whc, self.Whc-learning_rate*dWhc), (self.Wo, self.Wo-learning_rate*dWo)])