Beispiel #1
0
 def __build_theano__(self):
     """Build the symbolic RNN graph and compile its Theano functions.

     Sets ``self.forward``, ``self.predict``, ``self.c_error``,
     ``self.bptt`` and ``self.sgd_step``. The scan over the input
     sequence is built with ``truncate_gradient=4``.
     """
     x = ivector(name="x")
     y = ivector(name="y")
     params = (self.U, self.V, self.W)

     def recurrence(x_t, s_t_prev, U, V, W):
         # New state from column x_t of U and the previous state.
         pre_activation = U[:, x_t] + V.dot(s_t_prev)
         s_t = T.tanh(pre_activation)
         # Only element 0 of the softmax result is emitted per step.
         probs = T.nnet.softmax(W.dot(s_t))
         return [probs[0], s_t]

     initial_state = dict(initial=T.zeros(self.hidden_dim))
     [o, s], updates = theano.scan(
         recurrence,
         sequences=x,
         outputs_info=[None, initial_state],
         non_sequences=list(params),
         truncate_gradient=4,
         strict=True,
     )

     prediction = T.argmax(o, axis=1)
     o_error = T.sum(T.nnet.categorical_crossentropy(o, y))

     # Gradients of the summed cross-entropy, in (U, V, W) order.
     grads = [T.grad(o_error, param) for param in params]

     self.forward = theano.function([x], o)
     self.predict = theano.function([x], prediction)
     self.c_error = theano.function([x, y], o_error)
     self.bptt = theano.function([x, y], grads)

     learning_rate = scalar(name="learning_rate")
     sgd_updates = [
         (param, param - learning_rate * grad)
         for param, grad in zip(params, grads)
     ]
     self.sgd_step = theano.function(
         [x, y, learning_rate], [], updates=sgd_updates
     )
Beispiel #2
0
 def test_infer_shape(self):
     """Run ``_compile_and_check`` on SpecifyShape applied to a 4-d tensor."""
     rng = np.random.RandomState(3453)
     tensor_var = dtensor4()
     shape_var = ivector()
     shape_val = [3, 4, 2, 5]
     # Random data whose shape equals the shape passed to the op.
     tensor_val = rng.rand(*shape_val)

     symbolic_inputs = [tensor_var, shape_var]
     symbolic_outputs = [SpecifyShape()(tensor_var, shape_var)]
     numeric_inputs = [tensor_val, shape_val]
     self._compile_and_check(
         symbolic_inputs, symbolic_outputs, numeric_inputs, SpecifyShape
     )
Beispiel #3
0
    def test_bad_number_of_shape(self):
        """Check that a shape with the wrong number of dimensions is rejected.

        Both graph construction with a literal shape and execution of a
        compiled function with a symbolic shape must raise AssertionError.
        """
        specify_shape = SpecifyShape()
        shape_vec = ivector()

        def compile_checked(var):
            # Compile with a symbolic shape and verify the input type of the
            # first SpecifyShape node left in the optimized graph.
            fn = theano.function(
                [var, shape_vec],
                specify_shape(var, shape_vec),
                mode=self.mode,
            )
            spec_nodes = [
                node for node in fn.maker.fgraph.toposort()
                if isinstance(node.op, SpecifyShape)
            ]
            assert isinstance(spec_nodes[0].inputs[0].type, self.input_type)
            return fn

        # Vector input: shapes of length 0 and 2 are invalid.
        x = vector()
        xval = np.random.rand(2).astype(config.floatX)
        vector_bad_shapes = ([], [2, 2])
        for bad_shape in vector_bad_shapes:
            with pytest.raises(AssertionError):
                specify_shape(x, bad_shape)

        f = compile_checked(x)
        for bad_shape in vector_bad_shapes:
            with pytest.raises(AssertionError):
                f(xval, bad_shape)

        # Matrix input: shapes of length 0, 1 and 3 are invalid.
        x = matrix()
        xval = np.random.rand(2, 3).astype(config.floatX)
        for shape_ in [(), (1, ), (2, 3, 4)]:
            with pytest.raises(AssertionError):
                specify_shape(x, shape_)
            f = compile_checked(x)
            with pytest.raises(AssertionError):
                f(xval, shape_)
Beispiel #4
0
    def make_node(self, x, weights):
        """Build the Apply node for this op from `x` and optional `weights`.

        Parameters
        ----------
        x
            Converted with ``basic.as_tensor_variable``; must be 1-d with a
            dtype listed in ``BinCountOp.compatible_type`` and supported by
            numpy's bincount on this platform.
        weights
            ``None`` or a value convertible to a 1-d tensor variable.

        Returns
        -------
        theano.Apply
            Node with inputs ``[x, weights]`` and one output: an ``ivector``
            or ``lvector`` (by ``local_bitwidth``) without weights, a
            ``dvector`` when weights are given.

        Raises
        ------
        TypeError
            If the dtype or the dimensionality of `x` or `weights` is
            not accepted.
        """
        # The message used to name the Tile op (copy-paste slip).
        warnings.warn((
            "BinCountOp is deprecated, use bincount function instead."),
            stacklevel=3)

        x = basic.as_tensor_variable(x)

        if x.dtype not in BinCountOp.compatible_type:
            raise TypeError("Inputs dtype must be an integer.")

        # Some dtypes are not supported by numpy's implementation of bincount.
        # Until another one is available, we should fail at graph construction
        # time, not wait for execution.
        # NOTE(review): bit widths other than 32/64 leave the names below
        # unbound, as before — confirm no such platform is targeted.
        int_bitwidth = theano.gof.python_int_bitwidth()
        if int_bitwidth == 64:
            numpy_unsupported_dtypes = ('uint64',)
        elif int_bitwidth == 32:
            numpy_unsupported_dtypes = ('uint32', 'int64', 'uint64')
        intp_bitwidth = theano.gof.local_bitwidth()
        if intp_bitwidth == 32:
            out_type = basic.ivector()
        elif intp_bitwidth == 64:
            out_type = basic.lvector()

        if x.dtype in numpy_unsupported_dtypes:
            # Wrap the tuple so it is formatted as one value; formatting the
            # bare 3-element tuple raised "not all arguments converted".
            raise TypeError(
                ("Input dtypes %s are not supported by numpy.bincount, "
                 % (numpy_unsupported_dtypes,)), x.dtype)

        if x.ndim != 1:
            raise TypeError("Inputs must be of dimension 1.")

        if weights is None:
            weights = theano.gof.Constant(theano.gof.Generic(), None)
        else:
            weights = basic.as_tensor_variable(weights)
            out_type = basic.dvector()
            if weights.ndim != 1:
                raise TypeError("Weights cannot have a number of "
                                "dimension different of 1.")

        return theano.Apply(self, [x, weights], [out_type])
Beispiel #5
0
 def __build_theano__(self):
     """Build the symbolic LSTM graph and compile its Theano functions.

     Sets ``self.forward`` (per-step outputs for an index sequence),
     ``self.predict`` (argmax of those outputs) and ``self.sgd_step``
     (one gradient-descent update of every weight, taking ``x``, ``y``
     and ``learning_rate``).
     """
     x = ivector("x")
     y = ivector("y")
     hidden_dim = self.hidden_dim

     # Order must match the parameter order of forward_prop below.
     params = [self.Wxi, self.Whi, self.Wci,
               self.Wxf, self.Whf, self.Wcf,
               self.Wxc, self.Whc,
               self.Wxo, self.Who, self.Wco,
               self.Wo]

     def forward_prop(x_t, c_prev_t, h_prev_t,
                      Wxi, Whi, Wci, Wxf, Whf, Wcf, Wxc, Whc, Wxo, Who, Wco, Wo):
         # NOTE(review): the gates use tanh and the cell candidate uses
         # sigmoid, the reverse of the usual LSTM formulation — confirm
         # this is intended. Left unchanged here.
         # NOTE(review): x_t is one element of an int vector, so
         # Wxi.dot(x_t) scales Wxi by the index value; verify against the
         # weight shapes whether a column lookup was intended.
         input_gate = T.tanh(Wxi.dot(x_t) + Whi.dot(h_prev_t) + Wci*c_prev_t)
         forget_gate = T.tanh(Wxf.dot(x_t) + Whf.dot(h_prev_t) + Wcf*c_prev_t)

         a_c_t = Wxc.dot(x_t) + Whc.dot(h_prev_t)
         c_t = input_gate * T.nnet.sigmoid(a_c_t) + forget_gate * c_prev_t

         output_gate = T.tanh(Wxo.dot(x_t) + Who.dot(h_prev_t) + Wco*c_t)
         h_t = output_gate * T.tanh(c_t)
         # Normalize before indexing: without the softmax, o_t[0] is a
         # scalar and the stacked output has the wrong rank for
         # argmax(axis=1) and categorical_crossentropy below.
         o_t = T.nnet.softmax(Wo.dot(h_t))

         return [o_t[0], c_t, h_t]

     [o, c, h], updates = theano.scan(
         forward_prop,
         sequences=x,
         outputs_info=[None,
                       dict(initial=T.zeros(hidden_dim)),
                       dict(initial=T.zeros(hidden_dim))],
         non_sequences=params,
         strict=True)

     prediction = T.argmax(o, axis=1)
     c_error = T.sum(T.nnet.categorical_crossentropy(o, y))

     # One gradient per weight, in the same order as params.
     grads = T.grad(c_error, params)

     # Keep the compiled functions on the instance; as locals they were
     # discarded when this method returned.
     self.forward = theano.function([x], o)
     self.predict = theano.function([x], prediction)

     learning_rate = scalar("learning_rate")

     # learning_rate must be an explicit input because the updates use it.
     self.sgd_step = theano.function(
         [x, y, learning_rate], [],
         updates=[(param, param - learning_rate * grad)
                  for param, grad in zip(params, grads)])