def get_output(self, train=False):
		X = self.get_input(train)
		padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
		X = X.dimshuffle((1, 0, 2))

		xsum = T.dot(X, self.W_sum) + self.b_sum  ### input projections for the extra gates
		xmax = T.dot(X, self.W_max) + self.b_max
		xmin = T.dot(X, self.W_min) + self.b_min
		xsubt = T.dot(X, self.W_subt) + self.b_subt
		xmul = T.dot(X, self.W_mul) + self.b_mul
		xres = T.dot(X, self.W_res) + self.b_res
		xone = T.dot(X, self.W_one) + self.b_one

		xi = T.dot(X, self.W_i) + self.b_i
		xf = T.dot(X, self.W_f) + self.b_f
		xc = T.dot(X, self.W_c) + self.b_c
		xo = T.dot(X, self.W_o) + self.b_o

		[outputs, memories], updates = theano.scan(
			self._step,
			sequences=[xsum, xmax, xmin, xsubt, xmul, xres, xone, xi, xf, xo, xc, padded_mask],  ### extended list of sequence inputs
			outputs_info=[
				T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
				T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
			],
			non_sequences=[self.U_sum, self.U_max, self.U_min, self.U_subt, self.U_mul, self.U_res, self.U_one, self.U_i, self.U_f, self.U_o, self.U_c],  ### recurrent weight matrices for the extra gates
			truncate_gradient=self.truncate_gradient)

		if self.return_sequences:
			return outputs.dimshuffle((1, 0, 2))
		return outputs[-1]
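Note: the helper alloc_zeros_matrix is used throughout these examples but is not defined in any of them. A minimal sketch consistent with its call sites (a variadic list of possibly symbolic dimensions, zeros in Theano's configured float type), along the lines of the old Keras Theano utilities:

import numpy as np
import theano
import theano.tensor as T

# Allocate a zero-filled tensor; each argument is one (symbolic or concrete)
# dimension of the result.
def alloc_zeros_matrix(*dims):
    return T.alloc(np.cast[theano.config.floatX](0.), *dims)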
Example #2
	def get_output(self, train=False):
		X = self.get_input(train)
		padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
		X = X.dimshuffle((1, 0, 2))

		# Build the lagged sequence X_tm1 by left-padding X with one zero time step
		Z = T.zeros_like(X)
		X_tm1 = T.concatenate([T.shape_padleft(Z[0]), X], axis=0)

		x_f = T.dot(X, self.W_xf) + self.b_f
		x_z = T.dot(X, self.W_xz) + self.b_z
		x_o = T.dot(X, self.W_xo) + self.b_o

		h_info = T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
		c_info = T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)

		[outputs, cells], updates = theano.scan(
		    self._step,
		    sequences=[x_f, x_z, x_o, padded_mask, X_tm1],
		    outputs_info=[h_info, c_info],
		    non_sequences=[self.U_hf, self.U_xz, self.U_xo],
		    truncate_gradient=self.truncate_gradient,
		    go_backwards=self.go_backwards)

		if self.return_sequences:
		    return outputs.dimshuffle((1, 0, 2))
		return outputs[-1]
Example #3
    def lstm_cost(self, words):
        x = self.L[words]

        # Each element of x is (word_embed,) shape
        xi = T.dot(x, self.W_i) + self.b_i
        xf = T.dot(x, self.W_f) + self.b_f
        xc = T.dot(x, self.W_c) + self.b_c
        xo = T.dot(x, self.W_o) + self.b_o

        [outputs, memories], updates = theano.scan(
            self._step,
            sequences=[xi, xf, xc, xo],
            outputs_info=[
                alloc_zeros_matrix(self.n_lstm_embed),
                alloc_zeros_matrix(self.n_lstm_embed),
            ],
            non_sequences=[
                self.U_i, self.U_f, self.U_o, self.U_c,
            ],
            truncate_gradient=-1
        )

        r = T.dot(self.Lprime, outputs[-1])

        return T.nnet.softmax(r)
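Note: every snippet here passes a per-timestep callback _step to theano.scan, but the implementations are not shown. For plain LSTM calls such as the one above, a minimal sketch consistent with scan's argument order (sequence slices, then the recurrent outputs, then non-sequences) would be:

import theano.tensor as T

# Hypothetical LSTM step (an assumption, not the repository's actual code):
# xi_t..xo_t are the precomputed input projections for one time step,
# h_tm1/c_tm1 the previous hidden and cell states, u_* the recurrent weights.
def _step(xi_t, xf_t, xc_t, xo_t, h_tm1, c_tm1, u_i, u_f, u_o, u_c):
    i_t = T.nnet.sigmoid(xi_t + T.dot(h_tm1, u_i))               # input gate
    f_t = T.nnet.sigmoid(xf_t + T.dot(h_tm1, u_f))               # forget gate
    c_t = f_t * c_tm1 + i_t * T.tanh(xc_t + T.dot(h_tm1, u_c))   # cell update
    o_t = T.nnet.sigmoid(xo_t + T.dot(h_tm1, u_o))               # output gate
    h_t = o_t * T.tanh(c_t)                                      # new hidden state
    return h_t, c_t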
Example #4
 def _get_initial_states(self, X):
     # batch_size = X.shape[0]
     # canvas = self.init_canvas.dimshuffle('x', 0, 1, 2).repeat(batch_size,
     #                                                           axis=0)
     # init_enc = self.init_h_enc.dimshuffle('x', 0).repeat(batch_size, axis=0)
     # init_dec = self.init_h_dec.dimshuffle('x', 0).repeat(batch_size, axis=0)
     canvas = alloc_zeros_matrix(*X.shape)  # + self.init_canvas[None, :, :, :]
     init_enc = alloc_zeros_matrix(X.shape[0], self.h_dim)  # + self.init_h_enc[None, :]
     init_dec = alloc_zeros_matrix(X.shape[0], self.h_dim)  # + self.init_h_dec[None, :]
     return canvas, init_enc, init_dec
Example #5
    def get_output(self, train=False):
        X = self.get_input(train)
        X = X.dimshuffle((1, 0, 2))

        # scan = theano symbolic loop.
        # See: http://deeplearning.net/software/theano/library/scan.html
        # Iterate over the first dimension of the x array (=time).
        [H1, H2], updates = theano.scan(
            self._step,
            sequences=[X],
            outputs_info=[
                alloc_zeros_matrix(X.shape[1], self.output_dim),
                dict(initial=alloc_zeros_matrix(self.sh, X.shape[1], self.output_dim),
                     taps=[-1, -self.sh])
            ])

        if self.return_sequences:
            return H2.dimshuffle((1, 0, 2))
        return H2[-1]
Example #6
    def get_output(self, train=False):
        X = self.get_input(train)
        X = X.dimshuffle((1, 0, 2))

        #X_ = T.dot(X, self.W1) + self.b1
        [H1, C1], updates = theano.scan(
            self._step,
            sequences=[X],
            outputs_info=[
                alloc_zeros_matrix(X.shape[1], self.output_dim), 
                alloc_zeros_matrix(X.shape[1], self.output_dim)],
            truncate_gradient=self.truncate_gradient)

        if self.return_sequences:
            return H1.dimshuffle((1, 0, 2))
        return H1[-1]
Example #7
    def get_output(self, train):
        '''Transform the input to this layer into the layer's output.

        Parameters
        ----------
        train : bool
            Whether to build the training-time (as opposed to test-time)
            expression of the output.

        Returns
        -------
        output : Theano expression
            The full sequence of hidden states, shaped (nb_samples, time,
            output_dim), if return_sequences is set; otherwise the final
            hidden state of shape (nb_samples, output_dim).
        '''
        X = self.get_input(train)
        X = X.dimshuffle((1,0,2))
        x = E.tools.TT.dot(X, self.W) + self.b

        outputs, updates = theano.scan(
            self._step,
            sequences=[E.tools.TT.arange(x.shape[0]), x],
            outputs_info=alloc_zeros_matrix(X.shape[1], self.output_dim),
            truncate_gradient=self.truncate_gradient,
            )
        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1]
Example #8
    def get_output(self, train=False):
        X = self.get_input(train)
        X = X.dimshuffle((1, 0, 2))

        # scan = theano symbolic loop.
        # See: http://deeplearning.net/software/theano/library/scan.html
        # Iterate over the first dimension of the x array (=time).
        [H1, H2], updates = theano.scan(
            self._step,  
            sequences=[X],
            outputs_info=[T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
                          T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)],
            truncate_gradient=self.truncate_gradient)

        if self.return_sequences:
            return H2.dimshuffle((1, 0, 2))
        return H2[-1]
Example #9
def batchargmax(tensor, maximums):
    result, updates = theano.scan(
        fn=batchargmax_helper,
        sequences=[tensor, maximums],
        outputs_info=[alloc_zeros_matrix(tensor.shape[2])]
    )
    return result
Example #10
def argmax(tensor, maximums):
    [v, score], updates = theano.scan(
        fn=argmax2args_step,
        sequences=[tensor, maximums],
        outputs_info=[
            alloc_zeros_matrix(tensor.shape[1]),
            theano.shared(np.cast[theano.config.floatX](-9999999.0))
        ]
    )
    return v[-1]
Example #11
    def get_forward_output(self, train):
        X = self.get_input(train)
        X = X.dimshuffle((1,0,2))

        xi = T.dot(X, self.W_i) + self.b_i
        xf = T.dot(X, self.W_f) + self.b_f
        xc = T.dot(X, self.W_c) + self.b_c
        xo = T.dot(X, self.W_o) + self.b_o

        [outputs, memories], updates = theano.scan(
            self._forward_step,
            sequences=[xi, xf, xo, xc],
            outputs_info=[
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
            ],
            non_sequences=[self.U_i, self.U_f, self.U_o, self.U_c],
            truncate_gradient=self.truncate_gradient
        )
        return outputs.dimshuffle((1,0,2))
Example #12
 def add_state(self, name, dim):
     if name in self.namespace:
         raise Exception('Duplicate node identifier: ' + name)
     self.namespace.add(name)
     self.state_order.append(name)
     inps = self.input
     if isinstance(inps, dict):
         batch_size = list(inps.values())[0].shape[0]  # works on both Python 2 and 3
     else:
         batch_size = inps.shape[0]
     self.states[name] = T.unbroadcast(alloc_zeros_matrix(batch_size, dim), 1)
     self.state_config.append({'name': name, 'dim': dim})
Example #13
	def get_gates(self, train=False):
		X = self.get_input(train)
		padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
		X = X.dimshuffle((1, 0, 2))

		xg = T.dot(X, self.W_g) + self.b_g
		xc = T.dot(X, self.W_c) + self.b_c
		xo = T.dot(X, self.W_o) + self.b_o

		[outputs, memories, gates], updates = theano.scan(
			self._debug_step,
			sequences = [xg, xo, xc, padded_mask],
			outputs_info=[
				T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
				T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
				T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim, 2), 1)
			],
			non_sequences=[self.U_g, self.U_o, self.U_c],
			truncate_gradient=self.truncate_gradient)

		return outputs, gates, memories
Example #14
    def debug_output(self, train = False, get_tuple = False):

        input_dict = self.get_input(train)
        X_encoder = input_dict['encoder_context']
        X_encoder = X_encoder.reshape((X_encoder.shape[0],X_encoder.shape[1],-1))
        X = input_dict['recurrent_context']
        X = X.dimshuffle((1, 0, 2))

        attention_encoder = T.dot(X_encoder,self.W_e2a)
        [outputs, contexts, attentionTotal], updates = theano.scan(
            self._step,
            sequences=[X],
            outputs_info=[
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], X.shape[0]), 1),
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.enc_dim), 1),
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], X.shape[0], self.att_dim), 1)
            ],
            non_sequences=[X_encoder,attention_encoder],
            truncate_gradient=self.truncate_gradient,
            go_backwards=self.go_backwards)

        return outputs.dimshuffle((1, 0, 2)), X.dimshuffle((1,0,2)), contexts.dimshuffle((1,0,2)), attentionTotal, attention_encoder
Example #15
    def memnn_cost(self, statements, question, ans, pe_matrix):
        # statements: list of list of word indices
        # question: list of word indices

        computed_memories, updates = theano.scan(
            self._compute_memories,
            sequences = [statements],
            outputs_info = [
                alloc_zeros_matrix(self.weights.shape[0], self.n_embedding)
            ],
            non_sequences = [
                self.weights.dimshuffle(1, 0, 2),
                pe_matrix
            ],
            truncate_gradient = -1,
        )

        memories = T.stacklists(computed_memories).dimshuffle(1, 0, 2)

        # Embed question
        u1 = T.sum(self.weights[0][question], axis=0)

        # Layer 1
        p = T.nnet.softmax(T.dot(u1, memories[0].T))
        o1 = T.dot(p, memories[1])

        # Layer 2
        u2 = o1 + T.dot(u1, self.H)
        p = T.nnet.softmax(T.dot(u2, memories[1].T))
        o2 = T.dot(p, memories[2])

        # Layer 3
        u3 = o2 + T.dot(u2, self.H)
        p = T.nnet.softmax(T.dot(u3, memories[2].T))
        o3 = T.dot(p, memories[3])

        # Score answers
        u4 = o3 + T.dot(u3, self.H)

        # Embed answer
        a1 = T.sum(self.A[ans[0]], axis=0)
        a2 = T.sum(self.A[ans[1]], axis=0)
        a3 = T.sum(self.A[ans[2]], axis=0)
        a4 = T.sum(self.A[ans[3]], axis=0)
        a = T.stack(a1, a2, a3, a4)
        scores = T.dot(T.dot(u4, self.U.T), T.dot(self.U, a.T))
        output = T.nnet.softmax(scores)

        return output[0]
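Note: each "Layer" block in the memory-network cost above is one memory hop: attend over the input memories with the current query, read from the output memories, then update the query. Factored out with hypothetical names, the pattern is:

import theano.tensor as T

# One end-to-end memory-network hop (a sketch of the pattern used above, not
# code from the repository): u is the current query, mem_in/mem_out the
# input- and output-side memory matrices, H the query-update matrix.
def hop(u, mem_in, mem_out, H):
    p = T.nnet.softmax(T.dot(u, mem_in.T))  # attention weights over memories
    o = T.dot(p, mem_out)                   # attention-weighted read
    return o + T.dot(u, H)                  # next query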
Example #16
	def get_output(self, train=False):
		X = self.get_input(train)
		padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
		X = X.dimshuffle((1, 0, 2))

		xsum = T.dot(X, self.W_sum) + self.b_sum
		xi = T.dot(X, self.W_i) + self.b_i
		xf = T.dot(X, self.W_f) + self.b_f
		xc = T.dot(X, self.W_c) + self.b_c
		xo = T.dot(X, self.W_o) + self.b_o

		[outputs, memories], updates = theano.scan(
			self._step,
			sequences=[xsum, xi, xf, xo, xc, padded_mask],
			outputs_info=[
				T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
				T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
			],
			non_sequences=[self.U_sum,self.U_i, self.U_f, self.U_o, self.U_c],
			truncate_gradient=self.truncate_gradient)

		if self.return_sequences:
			return outputs.dimshuffle((1, 0, 2))
		return outputs[-1]
Example #17
    def get_output(self, train):
        X = self.get_input(train)
        X = X.dimshuffle((1, 0, 2))
        x_t = TT.dot(X, self.W) + self.b
        x_gate = TT.dot(X, self.W_gate) + self.b_gate

        outputs, updates = theano.scan(
            self._step,
            sequences=[E.tools.TT.arange(x_t.shape[0]), x_t, x_gate],
            outputs_info=[alloc_zeros_matrix(X.shape[1],  self.output_dim)],
            truncate_gradient=self.truncate_gradient
        )
        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1]
Example #18
    def get_output(self, train=False):
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
        X = X.dimshuffle((1, 0, 2))

        x_e = T.dot(X, self.W_x2e) + self.b_x2e
        x_g = T.dot(X, self.W_x2g) + self.b_x2g

        [outputs, expert_memory], updates = theano.scan(
            self._step,
            sequences=[x_e, x_g, padded_mask],
            outputs_info=[
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.n_experts, self.output_dim), 1),
            ],
            truncate_gradient=self.truncate_gradient,
            go_backwards=self.go_backwards,
        )

        if self.return_sequences and self.go_backwards:
            return outputs[::-1].dimshuffle((1, 0, 2))
        elif self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1]
Example #19
	def get_output(self, train=False):
		X = self.get_input(train)
		padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
		X = X.dimshuffle((1, 0, 2))

		xc = T.dot(X, self.W_c) + self.b_c
		xo = T.dot(X, self.W_o) + self.b_o

		[outputs, memories], updates = theano.scan(
			self._step,
			sequences=[xo, xc, padded_mask],
			outputs_info=[
				T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
				T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
			],
			non_sequences=[self.U_o, self.U_c, self.W_maxout, self.b_maxout],
			truncate_gradient=self.truncate_gradient,
			go_backwards=self.go_backwards)

		if self.return_sequences and self.go_backwards:
			return outputs[::-1].dimshuffle((1, 0, 2))
		elif self.return_sequences:
			return outputs.dimshuffle((1, 0, 2))
		return outputs[-1]
Example #20
    def get_output(self, train):
        X = self.get_input(train)
        X = X.dimshuffle((1,0,2))
        x_t = TT.dot(X, self.W) + self.b
        x_gate = TT.dot(X, self.W_gate) + self.b_gate

        outputs, updates = theano.scan(
            self._step,
            sequences=[x_t, x_gate],
            outputs_info=[dict(initial=alloc_zeros_matrix(3, X.shape[1], self.output_dim),
                               taps=[-1, -2, -3])],
            non_sequences=[self.U, self.U_gate],
            truncate_gradient=self.truncate_gradient
        )
        if self.return_sequences:
            return outputs.dimshuffle((1,0,2))
        return outputs[-1]
Example #21
    def get_padded_shuffled_mask(self, train, X, pad=0):
        mask = self.get_input_mask(train)
        if mask is None:
            mask = T.ones_like(X.sum(axis=-1))  # is there a better way to do this without a sum?

        # mask is (nb_samples, time)
        mask = T.shape_padright(mask)  # (nb_samples, time, 1)
        mask = T.addbroadcast(mask, -1)  # (nb_samples, time, 1), broadcastable in the last dim
        mask = mask.dimshuffle(1, 0, 2)  # (time, nb_samples, 1)

        if pad > 0:
            # left-pad in time with 0
            padding = alloc_zeros_matrix(pad, mask.shape[1], 1)
            mask = T.concatenate([padding, mask], axis=0)
        # return mask.astype('int8')
        return mask.astype(theano.config.floatX)
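Note: the shape bookkeeping in get_padded_shuffled_mask is easy to misread; here is a small, self-contained check of the transformation (an illustrative sketch, not code from the repository) for a batch of 2 samples and 3 time steps with pad=1:

import numpy as np
import theano
import theano.tensor as T

mask = T.matrix('mask')                                  # (nb_samples, time)
m = T.addbroadcast(T.shape_padright(mask), -1)           # (nb_samples, time, 1)
m = m.dimshuffle(1, 0, 2)                                # (time, nb_samples, 1)
padded = T.concatenate([T.zeros((1, m.shape[1], 1)), m], axis=0)
f = theano.function([mask], padded.shape)
print(f(np.ones((2, 3), dtype=theano.config.floatX)))    # -> [4 2 1]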
Example #22
    def memnn_cost(self, statements, question, pe_matrix):

        computed_memories, updates = theano.scan(
            self._compute_memories,
            sequences = statements,
            outputs_info = [ 
                alloc_zeros_matrix(self.weights.shape[0], 4800)   # 4800 = skip-thought sentence-vector size
            ],
            non_sequences = [
                #self.weights.dimshuffle(1, 0, 2),
                self.weights,
                pe_matrix
            ],
            truncate_gradient = -1,
        )

        memories = T.stacklists(computed_memories).dimshuffle(1, 0, 2)

        # Embed question
        #s = theano.tensor.scalar('s')
        u1 = question
        #u1 = weights[0] * question

        #sv = skipthoughts.encode(model, sentence)

        # Layer 1
        p = T.nnet.softmax(T.dot(u1, memories[0].T))
        o1 = T.dot(p, memories[1])

        # Layer 2
        u2 = o1 + T.dot(u1, self.H)
        p = T.nnet.softmax(T.dot(u2, memories[1].T))
        o2 = T.dot(p, memories[2])

        # Layer 3
        u3 = o2 + T.dot(u2, self.H)
        p = T.nnet.softmax(T.dot(u3, memories[2].T))
        o3 = T.dot(p, memories[3])

        # Final
        output = T.nnet.softmax(T.dot(o3 + u3, self.weights[3].T))

        print("memnn_cost running")

        #return output[0, 1, 2, 3]
        return output[0]
Example #23
    def get_output(self, train):
        X = self.get_input(train)
        if X.ndim == 3:
            X = X.dimshuffle((1,0,2))

        x_z = TT.dot(X, self.W_z) + self.b_z
        x_r = TT.dot(X, self.W_r) + self.b_r
        x_h = TT.dot(X, self.W_h) + self.b_h
        outputs, updates = theano.scan(
            self._step,
            sequences=[x_z, x_r, x_h],
            outputs_info=alloc_zeros_matrix(X.shape[1], self.output_dim),
            non_sequences=[self.U_z, self.U_r, self.U_h],
            truncate_gradient=self.truncate_gradient
        )
        if self.return_sequences:
            return outputs.dimshuffle((1,0,2))
        return outputs[-1]
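Note: for the GRU-style scan above (sequences x_z, x_r, x_h; one recurrent output; non-sequences U_z, U_r, U_h), a minimal _step sketch, assuming the usual sigmoid/tanh gate activations rather than the repository's actual choices:

import theano.tensor as TT

# Hypothetical GRU step: z is the update gate, r the reset gate, hh the
# candidate state; the new state interpolates between h_tm1 and hh.
def _step(xz_t, xr_t, xh_t, h_tm1, u_z, u_r, u_h):
    z = TT.nnet.sigmoid(xz_t + TT.dot(h_tm1, u_z))
    r = TT.nnet.sigmoid(xr_t + TT.dot(h_tm1, u_r))
    hh = TT.tanh(xh_t + TT.dot(r * h_tm1, u_h))
    return z * h_tm1 + (1. - z) * hh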
Example #24
    def get_output(self, train=False):
        X = self.get_input(train)
        mask = self.get_padded_shuffled_mask(train, X, pad=0)
        X = X.dimshuffle((1, 0, 2))
        Y = T.dot(X, self.W) + self.b
        # h0 = T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
        h0 = T.repeat(self.h_m1, X.shape[1], axis=0)

        [outputs, _], updates = theano.scan(
            self._step,
            sequences=[Y, mask],
            outputs_info=[h0, T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)],
            non_sequences=[self.R],
            truncate_gradient=self.truncate_gradient, strict=True,
            allow_gc=theano.config.scan.allow_gc)

        if self.return_sequences:
            return (T.concatenate([h0.dimshuffle('x', 0, 1), outputs], axis=0).dimshuffle((1, 0, 2)),
                    mask[1:].dimshuffle(1, 0, 2))
        return outputs[-1]
Example #25
    def memnn_cost(self, statements, question, pe_matrix):
        # statements: list of list of word indices
        # question: list of word indices

        computed_memories, updates = theano.scan(
            self._compute_memories,
            sequences = [statements],
            outputs_info = [
                alloc_zeros_matrix(self.weights.shape[0], self.n_embedding)
            ],
            non_sequences = [
                self.weights.dimshuffle(1, 0, 2),
                pe_matrix
            ],
            truncate_gradient = -1,
        )

        memories = T.stacklists(computed_memories).dimshuffle(1, 0, 2)

        # Embed question
        u1 = T.sum(self.weights[0][question], axis=0)

        # Layer 1
        p = T.nnet.softmax(T.dot(u1, memories[0].T))
        o1 = T.dot(p, memories[1])

        # Layer 2
        u2 = o1 + T.dot(u1, self.H)
        p = T.nnet.softmax(T.dot(u2, memories[1].T))
        o2 = T.dot(p, memories[2])

        # Layer 3
        u3 = o2 + T.dot(u2, self.H)
        p = T.nnet.softmax(T.dot(u3, memories[2].T))
        o3 = T.dot(p, memories[3])

        # Final
        output = T.nnet.softmax(T.dot(o3 + u3, self.weights[3].T))

        return output[0]
Example #26
	def get_output(self, train=False):
		X = self.get_input(train)
		padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
		X = X.dimshuffle((1, 0, 2))

		# x_f = T.dot(X, self.W_xf) + self.b_f + T.dot(X_tm1[:-1], self.U_hf)
		# x_z = T.dot(X, self.W_xz) + self.b_z + T.dot(X_tm1[:-1], self.U_xz)
		# x_o = T.dot(X, self.W_xo) + self.b_o + T.dot(X_tm1[:-1], self.U_xo)

		x_f = self.inner_activation(T.dot(X, self.W_xf) + self.b_f)
		x_z = self.activation(T.dot(X, self.W_xz) + self.b_z)
		x_o = T.dot(X, self.W_xo) + self.b_o

		if self.p > 0:
			retain_prop = 1. - self.p
			if train:
				# x_f *= self.srng.binomial(x_f.shape, p=retain_prop, dtype=theano.config.floatX)
				x_z *= self.srng.binomial(x_z.shape, p=retain_prop, dtype=theano.config.floatX)
				x_o *= self.srng.binomial(x_o.shape, p=retain_prop, dtype=theano.config.floatX)
			else:
				x_z *= retain_prop
				x_o *= retain_prop
				# x_f *= retain_prop

		h_info = T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)

		outputs, updates = theano.scan(
		    self._step,
		    sequences=[x_f, x_z, x_o, padded_mask],
		    outputs_info=[h_info],
		    # non_sequences=[self.U_hf],
		    truncate_gradient=self.truncate_gradient,
		    go_backwards=self.go_backwards)

		if self.return_sequences:
		    return outputs.dimshuffle((1, 0, 2))
		return outputs[-1]
Example #27
    def get_output(self, train=False):
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=0)
        X = X.dimshuffle((1, 0, 2))

        xi = T.dot(X, self.W_i) + self.b_i
        xf = T.dot(X, self.W_f) + self.b_f
        xc = T.dot(X, self.W_c) + self.b_c
        xo = T.dot(X, self.W_o) + self.b_o

        # h0 = T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
        h0 = T.repeat(self.h00, X.shape[1], axis=0)

        [outputs, _], updates = theano.scan(
            self._step,
            sequences=[xi, xf, xo, xc, padded_mask],
            outputs_info=[h0, T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)],
            non_sequences=[self.U_i, self.U_f, self.U_o, self.U_c],
            truncate_gradient=self.truncate_gradient)

        if self.return_sequences:
            return (T.concatenate([h0.dimshuffle('x', 0, 1), outputs], axis=0).dimshuffle((1, 0, 2)),
                    padded_mask[1:].dimshuffle(1, 0, 2))
        return outputs[-1]
Example #28
 def get_initial_states(self, inputs):
     # u_init = alloc_zeros_matrix(inputs.shape[0], self.causes_dim) + .1
     u_init = theano_rng.uniform(low=0,
                                 high=1,
                                 size=(inputs.shape[0], self.causes_dim))
     return (alloc_zeros_matrix(inputs.shape[0], self.output_dim), u_init)
Example #29
 def get_initial_states(self, inputs):
     u_init = alloc_zeros_matrix(inputs.shape[0], self.causes_dim) + .1
     return (alloc_zeros_matrix(inputs.shape[0], self.output_dim), u_init)
Example #30
 def get_initial_states(self, X):
     return alloc_zeros_matrix(X.shape[0], self.stack_size, self.code_row,
                               self.code_col)
Example #31
 def get_initial_states(self, X):
     return alloc_zeros_matrix(X.shape[0], self.output_dim)
Example #32
 def get_initial_states(self, inputs):
     # u_init = alloc_zeros_matrix(inputs.shape[0], self.causes_dim) + .1
     u_init = theano_rng.uniform(low=0, high=1, size=(inputs.shape[0],
                                                      self.causes_dim))
     return (alloc_zeros_matrix(inputs.shape[0], self.output_dim), u_init)
Example #33
 def get_initial_states(self, X):
     return alloc_zeros_matrix(X.shape[0], self.stack_size,
                               self.code_row, self.code_col)