Example #1
import theano.tensor as T


def softmax_with_zero(x):
    # append a column of zero logits so every row gets one extra class
    x1 = T.concatenate((x, T.zeros((x.shape[0], 1))), axis=1)
    # normalize each row with a softmax: x2 = softmax(x1)
    x2 = T.nnet.softmax(x1)
    return x2
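A minimal usage sketch (assumed imports and made-up inputs, not part of the original snippet): compile softmax_with_zero into a theano function and check that one extra zero-logit column is appended before normalizing, so each row gains one class and still sums to 1.

import numpy as np
import theano as th
import theano.tensor as T

x = T.fmatrix('x')
f = th.function([x], softmax_with_zero(x))

logits = np.array([[1.0, 2.0],
                   [0.0, 0.0]], dtype=np.float32)
probs = f(logits)
print(probs.shape)        # (2, 3): one extra column for the zero logit
print(probs.sum(axis=1))  # each row sums to 1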
Example #2
    def fit(self,
            learning_rate=1e-6,
            momentum=1e-8,
            batch=200,
            activation=T.tanh,
            depth=7):
        self.f = activation

        #define the set of inputs and corresponding outputs for supervised learning
        X = [[]]
        Y = [[]]

        #theano input/output variables: X is a (time, features) matrix of inputs,
        #Y is a vector of integer target indices, and depth bounds the number of steps
        thX = T.fmatrix('X')
        thY = T.ivector('Y')
        thK = T.iscalar('depth')

        #recurrent step: given the current input and the previous hidden state,
        #return the next hidden state and the current output
        def recurrence(x_t, h_t1):
            #update the recurrent hidden state
            #h_t = f(Wx*x + Wh*h_t1 + b)
            h_t = self.f(x_t.dot(self.Wx) + h_t1.dot(self.Wh) + self.bh)
            #calculate the current output; in our model it is the distribution over the next time step
            #y_t = f(Wo*h_t + b)
            y_t = self.f(h_t.dot(self.Wo) + self.bo)
            return h_t, y_t

        #define the theano scan op that applies the recurrence over the input sequence
        [h, y], _ = th.scan(
            fn=recurrence,
            outputs_info=[self.h0, None],
            sequences=thX,
            n_steps=thK,
        )

        #define the prediction; it should be a normalizing function
        #for now a softmax is used
        prediction = T.nnet.softmax(y)

        #define the learning model
        #the usual cost is the negative log-likelihood (log loss)
        cost = -T.mean(T.log(prediction[T.arange(thY.shape[0]), thY]))
        #compute the gradients with theano's grad function
        grads = T.grad(cost, self.params)
        #shared variables holding the parameter changes for momentum,
        #initialized to zero
        dparams = [th.shared(p.get_value() * 0) for p in self.params]

        #define the updates using gradient descent with momentum,
        #i.e. w  <- w + momentum * dw - learning_rate * grad_w(E)
        #     dw <- momentum * dw - learning_rate * grad_w(E)
        updates = [(p, p + momentum * dp - learning_rate * g)
                   for p, dp, g in zip(self.params, dparams, grads)
                   ] + [(dp, momentum * dp - learning_rate * g)
                        for dp, g in zip(dparams, grads)]

        #compile the theano prediction and training functions
        #(thK must also be an input of train_op, since scan uses it for n_steps)
        self.predict_op = th.function(inputs=[thX, thK], outputs=prediction)
        self.train_op = th.function(inputs=[thX, thY, thK],
                                    outputs=[cost, prediction, y],
                                    updates=updates)
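The indexing used in the cost, prediction[T.arange(thY.shape[0]), thY], picks out the probability the model assigned to the correct class at every time step. A minimal NumPy sketch of the same trick (made-up probabilities and targets, not taken from the original code):

import numpy as np

# 3 time steps, 4 classes: each row is a softmax output
prediction = np.array([[0.10, 0.70, 0.10, 0.10],
                       [0.30, 0.30, 0.20, 0.20],
                       [0.05, 0.05, 0.10, 0.80]])
targets = np.array([1, 0, 3])  # correct class index at each step

# select the probability of the correct class at each step
picked = prediction[np.arange(targets.shape[0]), targets]  # [0.7, 0.3, 0.8]

# negative mean log-likelihood, the same quantity as the theano cost above
cost = -np.mean(np.log(picked))
print(cost)  # roughly 0.59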
Example #3
    def __call__(self, x):
        # scale the logits by self.t, then normalize with a softmax
        return T.nnet.softmax(x * self.t)
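Multiplying the logits by self.t before the softmax acts as an inverse temperature: larger values of self.t sharpen the distribution, smaller values flatten it. A minimal NumPy sketch of the effect (hypothetical logits and scale factors, not from the original code):

import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

logits = np.array([1.0, 2.0, 3.0])
for t in (0.5, 1.0, 5.0):
    print(t, softmax(logits * t))
# t = 0.5 gives a flatter distribution, t = 5.0 an almost one-hot one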