def softmax_with_zero(x):
    """Append a zero-logit reference column to `x` and softmax the result.

    Parameters
    ----------
    x : 2-D tensor, shape (n_samples, n_classes - 1)
        Unnormalized scores; the implicit extra class gets logit 0.

    Returns
    -------
    2-D tensor, shape (n_samples, n_classes)
        Row-wise softmax probabilities.
    """
    # BUG FIX: to concatenate along axis=1 the zero block needs one row per
    # sample and a single column, i.e. shape (x.shape[0], 1).  The original
    # `T.zeros(1, x.shape[1])` passed the shape as two positional arguments
    # (the second slot is dtype in Theano) and had the dimensions transposed.
    zero_col = T.zeros((x.shape[0], 1))
    extended = T.concatenate((x, zero_col), axis=1)
    # NOTE(review): `T.softmax` follows this file's convention; upstream
    # Theano exposes it as `T.nnet.softmax` -- confirm the alias exists.
    return T.softmax(extended)
def fit(self, learning_rate=1e-6, momentum=1e-8, batch=200, activation=T.tanh, depth=7):
    """Build the Theano computation graph and compile the prediction and
    training functions for this recurrent model.

    Parameters
    ----------
    learning_rate : float
        Step size for gradient descent.
    momentum : float
        Momentum coefficient for the parameter velocity terms.
    batch : int
        Currently unused -- TODO wire up mini-batching.
    activation : callable
        Elementwise nonlinearity for the hidden and output layers.
    depth : int
        Currently unused default; the number of scan steps is supplied at
        call time through the `thK` input.

    Side effects
    ------------
    Sets `self.f`, `self.predict_op` and `self.train_op`.
    """
    self.f = activation

    # Symbolic inputs.
    # NOTE(review): thX is declared as a vector, so scan hands the
    # recurrence one scalar per step; if each time step is a feature
    # vector this should be T.fmatrix -- confirm against callers.
    thX = T.fvector('X')
    # BUG FIX: the targets are used below as integer indices into the
    # output distribution, so they must be an integer vector; the original
    # fvector would fail on advanced indexing.
    thY = T.ivector('Y')
    thK = T.iscalar('depth')

    def recurrence(x_t, h_t1):
        """One step of the recurrence: update the hidden state and emit
        the next-time-step distribution.

        h_t = f(x_t.Wx + h_{t-1}.Wh + bh)
        y_t = f(h_t.Wo + bo)
        """
        h_t = self.f(x_t.dot(self.Wx) + h_t1.dot(self.Wh) + self.bh)
        y_t = self.f(h_t.dot(self.Wo) + self.bo)
        return h_t, y_t

    # Unroll the recurrence over the input sequence.
    [h, y], _ = th.scan(
        fn=recurrence,
        outputs_info=[self.h0, None],
        sequences=thX,
        n_steps=thK,
    )

    # BUG FIX: normalize the *symbolic* scan output `y`; the original
    # referenced `Y`, a dead Python list (`Y = [[]]`), which would crash.
    prediction = T.softmax(y)

    # Log loss over the target class indices.
    # BUG FIX: index into `prediction` (the normalized scan output), not
    # the dead Python list `Y`.
    cost = -T.mean(T.log(prediction[T.arange(thY.shape[0]), thY]))

    # BUG FIX: `T.gtrad` does not exist -- the Theano gradient is T.grad.
    grads = T.grad(cost, self.params)

    # Velocity terms for momentum, zero-initialized with the same shapes
    # as the parameters.
    dparams = [theano.shared(p.get_value() * 0) for p in self.params]

    # Gradient descent with momentum:
    #   w  <- w + momentum * dw - lr * grad_w(E)
    #   dw <-     momentum * dw - lr * grad_w(E)
    # BUG FIX: the original referenced an undefined name `mu`; use the
    # `momentum` parameter.
    updates = [
        (p, p + momentum * dp - learning_rate * g)
        for p, dp, g in zip(self.params, dparams, grads)
    ] + [
        (dp, momentum * dp - learning_rate * g)
        for dp, g in zip(dparams, grads)
    ]

    self.predict_op = th.function(inputs=[thX, thK], outputs=prediction)
    # BUG FIX: the graph depends on thK (scan's n_steps), so it must be an
    # input here as well -- theano.function refuses to compile a graph
    # with an unprovided input variable.
    self.train_op = th.function(
        inputs=[thX, thY, thK],
        outputs=[cost, prediction, y],
        updates=updates,
    )
def __call__(self, x):
    """Scale the input by the factor `self.t`, then return its softmax."""
    scaled = x * self.t
    return T.softmax(scaled)