def test_ctc_differentiable(self):
    """Assert that the gradient of the mean CTC cost is free of NaNs.

    The cost is built from the fixture attributes ``lin_output``,
    ``data_length``, ``labels`` and ``labels_length``; the gradient is
    taken with respect to ``self.transform`` and evaluated numerically.
    """
    cost_values = ctc.cost(
        linear_out=self.lin_output,
        frame_lengths=self.data_length,
        labels=self.labels,
        label_lengths=self.labels_length,
    )
    mean_cost = T.mean(cost_values)
    grad_value = T.grad(mean_cost, wrt=self.transform).eval()
    # "every entry is not NaN" is the same as "no entry is NaN"
    self.assertTrue(not np.isnan(grad_value).any())
def label_seq(string):
    """Return whatever ``font.indexify`` yields for ``string``."""
    return font.indexify(string)


if __name__ == "__main__":
    # Symbolic inputs and the parameter container
    P = Parameters()
    X = T.matrix('X')
    Y = T.ivector('Y')

    # build_model returns a callable that maps X to the model output
    predict = build_model(P, 8, 512, len(font.chars) + 1)
    probs = predict(X)

    alpha = 0.5
    params = P.values()

    # Disabled extra term, kept for reference:
    #   + 1e-8 * sum(T.sum(T.sqr(w)) for w in params)
    cost = ctc.cost(probs, Y)
    gradients = T.grad(cost, wrt=params)

    # One zero-valued shared accumulator per parameter, plus a counter of
    # how many times `acc` has been called since the last `update`.
    gradient_acc = [theano.shared(0 * param.get_value()) for param in params]
    counter = theano.shared(np.float32(0.))

    # `acc`: returns the cost and adds the gradients into the accumulators.
    accumulate_step = [
        (accumulator, accumulator + gradient)
        for accumulator, gradient in zip(gradient_acc, gradients)
    ]
    count_step = [(counter, counter + np.float32(1.))]
    acc = theano.function(
        inputs=[X, Y],
        outputs=cost,
        updates=accumulate_step + count_step,
    )

    # `update`: applies a momentum step on the averaged accumulated
    # gradients, then zeroes the accumulators and the counter.
    averaged_gradients = [total / counter for total in gradient_acc]
    momentum_step = updates.momentum(params, averaged_gradients)
    reset_step = [
        (accumulator, np.float32(0) * accumulator)
        for accumulator in gradient_acc
    ]
    update = theano.function(
        inputs=[],
        outputs=[],
        updates=momentum_step + reset_step + [(counter, np.float32(0.))],
    )
predict = T.nnet.softmax(T.dot(hidden, W_hidden_output) + b_output) return X, predict def label_seq(string): idxs = font.indexify(string) result = np.ones((len(idxs) * 2 + 1, ), dtype=np.int32) * -1 result[np.arange(len(idxs)) * 2 + 1] = idxs print result return result if __name__ == "__main__": P = Parameters() X = T.matrix('X') Y = T.ivector('Y') X, predict = build_model(P, X, 10, 10, 10) cost = ctc.cost(predict, Y) params = P.values() grad = T.grad(cost, wrt=params) train = theano.function(inputs=[X, Y], outputs=cost, updates=updates.adadelta(params, grad)) for _ in xrange(10): print train( np.eye(10, dtype=np.float32)[::-1], np.arange(10, dtype=np.int32))
predict = T.nnet.softmax(T.dot(hidden, W_hidden_output) + b_output) return X, predict def label_seq(string): idxs = font.indexify(string) result = np.ones((len(idxs) * 2 + 1,), dtype=np.int32) * -1 result[np.arange(len(idxs)) * 2 + 1] = idxs print result return result if __name__ == "__main__": P = Parameters() X = T.matrix('X') Y = T.ivector('Y') X, predict = build_model(P, X, 10, 10, 10) cost = ctc.cost(predict, Y) params = P.values() grad = T.grad(cost, wrt=params) train = theano.function( inputs=[X, Y], outputs=cost, updates=updates.adadelta(params, grad) ) for _ in xrange(10): print train(np.eye(10, dtype=np.float32)[::-1], np.arange(10, dtype=np.int32))
idxs = font.indexify(string) return idxs if __name__ == "__main__": P = Parameters() X = T.matrix('X') Y = T.ivector('Y') predict = build_model(P,8,512,len(font.chars)+1) probs = predict(X) alpha = 0.5 params = P.values() cost = ctc.cost(probs, Y) #+ 1e-8 * sum(T.sum(T.sqr(w)) for w in params) gradients = T.grad(cost, wrt=params) gradient_acc = [ theano.shared(0 * p.get_value()) for p in params ] counter = theano.shared(np.float32(0.)) acc = theano.function( inputs=[X, Y], outputs=cost, updates = [ (a,a + g) for a,g in zip(gradient_acc,gradients) ] + [(counter,counter + np.float32(1.))] ) update = theano.function( inputs=[],outputs=[], updates = updates.momentum(params,[ g / counter for g in gradient_acc ]) \ + [ (a, np.float32(0) * a) for a in gradient_acc ] \