def __init__(self, input_dimensionality, output_dimensionality, data_x, data_y, max_len,
             params=None, batch_size=100, learning_rate=0.01, momentum=.25):
    self.input_dimensionality = input_dimensionality
    self.output_dimensionality = output_dimensionality
    self.max_len = max_len

    # Symbolic inputs: a stack of frame vectors, the matching label matrix,
    # and the index of the minibatch to train on.
    input_stack = T.fmatrix('input_seq')
    label_stack = T.imatrix('label')
    index = T.iscalar()  # index to the sample

    # Three feedforward layers feed a bidirectional recurrent pair, followed by a
    # softmax over the output alphabet. When `params` is given, the layers are
    # rebuilt from previously saved weights.
    if params is None:
        ff1 = FeedForwardLayer(input_stack, self.input_dimensionality, 2000)
        ff2 = FeedForwardLayer(ff1.output, 2000, 1000)
        ff3 = FeedForwardLayer(ff2.output, 1000, 500)
        rf = RecurrentLayer(ff3.output, 500, 250, False)  # Forward layer
        rb = RecurrentLayer(ff3.output, 500, 250, True)   # Backward layer
        s = SoftmaxLayer(T.concatenate((rf.output, rb.output), axis=1),
                         2 * 250, self.output_dimensionality)
    else:
        ff1 = FeedForwardLayer(input_stack, self.input_dimensionality, 2000, params[0])
        ff2 = FeedForwardLayer(ff1.output, 2000, 1000, params[1])
        ff3 = FeedForwardLayer(ff2.output, 1000, 500, params[2])
        rf = RecurrentLayer(ff3.output, 500, 250, False, params[3])  # Forward layer
        rb = RecurrentLayer(ff3.output, 500, 250, True, params[4])   # Backward layer
        s = SoftmaxLayer(T.concatenate((rf.output, rb.output), axis=1),
                         2 * 250, self.output_dimensionality, params[5])

    ctc = CTCLayer(s.output, label_stack, self.output_dimensionality - 1, batch_size)

    # Plain SGD updates; the momentum variant is kept here commented out.
    updates = []
    for layer in (s, rb, rf, ff3, ff2, ff1):
        for p in layer.params:
            #param_update = theano.shared(p.get_value()*0., broadcastable=p.broadcastable)
            #grad = T.grad(ctc.cost, p)
            #updates.append((p, p - learning_rate * param_update))
            #updates.append((param_update, momentum * param_update + (1. - momentum) * grad))
            updates.append((p, p - learning_rate * T.grad(ctc.cost, p)))

    # Note: the reshape hard-codes 240 as the per-frame feature width; it must match
    # input_dimensionality for the givens below to be consistent.
    self.trainer = theano.function(
        inputs=[index],
        outputs=[ctc.cost],
        updates=updates,
        givens={
            input_stack: data_x[index * batch_size:(index + 1) * batch_size].reshape(
                (self.max_len * batch_size, 240)),
            label_stack: data_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    self.tester = theano.function(
        inputs=[index],
        outputs=[s.output],
        givens={
            input_stack: data_x[index * batch_size:(index + 1) * batch_size].reshape(
                (self.max_len * batch_size, 240)),
        }
    )
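# Hypothetical usage sketch (not from the original code): how the minibatch-indexed
# trainer above could be driven. The class name `BRNN`, the dataset sizes, and the
# padded data layout (n_utterances x max_len x 240 frames) are all assumptions made
# for illustration only.
import numpy
import theano

n_batches, batch_size, max_len = 5, 100, 78
shared_x = theano.shared(numpy.zeros((n_batches * batch_size, max_len, 240),
                                     dtype='float32'))           # padded input frames
shared_y = theano.shared(numpy.random.randint(0, 39, size=(n_batches * batch_size, 20))
                         .astype('int32'))                        # padded label sequences

net = BRNN(input_dimensionality=240, output_dimensionality=40,
           data_x=shared_x, data_y=shared_y, max_len=max_len, batch_size=batch_size)

for epoch in range(10):
    costs = [net.trainer(i)[0] for i in range(n_batches)]         # one SGD step per minibatch
    print('epoch %d: mean CTC cost %.4f' % (epoch, float(numpy.mean(costs))))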
def __init__(self, input_dimensionality, output_dimensionality, params=None,
             learning_rate=.01, momentum_rate=.25, data_x=None, data_y=None):
    # Symbolic inputs: one utterance (a matrix of frame vectors) and its label sequence.
    inputs = T.matrix('input_seq')
    labels = T.ivector('labels')
    #index = T.iscalar('index')

    # Three feedforward layers feed a bidirectional recurrent pair, followed by a
    # softmax over the output alphabet. When `params` is given, the layers are
    # rebuilt from previously saved weights.
    if params is None:
        self.ff1 = FeedForwardLayer(inputs, input_dimensionality, 3000)
        self.ff2 = FeedForwardLayer(self.ff1.output, 3000, 2000)
        self.ff3 = FeedForwardLayer(self.ff2.output, 2000, 500)
        self.rf = RecurrentLayer(self.ff3.output, 500, 300, False)  # Forward layer
        self.rb = RecurrentLayer(self.ff3.output, 500, 300, True)   # Backward layer
        self.s = SoftmaxLayer(T.concatenate((self.rf.output, self.rb.output), axis=1),
                              2 * 300, output_dimensionality)
    else:
        self.ff1 = FeedForwardLayer(inputs, input_dimensionality, 3000, params[0])
        self.ff2 = FeedForwardLayer(self.ff1.output, 3000, 2000, params[1])
        self.ff3 = FeedForwardLayer(self.ff2.output, 2000, 500, params[2])
        self.rf = RecurrentLayer(self.ff3.output, 500, 300, False, params[3])  # Forward layer
        self.rb = RecurrentLayer(self.ff3.output, 500, 300, True, params[4])   # Backward layer
        self.s = SoftmaxLayer(T.concatenate((self.rf.output, self.rb.output), axis=1),
                              2 * 300, output_dimensionality, params[5])

    ctc = CTCLayer(self.s.output, labels, output_dimensionality - 1)

    # L2 penalty over every weight matrix (feedforward, softmax, and both recurrent directions).
    l2 = (T.sum(self.ff1.W ** 2) + T.sum(self.ff2.W ** 2) + T.sum(self.ff3.W ** 2) +
          T.sum(self.s.W ** 2) +
          T.sum(self.rf.W_if ** 2) + T.sum(self.rf.W_ff ** 2) +
          T.sum(self.rb.W_if ** 2) + T.sum(self.rb.W_ff ** 2))

    # Momentum SGD: each parameter keeps a shared velocity that smooths the gradient
    # of the regularised CTC cost.
    updates = []
    for layer in (self.ff1, self.ff2, self.ff3, self.rf, self.rb, self.s):
        for p in layer.params:
            param_update = theano.shared(p.get_value() * 0., broadcastable=p.broadcastable)
            grad = T.grad(ctc.cost - .005 * l2, p)
            updates.append((p, p - learning_rate * param_update))
            updates.append((param_update, momentum_rate * param_update + (1. - momentum_rate) * grad))

    # Inputs are now passed to the compiled functions directly; the index/givens
    # machinery from the batched version is kept here commented out.
    self.trainer = theano.function(
        inputs=[inputs, labels],
        #inputs=[index],
        outputs=[ctc.cost, self.s.output],
        updates=updates,
        #givens={
        #    inputs: data_x[index],
        #    labels: data_y[index]
        #}
    )

    self.validator = theano.function(
        inputs=[inputs, labels],
        outputs=[ctc.cost]
    )

    self.tester = theano.function(
        inputs=[inputs],
        outputs=[self.s.output]
    )
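# Hypothetical usage sketch (not from the original code): unlike the batched version
# above, this variant compiles trainer/validator/tester over explicit inputs, so
# training iterates over one utterance at a time. The class name `BRNN` and the data
# layout (float32 frame matrix plus int32 label vector per utterance) are assumptions.
import numpy
import theano

net = BRNN(input_dimensionality=240, output_dimensionality=40)

train_x = [numpy.random.randn(120, 240).astype(theano.config.floatX)]  # 120 frames x 240 features
train_y = [numpy.asarray([3, 7, 7, 1], dtype='int32')]                 # label sequence for that utterance

for epoch in range(10):
    for x, y in zip(train_x, train_y):
        cost, softmax_out = net.trainer(x, y)                          # one SGD step per utterance
    val_cost = net.validator(train_x[0], train_y[0])[0]
    print('epoch %d: train cost %.4f, val cost %.4f' % (epoch, float(cost), float(val_cost)))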