def __init__(self, input_dimensionality, output_dimensionality, data_x, data_y, max_len, params=None, batch_size=100, learning_rate=0.01, momentum=.25):
    """Build a BRNN+CTC network and compile index-driven trainer/tester.

    Architecture: three feed-forward layers, a forward and a backward
    recurrent layer over the last one, their outputs concatenated into a
    softmax, trained with a CTC cost via plain SGD.

    Parameters
    ----------
    input_dimensionality : int
        Width of a single input frame (feature-vector size).
    output_dimensionality : int
        Number of output classes; the CTC blank is assumed to be the
        last index (``output_dimensionality - 1``).
    data_x, data_y : theano shared variables
        Whole training set; mini-batches are sliced out by ``index``
        inside the compiled functions via ``givens``.
    max_len : int
        Padded sequence length of every sample in ``data_x`` — presumably
        each row of a batch slice holds ``max_len`` stacked frames
        (TODO confirm against the data pipeline).
    params : list, optional
        Pre-trained parameters for the six layers, ordered
        ``[ff1, ff2, ff3, rf, rb, s]``; fresh parameters when ``None``.
    batch_size : int
        Samples per mini-batch.
    learning_rate : float
        SGD step size.
    momentum : float
        Accepted for interface compatibility but currently unused —
        the updates below are plain (momentum-free) SGD.
    """
    self.input_dimensionality = input_dimensionality
    self.output_dimensionality = output_dimensionality
    self.max_len = max_len

    input_stack = T.fmatrix('input_seq')
    label_stack = T.imatrix('label')
    index = T.iscalar()  # mini-batch index into the shared data

    if params is None:
        ff1 = FeedForwardLayer(input_stack, self.input_dimensionality, 2000)
        ff2 = FeedForwardLayer(ff1.output, 2000, 1000)
        ff3 = FeedForwardLayer(ff2.output, 1000, 500)
        rf = RecurrentLayer(ff3.output, 500, 250, False)     # Forward layer
        rb = RecurrentLayer(ff3.output, 500, 250, True)      # Backward layer
        s = SoftmaxLayer(T.concatenate((rf.output, rb.output), axis=1), 2*250, self.output_dimensionality)
    else:
        ff1 = FeedForwardLayer(input_stack, self.input_dimensionality, 2000, params[0])
        ff2 = FeedForwardLayer(ff1.output, 2000, 1000, params[1])
        ff3 = FeedForwardLayer(ff2.output, 1000, 500, params[2])
        rf = RecurrentLayer(ff3.output, 500, 250, False, params[3])     # Forward layer
        rb = RecurrentLayer(ff3.output, 500, 250, True, params[4])      # Backward layer
        s = SoftmaxLayer(T.concatenate((rf.output, rb.output), axis=1), 2*250, self.output_dimensionality, params[5])

    # Blank symbol for CTC is the last class index.
    ctc = CTCLayer(s.output, label_stack, self.output_dimensionality-1, batch_size)

    # Plain SGD: one (param, param - lr * grad) pair per parameter.
    updates = []
    for layer in (s, rb, rf, ff3, ff2, ff1):
        for p in layer.params:
            updates.append((p, p - learning_rate*T.grad(ctc.cost, p)))

    # BUG FIX: the reshape previously hard-coded a frame width of 240,
    # silently breaking any other input_dimensionality; use the
    # constructor argument instead.
    self.trainer = theano.function(
        inputs=[index],
        outputs=[ctc.cost],
        updates=updates,
        givens={
            input_stack: data_x[index*batch_size:(index+1)*batch_size].reshape((self.max_len*batch_size, self.input_dimensionality)),
            label_stack: data_y[index*batch_size:(index+1)*batch_size]
        }
    )
    self.tester = theano.function(
        inputs=[index],
        outputs=[s.output],
        givens={
            input_stack: data_x[index*batch_size:(index+1)*batch_size].reshape((self.max_len*batch_size, self.input_dimensionality)),
        }
    )
# --- Example #2: alternative implementation of the same network ---
    def __init__(self, input_dimensionality, output_dimensionality, params=None, learning_rate=.01, momentum_rate=.25, data_x=None, data_y=None):
        """Build a BRNN+CTC network and compile trainer/validator/tester.

        Architecture: three feed-forward layers, a bidirectional recurrent
        pair, and a softmax over the concatenated recurrent outputs, trained
        with a CTC cost using momentum SGD plus L2 weight regularization.

        Parameters
        ----------
        input_dimensionality : int
            Width of a single input frame (feature-vector size).
        output_dimensionality : int
            Number of output classes; the CTC blank is the last index
            (``output_dimensionality - 1``).
        params : list, optional
            Pre-trained parameters for the six layers, ordered
            ``[ff1, ff2, ff3, rf, rb, s]``; fresh parameters when ``None``.
        learning_rate, momentum_rate : float
            Momentum-SGD hyper-parameters.
        data_x, data_y : optional
            Accepted for interface compatibility but unused — batches are
            passed directly to the compiled functions.
        """
        inputs = T.matrix('input_seq')
        labels = T.ivector('labels')

        if params is None:
            self.ff1 = FeedForwardLayer(inputs, input_dimensionality, 3000)
            self.ff2 = FeedForwardLayer(self.ff1.output, 3000, 2000)
            self.ff3 = FeedForwardLayer(self.ff2.output, 2000, 500)
            self.rf = RecurrentLayer(self.ff3.output, 500, 300, False)     # Forward layer
            self.rb = RecurrentLayer(self.ff3.output, 500, 300, True)      # Backward layer
            self.s = SoftmaxLayer(T.concatenate((self.rf.output, self.rb.output), axis=1), 2*300, output_dimensionality)
        else:
            self.ff1 = FeedForwardLayer(inputs, input_dimensionality, 3000, params[0])
            self.ff2 = FeedForwardLayer(self.ff1.output, 3000, 2000, params[1])
            self.ff3 = FeedForwardLayer(self.ff2.output, 2000, 500, params[2])
            self.rf = RecurrentLayer(self.ff3.output, 500, 300, False, params[3])     # Forward layer
            self.rb = RecurrentLayer(self.ff3.output, 500, 300, True, params[4])      # Backward layer
            self.s = SoftmaxLayer(T.concatenate((self.rf.output, self.rb.output), axis=1), 2*300, output_dimensionality, params[5])

        # Blank symbol for CTC is the last class index.
        ctc = CTCLayer(self.s.output, labels, output_dimensionality-1)

        # L2 penalty over every weight matrix in the stack.
        l2 = T.sum(self.ff1.W**2) + T.sum(self.ff2.W**2) + T.sum(self.ff3.W**2) + T.sum(self.s.W**2) + T.sum(self.rf.W_if**2) + T.sum(self.rf.W_ff**2) + T.sum(self.rb.W_if**2) + T.sum(self.rb.W_ff**2)

        # Momentum SGD: a shared "velocity" per parameter; Theano applies
        # all updates simultaneously from the old values, so the parameter
        # step uses the previous velocity.
        updates = []
        for layer in (self.ff1, self.ff2, self.ff3, self.rf, self.rb, self.s):
            for p in layer.params:
                velocity = theano.shared(p.get_value()*0., broadcastable=p.broadcastable)
                # BUG FIX: the L2 penalty must be ADDED to the minimized
                # cost — subtracting it (as before) rewards ever-larger
                # weights instead of regularizing them.
                grad = T.grad(ctc.cost + .005*l2, p)
                updates.append((p, p - learning_rate*velocity))
                updates.append((velocity, momentum_rate*velocity + (1. - momentum_rate)*grad))

        self.trainer = theano.function(
            inputs=[inputs, labels],
            outputs=[ctc.cost, self.s.output],
            updates=updates,
        )

        self.validator = theano.function(
            inputs=[inputs, labels],
            outputs=[ctc.cost]
        )

        self.tester = theano.function(
            inputs=[inputs],
            outputs=[self.s.output]
        )
    def __init__(self,
                 input_dimensionality,
                 output_dimensionality,
                 params=None,
                 learning_rate=.01,
                 momentum_rate=.25,
                 data_x=None,
                 data_y=None):
        """Build a BRNN+CTC network and compile trainer/validator/tester.

        Three feed-forward layers feed a forward/backward recurrent pair;
        their concatenated outputs go through a softmax, trained with a CTC
        cost using momentum SGD plus L2 weight regularization.

        :param input_dimensionality: width of one input frame.
        :param output_dimensionality: number of classes; the CTC blank is
            the last index (``output_dimensionality - 1``).
        :param params: optional pre-trained layer parameters, ordered
            ``[ff1, ff2, ff3, rf, rb, s]``.
        :param learning_rate: SGD step size.
        :param momentum_rate: momentum coefficient.
        :param data_x: unused; kept for interface compatibility.
        :param data_y: unused; kept for interface compatibility.
        """
        inputs = T.matrix('input_seq')
        labels = T.ivector('labels')

        if params is None:
            self.ff1 = FeedForwardLayer(inputs, input_dimensionality, 3000)
            self.ff2 = FeedForwardLayer(self.ff1.output, 3000, 2000)
            self.ff3 = FeedForwardLayer(self.ff2.output, 2000, 500)
            self.rf = RecurrentLayer(self.ff3.output, 500, 300,
                                     False)  # Forward layer
            self.rb = RecurrentLayer(self.ff3.output, 500, 300,
                                     True)  # Backward layer
            self.s = SoftmaxLayer(
                T.concatenate((self.rf.output, self.rb.output), axis=1),
                2 * 300, output_dimensionality)
        else:
            self.ff1 = FeedForwardLayer(inputs, input_dimensionality, 3000,
                                        params[0])
            self.ff2 = FeedForwardLayer(self.ff1.output, 3000, 2000, params[1])
            self.ff3 = FeedForwardLayer(self.ff2.output, 2000, 500, params[2])
            self.rf = RecurrentLayer(self.ff3.output, 500, 300, False,
                                     params[3])  # Forward layer
            self.rb = RecurrentLayer(self.ff3.output, 500, 300, True,
                                     params[4])  # Backward layer
            self.s = SoftmaxLayer(
                T.concatenate((self.rf.output, self.rb.output), axis=1),
                2 * 300, output_dimensionality, params[5])

        # Blank symbol for CTC is the last class index.
        ctc = CTCLayer(self.s.output, labels, output_dimensionality - 1)

        # L2 penalty over every weight matrix in the stack.
        l2 = T.sum(self.ff1.W**2) + T.sum(self.ff2.W**2) + T.sum(
            self.ff3.W**2) + T.sum(self.s.W**2) + T.sum(
                self.rf.W_if**2) + T.sum(self.rf.W_ff**2) + T.sum(
                    self.rb.W_if**2) + T.sum(self.rb.W_ff**2)

        # Momentum SGD: a shared "velocity" per parameter; Theano applies
        # all updates simultaneously from the old values, so the parameter
        # step uses the previous velocity.
        updates = []
        for layer in (self.ff1, self.ff2, self.ff3, self.rf, self.rb, self.s):
            for p in layer.params:
                velocity = theano.shared(p.get_value() * 0.,
                                         broadcastable=p.broadcastable)
                # BUG FIX: the L2 penalty must be ADDED to the minimized
                # cost — subtracting it (as before) rewards ever-larger
                # weights instead of regularizing them.
                grad = T.grad(ctc.cost + .005 * l2, p)
                updates.append((p, p - learning_rate * velocity))
                updates.append((velocity, momentum_rate * velocity +
                                (1. - momentum_rate) * grad))

        self.trainer = theano.function(
            inputs=[inputs, labels],
            outputs=[ctc.cost, self.s.output],
            updates=updates,
        )

        self.validator = theano.function(inputs=[inputs, labels],
                                         outputs=[ctc.cost])

        self.tester = theano.function(inputs=[inputs], outputs=[self.s.output])