def train(self, xy_data, rnns=None):
        """Train one RNN for each possible output digit.

        RNN number ``i`` is fed the inputs produced by ``self.scramble(x, i)``
        and is trained to predict element ``i`` of the encoded, lengthened
        target.

        Args:
            xy_data: re-iterable sequence of ``(x, y)`` pairs used for the
                SGD updates.
            rnns: optional collection of RNNs; when given it replaces
                ``self.rnns`` before training starts.

        Returns:
            ``self.rnns`` after every RNN has been run for ``self.n_epochs``
            epochs.
        """
        if rnns is not None:
            self.rnns = rnns

        # Encode every training target once; element i of each encoding is
        # the label for RNN i.
        ys = [self.encode_expr(lengthen(y, self.y_len)) for _, y in xy_data]

        # Dev data never drives a parameter update: it is only used for the
        # gradient check and the periodic loss report below.
        dev_data = get_data('data/dev.txt')
        # The dev-label encoding does not depend on i, so do it once here
        # instead of once per RNN.
        dev_ys = [
            self.encode_expr(lengthen(y, self.y_len)) for _, y in dev_data
        ]

        for i, rnn_i in enumerate(self.rnns):
            # i is the index of the RNN (and of the output element) in use
            print('i %s' % (i,))

            xs_i = [
                np.array(self.encode_expr(self.scramble(x, i)))
                for x, _ in xy_data
            ]
            ys_i = [y[i] for y in ys]
            dev_xs_i = [
                np.array(self.encode_expr(self.scramble(x, i)))
                for x, _ in dev_data
            ]
            dev_ys_i = [y[i] for y in dev_ys]

            # NOTE(review): needs at least one dev example; an empty dev set
            # raises IndexError here, exactly as before.
            rnn_i.grad_check(dev_xs_i[0], dev_ys_i[0])

            for j in xrange(self.n_epochs):
                for x, y_i in zip(xs_i, ys_i):
                    rnn_i.train_point_sgd(x, y_i, self.alpha)
                # report the dev loss every tenth epoch
                if j % 10 == 0:
                    dev_loss = rnn_i.compute_loss(dev_xs_i, dev_ys_i)
                    print('dev loss %s' % (dev_loss,))

        return self.rnns
Example #2
0
    def train(self, xy_data, rnns=None):
        """Train one RNN for each possible output digit.

        The RNN at index ``i`` receives inputs built with
        ``self.scramble(x, i)`` and learns element ``i`` of the encoded,
        lengthened target.

        Args:
            xy_data: re-iterable sequence of ``(x, y)`` training pairs.
            rnns: optional collection of RNNs; if not ``None`` it is stored
                in ``self.rnns`` and trained instead of the current ones.

        Returns:
            ``self.rnns`` once each RNN has gone through ``self.n_epochs``
            epochs of per-example SGD.
        """
        if rnns is not None:
            self.rnns = rnns

        # Training targets are encoded a single time; RNN i picks out
        # element i of each encoding as its label.
        ys = [self.encode_expr(lengthen(y, self.y_len)) for _, y in xy_data]

        # The dev set is used for the gradient check and for loss reporting
        # only; no SGD step is taken on it.
        dev_data = get_data('data/dev.txt')
        # Loop-invariant: encode the dev targets once rather than per RNN.
        dev_ys = [
            self.encode_expr(lengthen(y, self.y_len)) for _, y in dev_data
        ]

        for i, rnn_i in enumerate(self.rnns):
            # i is the index of the RNN we are currently training
            print('i %s' % (i,))

            xs_i = [
                np.array(self.encode_expr(self.scramble(x, i)))
                for x, _ in xy_data
            ]
            ys_i = [y[i] for y in ys]
            dev_xs_i = [
                np.array(self.encode_expr(self.scramble(x, i)))
                for x, _ in dev_data
            ]
            dev_ys_i = [y[i] for y in dev_ys]

            # NOTE(review): an empty dev set raises IndexError here, as it
            # did before this change.
            rnn_i.grad_check(dev_xs_i[0], dev_ys_i[0])

            for j in xrange(self.n_epochs):
                for x, y_i in zip(xs_i, ys_i):
                    rnn_i.train_point_sgd(x, y_i, self.alpha)
                # dev loss is printed on every tenth epoch
                if j % 10 == 0:
                    dev_loss = rnn_i.compute_loss(dev_xs_i, dev_ys_i)
                    print('dev loss %s' % (dev_loss,))

        return self.rnns
Example #3
0
    def learn(self, xy_data):
        """Collect co-occurrence counts from training pairs into ``self.cache``.

        The cache maps a key
        ``(operand position, element of first operand,
        element of second operand, answer position)``
        to a ``Counter`` recording how often each character was seen at that
        answer position.

        Args:
            xy_data: iterable of ``(x, y)`` pairs; ``x`` is split with
                ``self.num_split`` and ``y`` is passed through ``lengthen``
                with ``self.y_len``.

        Returns:
            The freshly built cache (also stored on ``self.cache``).
        """
        self.cache = counts = defaultdict(Counter)

        for expr, answer in xy_data:
            # operands of the expression, split on the operator
            operands = self.num_split(expr)
            padded = lengthen(answer, self.y_len)
            aligned = zip(operands[0], operands[1])
            for pos, (a, b) in enumerate(aligned):
                # every aligned operand pair votes for every answer position
                for out_pos, out_char in enumerate(padded):
                    counts[(pos, a, b, out_pos)][out_char] += 1

        return self.cache
Example #4
0
    def learn(self, xy_data):
        """Rebuild ``self.cache`` with statistics gathered from ``xy_data``.

        Cache layout: the key is
        ``(index of the operand elements, element of the first operand,
        element of the second operand, index into the answer)``
        and the value is a ``Counter`` of the characters observed at that
        index of the answer.

        Args:
            xy_data: iterable of ``(x, y)`` pairs.

        Returns:
            ``self.cache`` after all pairs have been counted.
        """
        self.cache = defaultdict(Counter)

        for expression, answer in xy_data:
            # split the arguments by the operator
            operands = self.num_split(expression)
            padded_answer = lengthen(answer, self.y_len)
            first, second = operands[0], operands[1]

            for digit_idx, digit_pair in enumerate(zip(first, second)):
                left_digit, right_digit = digit_pair
                for answer_idx, answer_char in enumerate(padded_answer):
                    key = (digit_idx, left_digit, right_digit, answer_idx)
                    self.cache[key][answer_char] += 1

        return self.cache
Example #5
0
 def lengthen_double(self, x_string):
     """Lengthen each operand of an addition expression to ``self.x_len``.

     The string is split on ``' + '``, every piece goes through
     ``lengthen``, and the pieces are joined back with ``' + '``;
     e.g. '21 + 12' -> '021 + 012' (per the original note; presumably
     with ``x_len`` of 3).
     """
     operands = x_string.split(' + ')
     padded = [lengthen(term, self.x_len) for term in operands]
     return ' + '.join(padded)
 def lengthen_double(self, x_string):
     """Return ``x_string`` with every ``' + '``-separated term lengthened.

     Each term is passed to ``lengthen`` together with ``self.x_len`` and
     the results are re-joined with ``' + '``, so the format goes from
     '21 + 12' to '021 + 012' (example taken from the original comment).
     """
     padded_terms = []
     for term in x_string.split(' + '):
         padded_terms.append(lengthen(term, self.x_len))
     return ' + '.join(padded_terms)
Example #7
0
 def num_split(self, x_string):
     """Split an expression into its lengthened operands.

     The arguments are separated on ``' + '`` (only addition is assumed
     for now) and each one is passed through ``lengthen`` with
     ``self.x_len``; e.g. '21 + 12' => ['021', '012'] per the original
     note.
     """
     operands = x_string.split(' + ')
     return [lengthen(operand, self.x_len) for operand in operands]
Example #8
0
 def num_split(self, x_string):
     """Return the list of operands of ``x_string``, each lengthened.

     Only the ``' + '`` operator is handled for now. Every operand is run
     through ``lengthen`` using ``self.x_len``, so '21 + 12' becomes
     ['021', '012'] (example from the original comment).
     """
     pieces = []
     for operand in x_string.split(' + '):
         pieces.append(lengthen(operand, self.x_len))
     return pieces