def train(self, xy_data, rnns=None):
    """Train one RNN per output position and return the list of RNNs.

    The i-th RNN in self.rnns is trained to predict element i of the
    encoded, length-normalised answer.

    Args:
        xy_data: sequence of (x, y) pairs. It is iterated once up front
            and once more per RNN, so it must be re-iterable.
        rnns: optional replacement for self.rnns. When None, the RNNs
            already stored on the instance are trained.

    Returns:
        self.rnns, after each RNN has been trained in place.
    """
    if rnns is not None:
        self.rnns = rnns

    ys = [self.encode_expr(lengthen(y, self.y_len)) for _, y in xy_data]

    # The dev set is only fed to grad_check and compute_loss below; the
    # SGD updates only ever see xy_data.
    dev_data = get_data('data/dev.txt')
    # Dev targets do not depend on the RNN index, so encode them once.
    # NOTE(review): assumes encode_expr has no side effects - confirm.
    dev_ys = [self.encode_expr(lengthen(y, self.y_len)) for _, y in dev_data]

    for i, rnn_i in enumerate(self.rnns):
        # i is the index of the RNN (and of the answer position) in use.
        # Single-argument print(...) emits the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('%s %s' % ('i', i))

        xs_i = [
            np.array(self.encode_expr(self.scramble(x, i)))
            for x, _ in xy_data
        ]
        ys_i = [y[i] for y in ys]
        dev_xs_i = [
            np.array(self.encode_expr(self.scramble(x, i)))
            for x, _ in dev_data
        ]
        dev_ys_i = [y[i] for y in dev_ys]

        # An empty dev set should only disable the diagnostics, not
        # abort training with an IndexError.
        if dev_xs_i:
            rnn_i.grad_check(dev_xs_i[0], dev_ys_i[0])

        for epoch in xrange(self.n_epochs):
            for x, target in zip(xs_i, ys_i):
                rnn_i.train_point_sgd(x, target, self.alpha)
            if dev_xs_i and epoch % 10 == 0:
                dev_loss = rnn_i.compute_loss(dev_xs_i, dev_ys_i)
                print('%s %s' % ('dev loss', dev_loss))

    return self.rnns
def train(self, xy_data, rnns=None):
    """Train one RNN per output position and return the list of RNNs.

    The i-th RNN in self.rnns is trained to predict element i of the
    encoded, length-normalised answer.

    Args:
        xy_data: sequence of (x, y) pairs. It is iterated once up front
            and once more per RNN, so it must be re-iterable.
        rnns: optional replacement for self.rnns. When None, the RNNs
            already stored on the instance are trained.

    Returns:
        self.rnns, after each RNN has been trained in place.
    """
    if rnns is not None:
        self.rnns = rnns

    ys = [self.encode_expr(lengthen(y, self.y_len)) for _, y in xy_data]

    # The dev set is only fed to grad_check and compute_loss below; the
    # SGD updates only ever see xy_data.
    dev_data = get_data('data/dev.txt')
    # Dev targets do not depend on the RNN index, so encode them once.
    # NOTE(review): assumes encode_expr has no side effects - confirm.
    dev_ys = [self.encode_expr(lengthen(y, self.y_len)) for _, y in dev_data]

    for i, rnn_i in enumerate(self.rnns):
        # i is the index of the RNN (and of the answer position) in use.
        # Single-argument print(...) emits the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('%s %s' % ('i', i))

        xs_i = [
            np.array(self.encode_expr(self.scramble(x, i)))
            for x, _ in xy_data
        ]
        ys_i = [y[i] for y in ys]
        dev_xs_i = [
            np.array(self.encode_expr(self.scramble(x, i)))
            for x, _ in dev_data
        ]
        dev_ys_i = [y[i] for y in dev_ys]

        # An empty dev set should only disable the diagnostics, not
        # abort training with an IndexError.
        if dev_xs_i:
            rnn_i.grad_check(dev_xs_i[0], dev_ys_i[0])

        for epoch in xrange(self.n_epochs):
            for x, target in zip(xs_i, ys_i):
                rnn_i.train_point_sgd(x, target, self.alpha)
            if dev_xs_i and epoch % 10 == 0:
                dev_loss = rnn_i.compute_loss(dev_xs_i, dev_ys_i)
                print('%s %s' % ('dev loss', dev_loss))

    return self.rnns
def learn(self, xy_data):
    """Rebuild self.cache with answer-character counts and return it.

    The cache is a defaultdict of Counters keyed by
    (i, x0, x1, j), where x0 and x1 are the i-th characters of the first
    and second operand returned by self.num_split, and j is a position
    in lengthen(y, self.y_len). Each Counter tallies which character was
    seen at answer position j for that key.
    """
    tallies = self.cache = defaultdict(Counter)
    for expr, answer in xy_data:
        # split the expression into its operands (by the operator)
        operands = self.num_split(expr)
        answer_chars = lengthen(answer, self.y_len)
        digit_pairs = zip(operands[0], operands[1])
        for pos, (d0, d1) in enumerate(digit_pairs):
            # every operand-digit pair is counted against every answer position
            for out_pos, ch in enumerate(answer_chars):
                tallies[(pos, d0, d1, out_pos)][ch] += 1
    return self.cache
def learn(self, xy_data):
    """Collect per-digit statistics from the training pairs into self.cache.

    self.cache is replaced by a fresh defaultdict(Counter). For each
    (x, y) pair, every aligned character pair of the first two operands
    from self.num_split(x) is combined with every position of
    lengthen(y, self.y_len); the key is
    (operand index, first-operand char, second-operand char, answer index)
    and the Counter under it records the answer character seen there.

    Returns the new cache.
    """
    self.cache = defaultdict(Counter)
    for expression, result in xy_data:
        parts = self.num_split(expression)
        result = lengthen(result, self.y_len)
        lhs = parts[0]
        rhs = parts[1]
        for operand_idx, (lhs_ch, rhs_ch) in enumerate(zip(lhs, rhs)):
            for answer_idx, answer_ch in enumerate(result):
                key = (operand_idx, lhs_ch, rhs_ch, answer_idx)
                self.cache[key][answer_ch] += 1
    return self.cache
def lengthen_double(self, x_string):
    """Apply lengthen(..., self.x_len) to each ' + '-separated operand.

    The operands are re-joined with ' + ', e.g. '21 + 12' -> '021 + 012'.
    """
    operator = ' + '
    padded = [lengthen(part, self.x_len) for part in x_string.split(operator)]
    return operator.join(padded)
def lengthen_double(self, x_string):
    """Return x_string with every operand passed through lengthen.

    The string is split on ' + ', each piece is run through
    lengthen(piece, self.x_len), and the pieces are joined back with
    ' + ' (format: '21 + 12' -> '021 + 012').
    """
    padded_operands = []
    for operand in x_string.split(' + '):
        padded_operands.append(lengthen(operand, self.x_len))
    return ' + '.join(padded_operands)
def num_split(self, x_string):
    """Split an expression on ' + ' and lengthen each operand to self.x_len.

    Example: '21 + 12' => ['021', '012']. Only addition is handled for now.
    """
    operands = x_string.split(' + ')
    width = self.x_len
    return [lengthen(operand, width) for operand in operands]