def gradient_check():
    # prepare a subset of the train data
    subset = 50
    grad_train_img = train['images'][:subset, :].T
    grad_train_truth = train['one_hot'][:subset, :].T

    # init the network
    N_hidden = 50
    lin = [
        Linear(cifar.in_size, N_hidden, lam=0.1),
        Linear(N_hidden, cifar.out_size, lam=0.1)
    ]
    g_net = Net(
        [lin[0], ReLU(N_hidden), lin[1], Softmax(cifar.out_size)],
        lam=0.1, l_rate=0.001, decay=0.99, mom=0.99)

    # do the pass
    grad_out = g_net.forward(grad_train_img)
    g_net.backward(grad_train_truth)
    cost = g_net.cost(grad_train_truth, out=grad_out)

    # calc the numeric grad for each linear layer
    for linear in lin:
        num_gradient(grad_train_img, grad_train_truth, g_net, linear, cost)
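# A minimal sketch of the num_gradient helper used by the gradient checks, under
# the assumption that a Linear layer exposes its weights as `layer.W` and the
# analytic gradient from the backward pass as `layer.grad_W` (both attribute
# names are assumptions, not taken from the original code). It perturbs a few
# randomly chosen weights and compares a forward-difference estimate of the cost
# gradient against the analytic value.
import numpy as np

def num_gradient(images, truth, net, layer, cost, h=1e-5, n_checks=20):
    rel_errors = []
    for _ in range(n_checks):
        # pick a random weight entry to perturb
        idx = tuple(np.random.randint(dim) for dim in layer.W.shape)
        old_value = layer.W[idx]

        layer.W[idx] = old_value + h
        cost_plus = net.cost(truth, out=net.forward(images))
        layer.W[idx] = old_value  # restore the original weight

        numeric = (cost_plus - cost) / h   # forward difference against the baseline cost
        analytic = layer.grad_W[idx]       # gradient computed by net.backward()
        denom = max(abs(numeric), abs(analytic), 1e-12)
        rel_errors.append(abs(numeric - analytic) / denom)
    print('max relative error: {:.3e}'.format(max(rel_errors)))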
def gradient_check(lam, lin_neurons, with_BN):
    # prepare a subset of the train data
    subset = 50
    grad_train_img = train['images'][:subset, :].T
    grad_train_truth = train['one_hot'][:subset, :].T

    # build the layer list from the requested hidden widths
    count = 0
    layers = []
    for N in lin_neurons:
        not_last_layer = count < (len(lin_neurons) - 1)
        layers.append(
            Linear(cifar.in_size if count == 0 else lin_neurons[count - 1],
                   N if not_last_layer else cifar.out_size,
                   lam=lam))
        if not_last_layer:
            if with_BN:
                layers.append(BatchNorm(N))
            layers.append(ReLU(N))
        count += 1
    if len(lin_neurons) == 1 and with_BN:
        layers.append(BatchNorm(cifar.out_size))
    layers.append(Softmax(cifar.out_size))

    # init the network
    print(["{}:{},{}".format(l.name, l.in_size, l.out_size) for l in layers])
    g_net = Net(layers, lam=lam, l_rate=0.001, decay=0.99, mom=0.99)

    # do the pass
    grad_out = g_net.forward(grad_train_img, train=True)
    g_net.backward(grad_train_truth)
    cost = g_net.cost(grad_train_truth, out=grad_out)

    # calc the numeric grad for each non-activation layer
    for linear in [l for l in layers if not l.isActivation]:
        num_gradient(grad_train_img, grad_train_truth, g_net, linear, cost)
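# Hypothetical invocations of the parameterised gradient check above; the layer
# widths and lambda values are arbitrary illustration choices (the last entry of
# lin_neurons is replaced by cifar.out_size inside the function).
gradient_check(lam=0.0, lin_neurons=[50, 50], with_BN=False)  # plain net, no regularisation
gradient_check(lam=0.1, lin_neurons=[50, 50], with_BN=True)   # same net with batch norm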
from net import Net

net = Net()
x = net.variable(1)
y = net.variable(2)
o = net.mul(x, y)

print('net.forward()=', net.forward())
print('net.backward()')
net.backward()
print('x=', x, 'y=', y, 'o=', o)
print('gfx = x.g/o.g = ', x.g / o.g, 'gfy = y.g/o.g=', y.g / o.g)
# net.gradient_descendent()
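# A minimal sketch (not the actual `net` module) of how a Net with variable(),
# mul(), forward() and backward() could be built: each node stores a value `v`
# and an accumulated gradient `g`, and backward() seeds the output gradient
# with 1 before applying the product rule in reverse order.
class _Node(object):
    def __init__(self, v):
        self.v = v
        self.g = 0.0

    def __repr__(self):
        return 'Node(v={}, g={})'.format(self.v, self.g)

class MiniNet(object):
    def __init__(self):
        self.ops = []  # (output, input_a, input_b) triples, in creation order

    def variable(self, v):
        return _Node(v)

    def mul(self, a, b):
        out = _Node(a.v * b.v)
        self.ops.append((out, a, b))
        return out

    def forward(self):
        for out, a, b in self.ops:
            out.v = a.v * b.v
        return self.ops[-1][0].v

    def backward(self):
        self.ops[-1][0].g = 1.0    # seed d(out)/d(out) = 1
        for out, a, b in reversed(self.ops):
            a.g += b.v * out.g     # product rule: d(a*b)/da = b
            b.g += a.v * out.g     # product rule: d(a*b)/db = a

# With x = 1 and y = 2 this reproduces the expected ratios:
# x.g / o.g == 2 (the value of y) and y.g / o.g == 1 (the value of x).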
n = 10000
data.train_input(x[:n], y[:n])
data.test_input(xt, yt)
data.batch_size(16)

lr = 0.0009
gamma = 0.9

for epoch in xrange(50):
    print 'Epoch: ', epoch

    # Training (Mini-batch)
    now = time.time()
    for _ in xrange(data.batch_run()):
        net.input(data.next_batch())
        net.forward()
        net.backward(lr)
    t = time.time() - now

    acc, loss = net.get_record()
    print 'Acc: ', np.array(acc).mean()
    print 'Loss: ', np.array(loss).mean()
    print 'Time: ', t
    f, b = net.get_profile()
    net.clear_record()

    # Testing
    net.input(data.test())
    net.forward()
    print 'Val: ', net.get_record()[0][0]
    print 'Loss: ', net.get_record()[1][0]
    net.clear_record()
    print
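# The `gamma = 0.9` above looks like a momentum coefficient. A sketch of the
# classical momentum update a layer could apply inside net.backward(lr) under
# that assumption; W, dW and the velocity buffer v are illustrative names, not
# the actual internals of the net module.
def momentum_step(W, dW, v, lr, gamma=0.9):
    v = gamma * v - lr * dW    # exponentially decaying velocity of past gradients
    W = W + v                  # move the weights along the velocity
    return W, v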
total_epoch = 100
loss_cache = 10

for epoch in xrange(1, total_epoch + 1):
    print 'Epoch: {}/{}'.format(epoch, total_epoch)

    # Training (Mini-batch)
    now = time.time()
    data.shuffle()

    bar = Bar(max_value=n)
    bar.cursor.clear_lines(2)  # Make some room
    bar.cursor.save()          # Mark starting line
    for _ in xrange(data.batch_run()):
        net.input(data.next_batch())
        net.forward()
        net.backward(lr, beta_1, beta_2, epoch)
        bar.cursor.restore()   # Return cursor to start
        bar.draw(value=data.get_count())
    t = time.time() - now

    acc, loss = net.get_record()
    loss_avg = np.array(loss).mean()
    loss_diff = loss_avg - loss_cache
    loss_cache = loss_avg
    print 'Acc: ', np.array(acc).mean()
    print 'Loss: ', loss_avg
    print 'Time: ', t
    f, b = net.get_profile()
    net.clear_record()

    bar_t = Bar(max_value=nt)
    bar_t.cursor.clear_lines(2)  # Make some room
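# net.backward(lr, beta_1, beta_2, epoch) suggests an Adam-style update with
# bias correction by the step count. A sketch of that update for one parameter
# tensor, under that assumption; W, dW, m and v are illustrative names for the
# weights, their gradient, and the running first/second moments.
import numpy as np

def adam_step(W, dW, m, v, lr, beta_1, beta_2, t, eps=1e-8):
    m = beta_1 * m + (1 - beta_1) * dW          # first moment (mean of gradients)
    v = beta_2 * v + (1 - beta_2) * (dW ** 2)   # second moment (uncentered variance)
    m_hat = m / (1 - beta_1 ** t)               # bias-correct the moments by step t
    v_hat = v / (1 - beta_2 ** t)
    W = W - lr * m_hat / (np.sqrt(v_hat) + eps)
    return W, m, v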