def train_network(model, num_epochs=100, minibatch_size=256, lr=0.01, mom=0.75, wd=5e-4):
    """Train `model` on MNIST with data-parallel momentum SGD over the
    module-level `gpu` device list.

    Minibatches are assigned round-robin to devices; once every device has
    produced a gradient (i.e. when the round wraps back to device 0), the
    per-device gradients are merged and a momentum + weight-decay update is
    applied to the shared model.

    Relies on module-level names: `gpu`, `lazy_cycle`, `mnist_io`, `owl`,
    `time`, `bpprop`, `multi_gpu_merge`, `print_training_accuracy`.

    :param model: network object exposing `weights`, `bias`, `weightdelta`,
        `biasdelta` lists (the delta lists hold the momentum buffers)
    :param num_epochs: number of passes over the training set
    :param minibatch_size: total minibatch size, split evenly across devices
    :param lr: learning rate
    :param mom: momentum coefficient
    :param wd: L2 weight-decay coefficient (applied to weights only, not bias)
    """
    # load data
    # Python 2 integer division: each device gets minibatch_size / len(gpu)
    # samples -- presumably minibatch_size is divisible by len(gpu); verify.
    (train_data, test_data) = mnist_io.load_mb_from_mat('mnist_all.mat', minibatch_size / len(gpu))
    num_test_samples = test_data[0].shape[0]
    # NHWC-style layout [28, 28, channels=1, batch] expected by bpprop
    test_samples = owl.from_numpy(test_data[0]).reshape([28, 28, 1, num_test_samples])
    test_labels = owl.from_numpy(test_data[1])
    for i in xrange(num_epochs):
        print "---Epoch #", i
        last = time.time()
        count = 0
        # one gradient slot per device, filled as the round-robin progresses
        weightgrads = [None] * len(gpu)
        biasgrads = [None] * len(gpu)
        for (mb_samples, mb_labels) in train_data:
            count += 1
            # round-robin device assignment (count starts at 1, so the first
            # minibatch goes to gpu[1 % len(gpu)], wrapping to 0 each round)
            current_gpu = count % len(gpu)
            owl.set_device(gpu[current_gpu])
            num_samples = mb_samples.shape[0]
            data = owl.from_numpy(mb_samples).reshape([28, 28, 1, num_samples])
            label = owl.from_numpy(mb_labels)
            out, weightgrads[current_gpu], biasgrads[current_gpu] = bpprop(model, data, label)
            # kick off asynchronous (lazy) evaluation of this device's pass
            # so devices compute concurrently before the merge below
            out.start_eval()
            if current_gpu == 0:
                # a full round has completed: merge all per-device gradients
                # and apply the momentum update; weight decay hits weights only
                for k in range(len(model.weights)):
                    model.weightdelta[k] = mom * model.weightdelta[k] - lr / num_samples / len(gpu) * multi_gpu_merge(weightgrads, 0, k) - lr * wd * model.weights[k]
                    model.biasdelta[k] = mom * model.biasdelta[k] - lr / num_samples / len(gpu) * multi_gpu_merge(biasgrads, 0, k)
                    model.weights[k] += model.weightdelta[k]
                    model.bias[k] += model.biasdelta[k]
                # report accuracy only every lazy_cycle rounds to limit syncs
                if count % (len(gpu) * lazy_cycle) == 0:
                    print_training_accuracy(out, label, num_samples, 'Training')
        print '---End of Epoch #', i, 'time:', time.time() - last
        # do test
        out, _, _ = bpprop(model, test_samples, test_labels)
        print_training_accuracy(out, test_labels, num_test_samples, 'Testing')
def train_network(model, num_epochs=100, minibatch_size=256, lr=0.01, mom=0.75, wd=5e-4):
    """Train `model` on MNIST with data-parallel momentum SGD over the
    module-level `gpu` device list.

    Minibatches are assigned round-robin to devices; when the round wraps back
    to device 0, the per-device gradients are merged and a momentum +
    weight-decay update is applied to the shared model.

    NOTE(review): unlike the sibling multi-GPU trainer in this file, this
    variant does not call `out.start_eval()` after `bpprop` -- confirm whether
    eager kick-off of the lazy evaluation was intentionally omitted here.

    Relies on module-level names: `gpu`, `lazy_cycle`, `mnist_io`, `owl`,
    `time`, `bpprop`, `multi_gpu_merge`, `print_training_accuracy`.

    :param model: network object exposing `weights`, `bias`, `weightdelta`,
        `biasdelta` lists (the delta lists hold the momentum buffers)
    :param num_epochs: number of passes over the training set
    :param minibatch_size: total minibatch size, split evenly across devices
    :param lr: learning rate
    :param mom: momentum coefficient
    :param wd: L2 weight-decay coefficient (applied to weights only, not bias)
    """
    # load data
    # Python 2 integer division: each device gets minibatch_size / len(gpu)
    # samples -- presumably minibatch_size is divisible by len(gpu); verify.
    (train_data, test_data) = mnist_io.load_mb_from_mat('mnist_all.mat', minibatch_size / len(gpu))
    num_test_samples = test_data[0].shape[0]
    # layout [28, 28, channels=1, batch] expected by bpprop
    test_samples = owl.from_numpy(test_data[0]).reshape([28, 28, 1, num_test_samples])
    test_labels = owl.from_numpy(test_data[1])
    for i in xrange(num_epochs):
        print "---Epoch #", i
        last = time.time()
        count = 0
        # one gradient slot per device, filled as the round-robin progresses
        weightgrads = [None] * len(gpu)
        biasgrads = [None] * len(gpu)
        for (mb_samples, mb_labels) in train_data:
            count += 1
            # round-robin device assignment; wraps to 0 each full round
            current_gpu = count % len(gpu)
            owl.set_device(gpu[current_gpu])
            num_samples = mb_samples.shape[0]
            data = owl.from_numpy(mb_samples).reshape([28, 28, 1, num_samples])
            label = owl.from_numpy(mb_labels)
            out, weightgrads[current_gpu], biasgrads[current_gpu] = bpprop(model, data, label)
            if current_gpu == 0:
                # a full round has completed: merge all per-device gradients
                # and apply the momentum update; weight decay hits weights only
                for k in range(len(model.weights)):
                    model.weightdelta[k] = mom * model.weightdelta[k] - lr / num_samples / len(gpu) * multi_gpu_merge(weightgrads, 0, k) - lr * wd * model.weights[k]
                    model.biasdelta[k] = mom * model.biasdelta[k] - lr / num_samples / len(gpu) * multi_gpu_merge(biasgrads, 0, k)
                    model.weights[k] += model.weightdelta[k]
                    model.bias[k] += model.biasdelta[k]
                # report accuracy only every lazy_cycle rounds to limit syncs
                if count % (len(gpu) * lazy_cycle) == 0:
                    print_training_accuracy(out, label, num_samples, 'Training')
        print '---End of Epoch #', i, 'time:', time.time() - last
        # do test
        out, _, _ = bpprop(model, test_samples, test_labels)
        print_training_accuracy(out, test_labels, num_test_samples, 'Testing')
def run(self): (train_data, test_data) = mnist_io.load_mb_from_mat(self.data_file, self.mb_size) np.set_printoptions(linewidth=200) num_test_samples = test_data[0].shape[0] (test_samples, test_labels) = map(lambda npdata: owl.from_numpy(npdata), test_data) count = 1 owl.set_device(self.gpu) for epoch in range(self.num_epochs): print '---Start epoch #%d' % epoch # train for (mb_samples, mb_labels) in train_data: num_samples = mb_samples.shape[0] a1 = owl.from_numpy(mb_samples) target = owl.from_numpy(mb_labels) # ff a2 = ele.relu(self.w1 * a1 + self.b1) a3 = self.w2 * a2 + self.b2 # softmax & error out = co.softmax(a3) s3 = out - target # bp s2 = self.w2.trans() * s3 s2 = ele.relu_back(s2, a2) # grad gw1 = s2 * a1.trans() / num_samples gb1 = s2.sum(1) / num_samples gw2 = s3 * a2.trans() / num_samples gb2 = s3.sum(1) / num_samples # update self.w1 -= self.eps_w * gw1 self.w2 -= self.eps_w * gw2 self.b1 -= self.eps_b * gb1 self.b2 -= self.eps_b * gb2 if (count % 40 == 0): correct = out.argmax(0) - target.argmax(0) val = correct.to_numpy() print 'Training error:', float( np.count_nonzero(val)) / num_samples count = count + 1 # test a1 = test_samples a2 = ele.relu(self.w1 * a1 + self.b1) a3 = self.w2 * a2 + self.b2 correct = a3.argmax(0) - test_labels.argmax(0) val = correct.to_numpy() #print val print 'Testing error:', float( np.count_nonzero(val)) / num_test_samples print '---Finish epoch #%d' % epoch
def run(self): (train_data, test_data) = mnist_io.load_mb_from_mat(self.data_file, self.mb_size) np.set_printoptions(linewidth=200) num_test_samples = test_data[0].shape[0] (test_samples, test_labels) = test_data count = 1 for epoch in range(self.num_epochs): print '---Start epoch #%d' % epoch # train for (mb_samples, mb_labels) in train_data: num_samples = mb_samples.shape[0] a1 = mb_samples.T target = mb_labels.T # ff a2 = relu(np.dot(self.w1, a1) + self.b1) a3 = np.dot(self.w2, a2) + self.b2 # softmax & error out = softmax(a3) s3 = out - target # bp s2 = np.dot(self.w2.T, s3) s2 = relu_back(s2, a2) # grad gw1 = np.dot(s2, a1.T) / num_samples gb1 = np.sum(s2, axis=1, keepdims=True) / num_samples gw2 = np.dot(s3, a2.T) / num_samples gb2 = np.sum(s3, axis=1, keepdims=True) / num_samples # update self.w1 -= self.eps_w * gw1 self.w2 -= self.eps_w * gw2 self.b1 -= self.eps_b * gb1 self.b2 -= self.eps_b * gb2 if (count % 40 == 0): correct = np.max_index(out, axis=0) - np.max_index(target, axis=0) print 'Training error:', float( np.count_nonzero(correct)) / num_samples count = count + 1 # test a1 = test_samples.T a2 = relu(np.dot(self.w1, a1) + self.b1) a3 = np.dot(self.w2, a2) + self.b2 correct = np.max_index(a3, axis=0) - np.max_index(test_labels.T, axis=0) #print correct print 'Testing error:', float( np.count_nonzero(correct)) / num_test_samples print '---Finish epoch #%d' % epoch
def run(self): (train_data, test_data) = mnist_io.load_mb_from_mat(self.data_file, self.mb_size) np.set_printoptions(linewidth=200) num_test_samples = test_data[0].shape[0] (test_samples, test_labels) = map(lambda npdata : owl.from_numpy(npdata), test_data) count = 1 owl.set_device(self.gpu) for epoch in range(self.num_epochs): print '---Start epoch #%d' % epoch # train for (mb_samples, mb_labels) in train_data: num_samples = mb_samples.shape[0] a1 = owl.from_numpy(mb_samples) target = owl.from_numpy(mb_labels) # ff a2 = ele.relu(self.w1 * a1 + self.b1) a3 = self.w2 * a2 + self.b2 # softmax & error out = co.softmax(a3) s3 = out - target # bp s2 = self.w2.trans() * s3 s2 = ele.relu_back(s2, a2) # grad gw1 = s2 * a1.trans() / num_samples gb1 = s2.sum(1) / num_samples gw2 = s3 * a2.trans() / num_samples gb2 = s3.sum(1) / num_samples # update self.w1 -= self.eps_w * gw1 self.w2 -= self.eps_w * gw2 self.b1 -= self.eps_b * gb1 self.b2 -= self.eps_b * gb2 if (count % 40 == 0): correct = out.max_index(0) - target.max_index(0) val = correct.to_numpy() print 'Training error:', float(np.count_nonzero(val)) / num_samples count = count + 1 # test a1 = test_samples a2 = ele.relu(self.w1 * a1 + self.b1) a3 = self.w2 * a2 + self.b2 correct = a3.max_index(0) - test_labels.max_index(0) val = correct.to_numpy() #print val print 'Testing error:', float(np.count_nonzero(val)) / num_test_samples print '---Finish epoch #%d' % epoch
def run(self): (train_data, test_data) = mnist_io.load_mb_from_mat(self.data_file, self.mb_size) np.set_printoptions(linewidth=200) num_test_samples = test_data[0].shape[0] (test_samples, test_labels) = test_data count = 1 for epoch in range(self.num_epochs): print '---Start epoch #%d' % epoch # train for (mb_samples, mb_labels) in train_data: num_samples = mb_samples.shape[0] a1 = mb_samples.T target = mb_labels.T # ff a2 = relu(np.dot(self.w1, a1) + self.b1) a3 = np.dot(self.w2, a2) + self.b2 # softmax & error out = softmax(a3) s3 = out - target # bp s2 = np.dot(self.w2.T, s3) s2 = relu_back(s2, a2) # grad gw1 = np.dot(s2, a1.T) / num_samples gb1 = np.sum(s2, axis=1, keepdims=True) / num_samples gw2 = np.dot(s3, a2.T) / num_samples gb2 = np.sum(s3, axis=1, keepdims=True) / num_samples # update self.w1 -= self.eps_w * gw1 self.w2 -= self.eps_w * gw2 self.b1 -= self.eps_b * gb1 self.b2 -= self.eps_b * gb2 if (count % 40 == 0): correct = np.max_index(out, axis=0) - np.max_index(target, axis=0) print 'Training error:', float(np.count_nonzero(correct)) / num_samples count = count + 1 # test a1 = test_samples.T a2 = relu(np.dot(self.w1, a1) + self.b1) a3 = np.dot(self.w2, a2) + self.b2 correct = np.max_index(a3, axis=0) - np.max_index(test_labels.T, axis=0) #print correct print 'Testing error:', float(np.count_nonzero(correct)) / num_test_samples print '---Finish epoch #%d' % epoch
def train_network(filename, model, num_epochs=5, minibatch_size=256, lr=0.1, lr_decay=0.95, mom=0.9, wd=5e-4):
    """Train `model` with data-parallel momentum SGD over the module-level
    `devs` device list, with per-epoch learning-rate decay.

    Minibatches are assigned round-robin to devices; when the round wraps back
    to device 0, the per-device gradients are merged and a momentum +
    weight-decay update is applied.  The owl profiler result is printed at each
    accuracy report, and `lr` is multiplied by `lr_decay` after every epoch.

    Relies on module-level names: `devs`, `lazy_cycle`, `mnist_io`, `owl`,
    `time`, `bpprop`, `multi_dev_merge`, `print_training_accuracy`.

    :param filename: path to the MNIST .mat file to load
    :param model: network object exposing `weights`, `bias`, `weightdelta`,
        `biasdelta` lists (the delta lists hold the momentum buffers)
    :param num_epochs: number of passes over the training set
    :param minibatch_size: total minibatch size, split evenly across devices
    :param lr: initial learning rate
    :param lr_decay: multiplicative per-epoch learning-rate decay factor
    :param mom: momentum coefficient
    :param wd: L2 weight-decay coefficient (applied to weights only, not bias)
    """
    # load data
    # Python 2 integer division: each device gets minibatch_size / len(devs)
    # samples -- presumably minibatch_size is divisible by len(devs); verify.
    (train_data, test_data) = mnist_io.load_mb_from_mat(filename, minibatch_size / len(devs))
    num_test_samples = test_data[0].shape[0]
    # layout [28, 28, channels=1, batch] expected by bpprop
    test_samples = owl.from_numpy(test_data[0]).reshape([28, 28, 1, num_test_samples])
    test_labels = owl.from_numpy(test_data[1])
    for i in xrange(num_epochs):
        print "---Epoch #", i
        last = time.time()
        count = 0
        # one gradient slot per device, filled as the round-robin progresses
        weightgrads = [None] * len(devs)
        biasgrads = [None] * len(devs)
        for (mb_samples, mb_labels) in train_data:
            count += 1
            # round-robin device assignment; wraps to 0 each full round
            current_dev = count % len(devs)
            owl.set_device(devs[current_dev])
            num_samples = mb_samples.shape[0]
            data = owl.from_numpy(mb_samples).reshape([28, 28, 1, num_samples])
            label = owl.from_numpy(mb_labels)
            out, weightgrads[current_dev], biasgrads[current_dev] = bpprop(model, data, label)
            if current_dev == 0:
                # a full round has completed: merge all per-device gradients
                # and apply the momentum update; weight decay hits weights only
                for k in range(len(model.weights)):
                    model.weightdelta[k] = mom * model.weightdelta[k] - lr / num_samples / len(devs) * multi_dev_merge(weightgrads, 0, k) - lr * wd * model.weights[k]
                    model.biasdelta[k] = mom * model.biasdelta[k] - lr / num_samples / len(devs) * multi_dev_merge(biasgrads, 0, k)
                    model.weights[k] += model.weightdelta[k]
                    model.bias[k] += model.biasdelta[k]
                # report accuracy (and profiler stats) every lazy_cycle rounds
                if count % (len(devs) * lazy_cycle) == 0:
                    print_training_accuracy(out, label, num_samples, 'Training ' + str(count))
                    owl.print_profiler_result()
        print '---End of Epoch #', i, 'time:', time.time() - last
        # decay the learning rate once per epoch
        lr = lr * lr_decay
        # do test
        out, _, _ = bpprop(model, test_samples, test_labels)
        print_training_accuracy(out, test_labels, num_test_samples, 'Testing')