def train_network(model, num_epochs=100, minibatch_size=256, dropout_rate=0.5, eps_w=0.01, eps_b=0.01, mom=0.9, wd=0.0005):
    """Train `model` on synthetic random minibatches on a single GPU.

    Momentum SGD with L2 weight decay: eps_w/eps_b are the weight/bias
    learning rates, mom the momentum coefficient, wd the decay factor.
    Runs 300 fixed-size minibatches per epoch.
    """
    gpu0 = owl.create_gpu_device(0)
    gpu1 = owl.create_gpu_device(1)  # NOTE(review): created but never used below
    num_layers = 20                  # NOTE(review): unused in this function
    num_weights = 8
    count = 0
    last = time.time()
    # dp = ImageNetDataProvider(mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
    #         train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
    #         val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
    #         test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')
    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for j in xrange(300):
            count = count + 1
            # Synthetic data standing in for the (commented-out) ImageNet provider.
            data = owl.randn([227, 227, 3, minibatch_size], 0, 1)
            label = owl.randn([1, minibatch_size], 0, 1)
            weightsgrad = [None] * num_weights
            biasgrad = [None] * num_weights
            num_samples = minibatch_size
            '''
            thisimg = samples[0, :]
            print thisimg
            imgdata = np.transpose(thisimg.reshape([3, 227*227])).reshape([227, 227, 3])
            print imgdata
            img = Image.fromarray(imgdata.astype(np.uint8))
            img.save('testimg.jpg', format='JPEG')
            exit(0)
            '''
            owl.set_device(gpu0)
            out = train_one_mb(model, data, label, weightsgrad, biasgrad, dropout_rate)
            for k in range(num_weights):
                # Momentum SGD update with weight decay, scaled by batch size.
                model.weightsdelta[k] = mom * model.weightsdelta[k] - eps_w / num_samples * (weightsgrad[k] + wd * model.weights[k])
                model.biasdelta[k] = mom * model.biasdelta[k] - eps_b / num_samples * (biasgrad[k] + wd * model.bias[k])
                model.weights[k] += model.weightsdelta[k]
                model.weights[k].start_eval()  # kick off lazy evaluation early
                model.bias[k] += model.biasdelta[k]
                model.bias[k].start_eval()
            if count % 3 == 0:
                print_training_accuracy(out, label, data.shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()
def train_network(model, num_epochs = 100, minibatch_size=256, dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005): gpu0 = owl.create_gpu_device(0) gpu1 = owl.create_gpu_device(1) num_layers = 20 num_weights = 8 count = 0 last = time.time() dp = ImageNetDataProvider(mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto', train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb', val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb', test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb') minibatch_size = minibatch_size / 2 for i in xrange(num_epochs): print "---------------------Epoch #", i for j in xrange(300): count = count + 1 if count % 2 == 1: data1 = owl.from_nparray(samples).reshape([227, 227, 3, samples.shape[0]]) label1 = owl.from_nparray(labels) #data1 = owl.randn([227, 227, 3, minibatch_size], 0, 1) #label1 = owl.randn([1, minibatch_size], 0, 1) weightsgrad1 = [None] * num_weights biasgrad1 = [None] * num_weights owl.set_device(gpu0) out1 = train_one_mb(model, data1, label1, weightsgrad1, biasgrad1, dropout_rate) out1.start_eval() continue if count % 2 == 0: data2 = owl.from_nparray(samples).reshape([227, 227, 3, samples.shape[0]]) label2 = owl.from_nparray(labels) #data2 = owl.randn([227, 227, 3, minibatch_size], 0, 1) #label2 = owl.randn([1, minibatch_size], 0, 1) weightsgrad2 = [None] * num_weights biasgrad2 = [None] * num_weights num_samples = data1.shape[-1] + data2.shape[-1] owl.set_device(gpu1) out2 = train_one_mb(model, data2, label2, weightsgrad2, biasgrad2, dropout_rate) out2.start_eval() for k in range(num_weights): model.weightsdelta[k] = mom * model.weightsdelta[k] - eps_w / num_samples * (weightsgrad1[k] + weightsgrad2[k] + wd * model.weights[k]) model.biasdelta[k] = mom * model.biasdelta[k] - eps_b / num_samples * (biasgrad1[k] + biasgrad2[k]) model.weights[k] += model.weightsdelta[k] model.bias[k] += model.biasdelta[k] if count % 8 == 0: print_training_accuracy(out1, label1, data1.shape[-1]) print "time: %s" 
% (time.time() - last) last = time.time()
def train_network(model, num_epochs = 100, minibatch_size=256, dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005):
    """Data-parallel training over two GPUs, alternating minibatches.

    Gradients from a gpu[0]/gpu[1] pair of half-minibatches are summed and
    applied through model.update() once per pair.
    """
    gpu = [None] * 2
    gpu[0] = owl.create_gpu_device(0)
    gpu[1] = owl.create_gpu_device(1)
    num_layers = 20  # NOTE(review): unused in this function
    num_weights = 8
    count = 0
    last = time.time()
    dp = ImageNetDataProvider(mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
            train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
            val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
            test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')
    # Each GPU gets half of the requested minibatch.
    minibatch_size = minibatch_size / 2
    wgrad = [None] * 2
    bgrad = [None] * 2
    num_samples = 0
    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for (samples, labels) in dp.get_train_mb(minibatch_size):
        #for j in range(300):
            count = count + 1
            # Alternate minibatches between the two devices.
            gpuid = count % 2
            owl.set_device(gpu[gpuid])
            data = owl.from_numpy(samples).reshape([227, 227, 3, samples.shape[0]])
            label = owl.from_numpy(labels)
            #data = owl.randn([227, 227, 3, 128], 0.0, 0.01)
            #label = owl.randn([1000, 128], 0.0, 0.01)
            num_samples += data.shape[-1]
            (out, wgrad[gpuid], bgrad[gpuid]) = model.train_one_mb(data, label, dropout_rate)
            out.start_eval()  # kick off lazy evaluation so both GPUs overlap
            if count % 2 != 0:
                continue
            # Both halves done: sum gradients and apply one SGD update.
            for k in range(num_weights):
                wgrad[0][k] += wgrad[1][k]
                bgrad[0][k] += bgrad[1][k]
            model.update(wgrad[0], bgrad[0], num_samples, mom, eps_w, wd)
            if count % 8 == 0:
                print_training_accuracy(out, label, data.shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()
            num_samples = 0
            wgrad = [None] * 2
            bgrad = [None] * 2
def train_network(model, num_epochs = 100, minibatch_size=256, dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005): gpu = [None] * 2 gpu[0] = owl.create_gpu_device(0) gpu[1] = owl.create_gpu_device(1) num_layers = 20 num_weights = 8 count = 0 last = time.time() dp = ImageNetDataProvider(mean_file='/home/yutian/data/config_file/google_model/imagenet_mean.binaryproto', train_db='/home/yutian/data/imagenet/ilsvrc12_train_lmdb', val_db='/home/yutian/data/imagenet/ilsvrc12_val_lmdb', test_db='/home/yutian/data/imagenet/ilsvrc12_test_lmdb') minibatch_size = minibatch_size / 2 wgrad = [None] * 2 bgrad = [None] * 2 num_samples = 0 for i in xrange(num_epochs): print "---------------------Epoch #", i for (samples, labels) in dp.get_train_mb(minibatch_size): #for j in range(300): count = count + 1 gpuid = count % 2 owl.set_device(gpu[gpuid]) data = owl.from_numpy(samples).reshape([227, 227, 3, samples.shape[0]]) label = owl.from_numpy(labels) #data = owl.randn([227, 227, 3, 128], 0.0, 0.01) #label = owl.randn([1000, 128], 0.0, 0.01) num_samples += data.shape[-1] (out, wgrad[gpuid], bgrad[gpuid]) = model.train_one_mb(data, label, dropout_rate) if count % 2 != 0: continue for k in range(num_weights): wgrad[0][k] += wgrad[1][k] bgrad[0][k] += bgrad[1][k] model.update(wgrad[0], bgrad[0], num_samples, mom, eps_w, wd) if count % 8 == 0: print_training_accuracy(out, label, data.shape[-1]) print "time: %s" % (time.time() - last) last = time.time() num_samples = 0 wgrad = [None] * 2 bgrad = [None] * 2
def train_network(model, num_epochs = 100, minibatch_size=256, dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005):
    """Train `model` on synthetic random minibatches on a single GPU.

    Momentum SGD with L2 weight decay; 300 fixed-size minibatches per epoch.
    """
    gpu0 = owl.create_gpu_device(0)
    gpu1 = owl.create_gpu_device(1)  # NOTE(review): created but never used below
    num_layers = 20                  # NOTE(review): unused in this function
    num_weights = 8
    count = 0
    last = time.time()
    # dp = ImageNetDataProvider(mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
    #         train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
    #         val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
    #         test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')
    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for j in xrange(300):
            count = count + 1
            # Synthetic data standing in for the (commented-out) ImageNet provider.
            data = owl.randn([227, 227, 3, minibatch_size], 0, 1)
            label = owl.randn([1, minibatch_size], 0, 1)
            weightsgrad = [None] * num_weights
            biasgrad = [None] * num_weights
            num_samples = minibatch_size
            '''
            thisimg = samples[0, :]
            print thisimg
            imgdata = np.transpose(thisimg.reshape([3, 227*227])).reshape([227, 227, 3])
            print imgdata
            img = Image.fromarray(imgdata.astype(np.uint8))
            img.save('testimg.jpg', format='JPEG')
            exit(0)
            '''
            owl.set_device(gpu0)
            out = train_one_mb(model, data, label, weightsgrad, biasgrad, dropout_rate)
            for k in range(num_weights):
                # Momentum SGD update with weight decay, scaled by batch size.
                model.weightsdelta[k] = mom * model.weightsdelta[k] - eps_w / num_samples * (weightsgrad[k] + wd * model.weights[k])
                model.biasdelta[k] = mom * model.biasdelta[k] - eps_b / num_samples * (biasgrad[k] + wd * model.bias[k])
                model.weights[k] += model.weightsdelta[k]
                model.weights[k].start_eval()  # kick off lazy evaluation early
                model.bias[k] += model.biasdelta[k]
                model.bias[k].start_eval()
            if count % 3 == 0:
                print_training_accuracy(out, label, data.shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()
def train_network(model, num_epochs = 100, minibatch_size=256, dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005): gpu0 = owl.create_gpu_device(0) owl.set_device(gpu0) num_weights = 8 count = 0 last = time.time() dp = ImageNetDataProvider(mean_file='/home/yutian/data/config_file/google_model/imagenet_mean.binaryproto', train_db='/home/yutian/data/imagenet/ilsvrc12_train_lmdb', val_db='/home/yutian/data/imagenet/ilsvrc12_val_lmdb', test_db='/home/yutian/data/imagenet/ilsvrc12_test_lmdb') for i in xrange(num_epochs): print "---------------------Epoch #", i for (samples, labels) in dp.get_train_mb(minibatch_size): count = count + 1 num_samples = samples.shape[0] data = owl.from_numpy(samples).reshape([227, 227, 3, num_samples]) target = owl.from_numpy(labels) out, weightsgrad, biasgrad = model.train_one_mb(data, target, dropout_rate) model.update(weightsgrad, biasgrad, num_samples, mom, eps_w, wd) if count % 4 == 0: print_training_accuracy(out, target, data.shape[-1]) print "time: %s" % (time.time() - last) last = time.time()
def train_network(model, num_epochs = 100, minibatch_size = 256, lr = 0.01, mom = 0.9, wd = 0.0000): np.set_printoptions(linewidth=200) owl.set_device(owl.create_gpu_device(0)) count = 0 # load data (train_data, test_data) = imageio.load_mb_from_mat("mnist_all.mat", minibatch_size) num_test_samples = test_data[0].shape[0] (test_samples, test_labels) = map(lambda npdata : owl.from_nparray(npdata), test_data) for i in xrange(num_epochs): print "---Epoch #", i for (mb_samples, mb_labels) in train_data: num_samples = mb_samples.shape[0] data = owl.from_nparray(mb_samples).reshape([28, 28, 1, num_samples]) label = owl.from_nparray(mb_labels) out, weightgrad, biasgrad = train(model, data, label) for k in range(len(model.weights)): model.weightdelta[k] = mom * model.weightdelta[k] - lr / num_samples * weightgrad[k] - wd * model.weights[k] model.biasdelta[k] = mom * model.biasdelta[k] - lr / num_samples * biasgrad[k] model.weights[k] += model.weightdelta[k] model.bias[k] += model.biasdelta[k] count = count + 1 if (count % 1) == 0: print_training_accuracy(out, label, num_samples) if count == 100: sys.exit()
def __init__(self, solver_file, snapshot, layer_name, result_path, gpu_idx = 0):
    """Remember the extraction settings and bind the chosen GPU device."""
    # Configuration handed in by the caller.
    self.solver_file = solver_file
    self.snapshot = snapshot
    self.layer_name = layer_name
    self.result_path = result_path
    # Device setup: keep the handle and make it the active device.
    self.gpu = owl.create_gpu_device(gpu_idx)
    owl.set_device(self.gpu)
def train_network(model, num_epochs = 100, minibatch_size=10, dropout_rate = 0.5, eps_w = 0.01, mom = 0.9, wd = 0.0005):
    """Run one forward/backward smoke test of `model` on a single ImageNet
    minibatch.

    NOTE(review): no weight update is performed (eps_w/mom/wd are unused)
    and the trailing exit(0) stops after the first minibatch -- this looks
    like debugging scaffolding; confirm before relying on it to train.
    """
    gpu0 = owl.create_gpu_device(0)
    owl.set_device(gpu0)
    num_weights = 8
    count = 0
    last = time.time()
    cropped_size = 224
    dp = ImageNetDataProvider(mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
            train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
            val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
            test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')
    #mark the output layer
    output_layer = 'prob'
    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for (samples, labels) in dp.get_train_mb(minibatch_size, cropped_size):
            count = count + 1
            num_samples = samples.shape[0]
            data = owl.from_numpy(samples).reshape([cropped_size, cropped_size, 3, num_samples])
            target = owl.from_numpy(labels)
            model.ff(data, target)  # forward pass
            print_training_accuracy(model.layers[output_layer].get_act(), target, minibatch_size)
            model.bp(data, target)  # backward pass (gradients only; no update)
            exit(0)                 # debugging: stop after the first minibatch
def __init__(self, solver_file, snapshot = 0, gpu = 1, sync_freq=1, report=False, do_histogram=False):
    """Set up the device list for (optionally MPI-distributed) training.

    gpu == 1 selects GPU devices, any other value selects CPU devices;
    when MPI is available, devices are created on the remote nodes
    (node 0 being the master) instead of locally.
    """
    self.solver_file = solver_file
    self.snapshot = snapshot
    self.num_gpu = gpu
    self.sync_freq = sync_freq
    self.report = report
    self.do_histogram=do_histogram
    if owl.has_mpi():
        self.gpu = []
        if gpu == 1:
            #self.gpu += [owl.create_gpu_device(i) for i in range(owl.get_gpu_device_count())]
            # One entry per GPU on every remote MPI node (skip master node 0).
            nodes = [owl.get_mpi_device_count(i) for i in range(1,owl.get_mpi_node_count())]
            for n in range(len(nodes)):
                print "using {} gpu's on node {}\n".format(nodes[n],n+1)
                self.gpu += [owl.create_mpi_device(n+1,i+1) for i in range(nodes[n])]
            self.num_gpu = len(self.gpu)
        else:
            # CPU mode: the local CPU plus one CPU device per remote node.
            self.gpu += [owl.create_cpu_device()]
            self.gpu += [owl.create_mpi_device(n,0) for n in range(1,owl.get_mpi_node_count())]
            self.num_gpu = len(self.gpu)
            print "using {} cpu's over all nodes".format(self.num_gpu)
    else:
        if gpu == 1:
            # NOTE(review): range(self.num_gpu) == range(gpu) == range(1)
            # here, so only one local GPU is ever created -- confirm intent.
            self.gpu = [owl.create_gpu_device(i) for i in range(self.num_gpu)]
            self.num_gpu = len(self.gpu)
            print "using {} gpu devices".format(len(self.gpu))
        else:
            self.gpu = [owl.create_cpu_device()]
            self.num_gpu = len(self.gpu)
            print "using {} cpus".format(len(self.gpu))
def __init__(self, solver_file, softmax_layer_name, accuracy_layer_name, snapshot, gpu_idx = 0):
    """Store the test-net configuration and select the GPU to run on."""
    # Caller-supplied configuration.
    self.solver_file = solver_file
    self.softmax_layer_name = softmax_layer_name
    self.accuracy_layer_name = accuracy_layer_name
    self.snapshot = snapshot
    # Bind this process to the requested GPU and keep the handle.
    self.gpu = owl.create_gpu_device(gpu_idx)
    owl.set_device(self.gpu)
def __init__(self, net_file, solver_file, snapshot, snapshot_dir, num_gpu=1):
    """Record training configuration and allocate one device per GPU."""
    self.net_file = net_file
    self.solver_file = solver_file
    self.snapshot = snapshot
    self.snapshot_dir = snapshot_dir
    self.num_gpu = num_gpu
    # One owl device handle per requested GPU, indexed 0..num_gpu-1.
    self.gpu = [owl.create_gpu_device(idx) for idx in range(num_gpu)]
def __init__(self, data_file='mnist_all.mat', num_epochs=100, mb_size=256, eps_w=0.01, eps_b=0.01):
    """Configure the 784-256-10 MLP and allocate its parameters.

    Weights use a scaled-Gaussian init with std sqrt(4 / (fan_in + fan_out));
    biases start at zero.
    """
    self.cpu = owl.create_cpu_device()
    self.gpu = owl.create_gpu_device(0)
    # Hyper-parameters.
    self.data_file = data_file
    self.num_epochs = num_epochs
    self.mb_size = mb_size
    self.eps_w = eps_w
    self.eps_b = eps_b
    # Layer sizes: input 784 (28x28 pixels), hidden 256, output 10 classes.
    in_dim, hid_dim, out_dim = 784, 256, 10
    self.l1, self.l2, self.l3 = in_dim, hid_dim, out_dim
    self.w1 = owl.randn([hid_dim, in_dim], 0.0, math.sqrt(4.0 / (in_dim + hid_dim)))
    self.w2 = owl.randn([out_dim, hid_dim], 0.0, math.sqrt(4.0 / (hid_dim + out_dim)))
    self.b1 = owl.zeros([hid_dim, 1])
    self.b2 = owl.zeros([out_dim, 1])
import owl
import sys

# Initialize the owl runtime and enumerate available devices.
owl.initialize(sys.argv)
cpu = owl.create_cpu_device()
# One device handle per physical GPU present on this machine.
gpu = [owl.create_gpu_device(i) for i in range(owl.get_gpu_device_count())]
# ASCII-art startup banner (literal preserved as-is).
print ''' __ __ _ __ _ _____ ____ _ _ ___ / | / | | | | \\ | | | ___| | _ \\ | | / / / | / |/ | | | | \\| | | |__ | |_| | | | / / / /| | / /| /| | | | | | | __| | / | |/ / / /_| | / / | / | | | | | |\\ | | |___ | |\\ \\ | / / ___ | /_/ |_/ |_| |_| |_| \\__| |_____| |_| \\_\\ |__/ /_/ |_| '''
print '[INFO] You have %d GPU devices' % len(gpu)
print '[INFO] Set device to gpu[0]'
def train_network(model, num_epochs = 100, minibatch_size=256, dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005):
    """Train an AlexNet-style network with a hand-written forward/backward
    pass on a single GPU.

    Layer schedule (as laid out below): conv1-pool1-conv2-pool2-conv3-conv4-
    conv5-pool5-fc6-drop6-fc7-drop7-fc8-softmax. Updates are momentum SGD
    with weight decay on both weights and biases.
    """
    gpu = owl.create_gpu_device(1)
    owl.set_device(gpu)
    num_layers = 20
    count = 0
    last = time.time()
    dp = ImageNetDataProvider(mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
            train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
            val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
            test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')
    acts = [None] * num_layers
    sens = [None] * num_layers
    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        sys.stdout.flush()
        for (samples, labels) in dp.get_train_mb(minibatch_size):
            num_samples = samples.shape[0]
            # Fresh activation/sensitivity slots for every minibatch.
            acts = [None] * num_layers
            sens = [None] * num_layers
            '''
            thisimg = samples[0, :]
            print thisimg
            imgdata = np.transpose(thisimg.reshape([3, 227*227])).reshape([227, 227, 3])
            print imgdata
            img = Image.fromarray(imgdata.astype(np.uint8))
            img.save('testimg.jpg', format='JPEG')
            exit(0)
            '''
            # FF
            acts[0] = owl.from_nparray(samples).reshape([227, 227, 3, num_samples])
            #print np.array(acts[0].tolist())[0:227*227*3]
            target = owl.from_nparray(labels)
            #np.set_printoptions(linewidth=200)
            #print acts[0].shape, model.weights[0].shape, model.bias[0].shape
            #im = np.array(acts[0].tolist()).reshape([num_samples, 227, 227, 3])
            #print im[0,:,:,0]
            #print im[0,:,:,1]
            #print im[0,:,:,2]
            #print target.max_index(0).tolist()[0:20]
            #sys.exit()
            # Pre-activation outputs (acts1, acts3, ...) are kept so that
            # relu_back can use them during the backward pass.
            acts1 = conv_forward(acts[0], model.weights[0], model.bias[0], model.conv_infos[0])
            acts[1] = ele.relu(acts1)#(conv_forward(acts[0], model.weights[0], model.bias[0], model.conv_infos[0])) # conv1
            acts[2] = pooling_forward(acts[1], model.pooling_infos[0]) # pool1
            acts3 = conv_forward(acts[2], model.weights[1], model.bias[1], model.conv_infos[1]) # conv2
            acts[3] = ele.relu(acts3)#(conv_forward(acts[2], model.weights[1], model.bias[1], model.conv_infos[1])) # conv2
            acts[4] = pooling_forward(acts[3], model.pooling_infos[1]) # pool2
            acts5 = conv_forward(acts[4], model.weights[2], model.bias[2], model.conv_infos[2]) # conv3
            acts[5] = ele.relu(acts5)#(conv_forward(acts[4], model.weights[2], model.bias[2], model.conv_infos[2])) # conv3
            acts6 = conv_forward(acts[5], model.weights[3], model.bias[3], model.conv_infos[3]) # conv4
            acts[6] = ele.relu(acts6)#(conv_forward(acts[5], model.weights[3], model.bias[3], model.conv_infos[3])) # conv4
            acts7 = conv_forward(acts[6], model.weights[4], model.bias[4], model.conv_infos[4]) # conv5
            acts[7] = ele.relu(acts7)#(conv_forward(acts[6], model.weights[4], model.bias[4], model.conv_infos[4])) # conv5
            acts[8] = pooling_forward(acts[7], model.pooling_infos[2]) # pool5
            # Flatten the pooled volume into a matrix for the FC layers.
            re_acts8 = acts[8].reshape([np.prod(acts[8].shape[0:3]), num_samples])
            acts9 = model.weights[5] * re_acts8 + model.bias[5] # fc6
            acts[9] = ele.relu(acts9)#(model.weights[5] * re_acts8 + model.bias[5]) # fc6
            mask6 = owl.randb(acts[9].shape, dropout_rate)
            acts[9] = ele.mult(acts[9], mask6) # drop6
            acts10 = model.weights[6] * acts[9] + model.bias[6] # fc7
            acts[10] = ele.relu(acts10)#(model.weights[6] * acts[9] + model.bias[6]) # fc7
            mask7 = owl.randb(acts[10].shape, dropout_rate)
            acts[10] = ele.mult(acts[10], mask7) # drop7
            acts[11] = model.weights[7] * acts[10] + model.bias[7] # fc8
            acts[12] = softmax_forward(acts[11].reshape([1000, 1, 1, num_samples]), soft_op.instance).reshape([1000, num_samples]) # prob
            # error
            sens[11] = acts[12] - target
            # BP
            sens[10] = model.weights[7].trans() * sens[11] # fc8
            sens[10] = ele.mult(sens[10], mask7) # drop7
            sens[10] = ele.relu_back(sens[10], acts[10], acts10) # relu7
            sens[9] = model.weights[6].trans() * sens[10]
            sens[9] = ele.mult(sens[9], mask6) # drop6
            sens[9] = ele.relu_back(sens[9], acts[9], acts9) # relu6
            sens[8] = (model.weights[5].trans() * sens[9]).reshape(acts[8].shape) # fc6
            sens[7] = pooling_backward(sens[8], acts[8], acts[7], model.pooling_infos[2]) # pool5
            sens[7] = ele.relu_back(sens[7], acts[7], acts7) # relu5
            sens[6] = conv_backward_data(sens[7], model.weights[4], model.conv_infos[4]) # conv5
            sens[6] = ele.relu_back(sens[6], acts[6], acts6) # relu4
            sens[5] = conv_backward_data(sens[6], model.weights[3], model.conv_infos[3]) # conv4
            sens[5] = ele.relu_back(sens[5], acts[5], acts5) # relu3
            sens[4] = conv_backward_data(sens[5], model.weights[2], model.conv_infos[2]) # conv3
            sens[3] = pooling_backward(sens[4], acts[4], acts[3], model.pooling_infos[1]) # pool2
            sens[3] = ele.relu_back(sens[3], acts[3], acts3) # relu2
            sens[2] = conv_backward_data(sens[3], model.weights[1], model.conv_infos[1]) # conv2
            sens[1] = pooling_backward(sens[2], acts[2], acts[1], model.pooling_infos[0]) # pool1
            sens[1] = ele.relu_back(sens[1], acts[1], acts1) # relu1
            # Momentum-SGD deltas, layer by layer (fc8 down to conv1).
            model.weightsdelta[7] = mom * model.weightsdelta[7] - eps_w / num_samples * (sens[11] * acts[10].trans() + wd * model.weights[7])
            model.biasdelta[7] = mom * model.biasdelta[7] - eps_b / num_samples * (sens[11].sum(1) + wd * model.bias[7])
            model.weightsdelta[6] = mom * model.weightsdelta[6] - eps_w / num_samples * (sens[10] * acts[9].trans() + wd * model.weights[6])
            model.biasdelta[6] = mom * model.biasdelta[6] - eps_b / num_samples * (sens[10].sum(1) + wd * model.bias[6])
            model.weightsdelta[5] = mom * model.weightsdelta[5] - eps_w / num_samples * (sens[9] * re_acts8.trans() + wd * model.weights[5])
            model.biasdelta[5] = mom * model.biasdelta[5] - eps_b / num_samples * (sens[9].sum(1) + wd * model.bias[5])
            model.weightsdelta[4] = mom * model.weightsdelta[4] - eps_w / num_samples * (conv_backward_filter(sens[7], acts[6], model.conv_infos[4]) + wd * model.weights[4])
            model.biasdelta[4] = mom * model.biasdelta[4] - eps_b / num_samples * (conv_backward_bias(sens[7]) + wd * model.bias[4])
            model.weightsdelta[3] = mom * model.weightsdelta[3] - eps_w / num_samples * (conv_backward_filter(sens[6], acts[5], model.conv_infos[3]) + wd * model.weights[3])
            model.biasdelta[3] = mom * model.biasdelta[3] - eps_b / num_samples * (conv_backward_bias(sens[6]) + wd * model.bias[3])
            model.weightsdelta[2] = mom * model.weightsdelta[2] - eps_w / num_samples * (conv_backward_filter(sens[5], acts[4], model.conv_infos[2]) + wd * model.weights[2])
            model.biasdelta[2] = mom * model.biasdelta[2] - eps_b / num_samples * (conv_backward_bias(sens[5]) + wd * model.bias[2])
            model.weightsdelta[1] = mom * model.weightsdelta[1] - eps_w / num_samples * (conv_backward_filter(sens[3], acts[2], model.conv_infos[1]) + wd * model.weights[1])
            model.biasdelta[1] = mom * model.biasdelta[1] - eps_b / num_samples * (conv_backward_bias(sens[3]) + wd * model.bias[1])
            model.weightsdelta[0] = mom * model.weightsdelta[0] - eps_w / num_samples * (conv_backward_filter(sens[1], acts[0], model.conv_infos[0]) + wd * model.weights[0])
            model.biasdelta[0] = mom * model.biasdelta[0] - eps_b / num_samples * (conv_backward_bias(sens[1]) + wd * model.bias[0])
            # Apply the accumulated deltas to all eight layers.
            for k in range(8):
                model.weights[k] += model.weightsdelta[k]
                model.bias[k] += model.biasdelta[k]
            count = count + 1
            #if count % 2 == 0:
                #acts[18].start_eval()
            if count % 10 == 0:
                print_training_accuracy(acts[12], target, num_samples)
                print "time: %s" % (time.time() - last)
                last = time.time()
def train_network(model, num_epochs=100, minibatch_size=256, dropout_rate=0.5, eps_w=0.01, eps_b=0.01, mom=0.9, wd=0.0005):
    """Train an AlexNet-style network with a hand-written forward/backward
    pass on a single GPU (formatted variant).

    Same layer schedule as the sibling trainer (conv1..pool5, fc6-drop6,
    fc7-drop7, fc8, softmax). Note: in this variant the bias updates carry
    no weight-decay term -- only the weights are decayed.
    """
    gpu = owl.create_gpu_device(1)
    owl.set_device(gpu)
    num_layers = 20
    count = 0
    last = time.time()
    dp = ImageNetDataProvider(
        mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
        train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
        val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
        test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')
    acts = [None] * num_layers
    sens = [None] * num_layers
    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        sys.stdout.flush()
        for (samples, labels) in dp.get_train_mb(minibatch_size):
            num_samples = samples.shape[0]
            # Fresh activation/sensitivity slots for every minibatch.
            acts = [None] * num_layers
            sens = [None] * num_layers
            # FF
            acts[0] = owl.from_nparray(samples).reshape([227, 227, 3, num_samples])
            target = owl.from_nparray(labels)
            # Pre-activation outputs (acts1, acts3, ...) are kept for relu_back.
            acts1 = conv_forward(acts[0], model.weights[0], model.bias[0], model.conv_infos[0])
            acts[1] = ele.relu(acts1)  #(conv_forward(acts[0], model.weights[0], model.bias[0], model.conv_infos[0])) # conv1
            acts[2] = pooling_forward(acts[1], model.pooling_infos[0])  # pool1
            acts3 = conv_forward(acts[2], model.weights[1], model.bias[1], model.conv_infos[1])  # conv2
            acts[3] = ele.relu(acts3)  #(conv_forward(acts[2], model.weights[1], model.bias[1], model.conv_infos[1])) # conv2
            acts[4] = pooling_forward(acts[3], model.pooling_infos[1])  # pool2
            acts5 = conv_forward(acts[4], model.weights[2], model.bias[2], model.conv_infos[2])  # conv3
            acts[5] = ele.relu(acts5)  #(conv_forward(acts[4], model.weights[2], model.bias[2], model.conv_infos[2])) # conv3
            acts6 = conv_forward(acts[5], model.weights[3], model.bias[3], model.conv_infos[3])  # conv4
            acts[6] = ele.relu(acts6)  #(conv_forward(acts[5], model.weights[3], model.bias[3], model.conv_infos[3])) # conv4
            acts7 = conv_forward(acts[6], model.weights[4], model.bias[4], model.conv_infos[4])  # conv5
            acts[7] = ele.relu(acts7)  #(conv_forward(acts[6], model.weights[4], model.bias[4], model.conv_infos[4])) # conv5
            acts[8] = pooling_forward(acts[7], model.pooling_infos[2])  # pool5
            # Flatten the pooled volume into a matrix for the FC layers.
            re_acts8 = acts[8].reshape([np.prod(acts[8].shape[0:3]), num_samples])
            acts9 = model.weights[5] * re_acts8 + model.bias[5]  # fc6
            acts[9] = ele.relu(acts9)  #(model.weights[5] * re_acts8 + model.bias[5]) # fc6
            mask6 = owl.randb(acts[9].shape, dropout_rate)
            acts[9] = ele.mult(acts[9], mask6)  # drop6
            acts10 = model.weights[6] * acts[9] + model.bias[6]  # fc7
            acts[10] = ele.relu(acts10)  #(model.weights[6] * acts[9] + model.bias[6]) # fc7
            mask7 = owl.randb(acts[10].shape, dropout_rate)
            acts[10] = ele.mult(acts[10], mask7)  # drop7
            acts[11] = model.weights[7] * acts[10] + model.bias[7]  # fc8
            acts[12] = softmax_forward(acts[11].reshape([1000, 1, 1, num_samples]), soft_op.instance).reshape([1000, num_samples])  # prob
            # error
            sens[11] = acts[12] - target
            # BP
            sens[10] = model.weights[7].trans() * sens[11]  # fc8
            sens[10] = ele.mult(sens[10], mask7)  # drop7
            sens[10] = ele.relu_back(sens[10], acts[10], acts10)  # relu7
            sens[9] = model.weights[6].trans() * sens[10]
            sens[9] = ele.mult(sens[9], mask6)  # drop6
            sens[9] = ele.relu_back(sens[9], acts[9], acts9)  # relu6
            sens[8] = (model.weights[5].trans() * sens[9]).reshape(acts[8].shape)  # fc6
            sens[7] = pooling_backward(sens[8], acts[8], acts[7], model.pooling_infos[2])  # pool5
            sens[7] = ele.relu_back(sens[7], acts[7], acts7)  # relu5
            sens[6] = conv_backward_data(sens[7], model.weights[4], model.conv_infos[4])  # conv5
            sens[6] = ele.relu_back(sens[6], acts[6], acts6)  # relu4
            sens[5] = conv_backward_data(sens[6], model.weights[3], model.conv_infos[3])  # conv4
            sens[5] = ele.relu_back(sens[5], acts[5], acts5)  # relu3
            sens[4] = conv_backward_data(sens[5], model.weights[2], model.conv_infos[2])  # conv3
            sens[3] = pooling_backward(sens[4], acts[4], acts[3], model.pooling_infos[1])  # pool2
            sens[3] = ele.relu_back(sens[3], acts[3], acts3)  # relu2
            sens[2] = conv_backward_data(sens[3], model.weights[1], model.conv_infos[1])  # conv2
            sens[1] = pooling_backward(sens[2], acts[2], acts[1], model.pooling_infos[0])  # pool1
            sens[1] = ele.relu_back(sens[1], acts[1], acts1)  # relu1
            # Momentum-SGD deltas, fc8 down to conv1; weight decay on weights only.
            model.weightsdelta[7] = mom * model.weightsdelta[7] - eps_w / num_samples * (sens[11] * acts[10].trans() + wd * model.weights[7])
            model.biasdelta[7] = mom * model.biasdelta[7] - eps_b / num_samples * sens[11].sum(1)
            model.weightsdelta[6] = mom * model.weightsdelta[6] - eps_w / num_samples * (sens[10] * acts[9].trans() + wd * model.weights[6])
            model.biasdelta[6] = mom * model.biasdelta[6] - eps_b / num_samples * sens[10].sum(1)
            model.weightsdelta[5] = mom * model.weightsdelta[5] - eps_w / num_samples * (sens[9] * re_acts8.trans() + wd * model.weights[5])
            model.biasdelta[5] = mom * model.biasdelta[5] - eps_b / num_samples * sens[9].sum(1)
            model.weightsdelta[4] = mom * model.weightsdelta[4] - eps_w / num_samples * (conv_backward_filter(sens[7], acts[6], model.conv_infos[4]) + wd * model.weights[4])
            model.biasdelta[4] = mom * model.biasdelta[4] - eps_b / num_samples * conv_backward_bias(sens[7])
            model.weightsdelta[3] = mom * model.weightsdelta[3] - eps_w / num_samples * (conv_backward_filter(sens[6], acts[5], model.conv_infos[3]) + wd * model.weights[3])
            model.biasdelta[3] = mom * model.biasdelta[3] - eps_b / num_samples * conv_backward_bias(sens[6])
            model.weightsdelta[2] = mom * model.weightsdelta[2] - eps_w / num_samples * (conv_backward_filter(sens[5], acts[4], model.conv_infos[2]) + wd * model.weights[2])
            model.biasdelta[2] = mom * model.biasdelta[2] - eps_b / num_samples * conv_backward_bias(sens[5])
            model.weightsdelta[1] = mom * model.weightsdelta[1] - eps_w / num_samples * (conv_backward_filter(sens[3], acts[2], model.conv_infos[1]) + wd * model.weights[1])
            model.biasdelta[1] = mom * model.biasdelta[1] - eps_b / num_samples * conv_backward_bias(sens[3])
            model.weightsdelta[0] = mom * model.weightsdelta[0] - eps_w / num_samples * (conv_backward_filter(sens[1], acts[0], model.conv_infos[0]) + wd * model.weights[0])
            model.biasdelta[0] = mom * model.biasdelta[0] - eps_b / num_samples * conv_backward_bias(sens[1])
            # Apply the accumulated deltas to all eight layers.
            for k in range(8):
                model.weights[k] += model.weightsdelta[k]
                model.bias[k] += model.biasdelta[k]
            count = count + 1
            if count % 10 == 0:
                print_training_accuracy(acts[12], target, num_samples)
                print "time: %s" % (time.time() - last)
                last = time.time()
npgrad = npgrad.reshape(np.prod(weightshape[0:len(weightshape)])) analy_grad = npgrad[position] / owl_net.batch_size / len(gpu) print all_loss print ori_all_loss num_grad = (all_loss - ori_all_loss) / h diff = np.abs(analy_grad - num_grad) info = "analy: %f num: %f ratio: %f" % (analy_grad, num_grad, analy_grad / num_grad) print info if __name__ == "__main__": owl.initialize(sys.argv) gpu = [] gpu.append(owl.create_gpu_device(0)) gpu1 = owl.create_gpu_device(1) owl.set_device(gpu0) #prepare the net and solver builder = CaffeNetBuilder(sys.argv[1], sys.argv[2]) owl_net = net.Net() builder.build_net(owl_net) builder.init_net_from_file(owl_net, sys.argv[3]) accunitname = sys.argv[4] last = time.time() beg_time = last losslayer = get_loss_layer(owl_net) checklayer = owl_net.get_units_by_name(sys.argv[5])[0]
def __init__(self, solver_file, snapshot = 0, num_gpu = 1):
    """Keep solver settings and create the requested number of GPU devices."""
    self.solver_file = solver_file
    self.snapshot = snapshot
    self.num_gpu = num_gpu
    # Device handles for GPUs 0..num_gpu-1.
    self.gpu = [owl.create_gpu_device(idx) for idx in range(num_gpu)]
import owl

# Allocate GPU 0 and make it the active device.
devices = [owl.create_gpu_device(0)]
owl.set_device(devices[-1])
def __init__(self, solver_file, snapshot, gpu_idx = 0):
    """Store solver configuration and activate the requested GPU."""
    self.solver_file = solver_file
    self.snapshot = snapshot
    # Keep the device handle and make it current.
    self.gpu = owl.create_gpu_device(gpu_idx)
    owl.set_device(self.gpu)
# NOTE(review): the two statements below look like the tail of a testing
# routine whose definition lies outside this chunk; reproduced unchanged.
out, _, _ = bpprop(model, test_samples, test_labels)
print_training_accuracy(out, test_labels, num_test_samples, 'Testing')

def multi_gpu_merge(l, base, layer):
    """Tree-reduce layer `layer` of the per-GPU gradient lists in `l`.

    Splits the list in half, merges each half recursively, then sums the two
    partial results after switching to device `base`.
    """
    if len(l) == 1:
        return l[0][layer]
    left = multi_gpu_merge(l[:len(l) / 2], base, layer)
    right = multi_gpu_merge(l[len(l) / 2:], base + len(l) / 2, layer)
    owl.set_device(base)
    return left + right

if __name__ == '__main__':
    owl.initialize(sys.argv)
    parser = argparse.ArgumentParser(description='MNIST CNN')
    parser.add_argument('-n', '--num', help='number of GPUs to use', action='store', type=int, default=1)
    args = parser.parse_args()
    assert (1 <= args.num)
    print 'Using %d GPU(s)' % args.num
    # One device per requested GPU; start on gpu[0].
    gpu = [owl.create_gpu_device(i) for i in range(args.num)]
    owl.set_device(gpu[0])
    model = MNISTCNNModel()
    model.init_random()
    train_network(model)
def __init__(self, solver_file, snapshot, gpu_idx = 1):
    """Store solver settings and activate GPU number `gpu_idx - 1`.

    NOTE(review): this creates gpu_idx devices just to use the last one,
    and hard-codes num_gpu = 1 regardless of gpu_idx -- confirm gpu_idx is
    meant as a 1-based device selector rather than a device count.
    """
    self.solver_file = solver_file
    self.snapshot = snapshot
    self.num_gpu = 1
    self.gpu = [owl.create_gpu_device(i) for i in range(gpu_idx)]
    owl.set_device(self.gpu[gpu_idx-1])
def train_network_n(n, model, num_epochs=100, minibatch_size=40,
                    dropout_rate=0.5, eps_w=0.0001, eps_b=0.0002,
                    mom=0.9, wd=0.0005):
    """Data-parallel training of `model` across n GPUs.

    Each GPU processes one sub-minibatch; per-GPU gradients are summed and
    applied with momentum SGD plus weight decay once every n minibatches.
    The model is loaded at the start of each epoch and saved at the end.
    """
    gpus = []
    for i in range(0, n):
        gpus.append(owl.create_gpu_device(i))
    count = 0
    last = time.time()
    dp = ImageNetDataProvider(
        mean_file='./VGGmodel/vgg_mean.binaryproto',
        train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
        val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
        test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')
    # Split the global minibatch evenly across the n GPUs.
    minibatch_size = minibatch_size / n
    correct = 0
    rerun = False
    startepoch = 0
    curepoch = startepoch
    # Per-GPU slots for inputs, labels, outputs, and gradients.
    data = [None] * n
    label = [None] * n
    out = [None] * n
    biasgrad = [None] * n
    weightsgrad = [None] * n
    for i in range(startepoch, num_epochs):
        print "---------------------Epoch %d Index %d" % (curepoch, i)
        sys.stdout.flush()
        batchidx = 0
        count = 0
        loadmodel(i, model)
        for (samples, labels) in dp.get_train_mb(minibatch_size, 224):
            count = count + 1
            # Round-robin: issue this sub-minibatch on GPU (count-1).
            data[count - 1] = owl.from_numpy(samples).reshape(
                [224, 224, 3, samples.shape[0]])
            label[count - 1] = owl.from_numpy(labels)
            biasgrad[count - 1] = [None] * (model.num_layers - 1)
            weightsgrad[count - 1] = [None] * (model.num_layers - 1)
            owl.set_device(gpus[count - 1])
            out[count - 1] = train_one_mb(model, data[count - 1],
                                          label[count - 1],
                                          weightsgrad[count - 1],
                                          biasgrad[count - 1])
            out[count - 1].start_eval()
            # Only update weights once all n GPUs have a pending minibatch.
            if count % n > 0:
                continue
            # Sum gradients from every GPU into the total* accumulators.
            totalweightsgrad = [None] * (model.num_layers - 1)
            totalbiasgrad = [None] * (model.num_layers - 1)
            num_samples = 0
            for gpuidx in range(0, n):
                num_samples += data[gpuidx].shape[-1]
                for k in range(model.num_layers - 1):
                    if model.ff_infos[k]['ff_type'] == 'conv' or model.ff_infos[k]['ff_type'] == 'fully':
                        if gpuidx == 0:
                            totalweightsgrad[k] = weightsgrad[gpuidx][k]
                            totalbiasgrad[k] = biasgrad[gpuidx][k]
                        else:
                            totalweightsgrad[k] += weightsgrad[gpuidx][k]
                            totalbiasgrad[k] += biasgrad[gpuidx][k]
            # Momentum SGD update; weight decay applied to weights only.
            for k in range(model.num_layers - 1):
                if model.ff_infos[k]['ff_type'] == 'conv' or model.ff_infos[k]['ff_type'] == 'fully':
                    model.weightsdelta[k] = mom * model.weightsdelta[k] - eps_w / num_samples * (
                        totalweightsgrad[k] + wd * num_samples * model.weights[k])
                    model.biasdelta[k] = mom * model.biasdelta[k] - eps_b / num_samples * totalbiasgrad[k]
                    model.weights[k] += model.weightsdelta[k]
                    model.bias[k] += model.biasdelta[k]
            #print num_samples
            if count % n == 0:
                print 'batch %d' % (batchidx)
                batchidx = batchidx + 1
                '''
                #TODO hack
                if batchidx == 2000:
                    savemodel(i+1, model)
                    exit(0)
                '''
                thiscorrect = print_training_accuracy(out[0], label[0],
                                                      data[0].shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()
                count = 0
        savemodel(i + 1, model)
# NOTE(review): fragment of a multi-GPU training loop; the enclosing function
# header and the definitions of count/num_gpu/j/lazy are not visible here.
label = owl.from_nparray(labels)
out = train_one_mb(model, data, label, weightsgrad[count], biasgrad[count], dropout_rate)
# out.start_eval()
if count == 0:
    # Update: fold every GPU's gradients into slot 0, then apply
    # momentum SGD with weight decay to both weights and biases.
    for k in range(num_weights):
        for l in range(1, num_gpu):
            weightsgrad[0][k] = weightsgrad[0][k] + weightsgrad[l][k]
            biasgrad[0][k] = biasgrad[0][k] + biasgrad[l][k]
        model.weightsdelta[k] = mom * model.weightsdelta[k] - eps_w / num_samples * (weightsgrad[0][k] + wd * model.weights[k])
        model.biasdelta[k] = mom * model.biasdelta[k] - eps_b / num_samples * (biasgrad[0][k] + wd * model.bias[k])
        model.weights[k] += model.weightsdelta[k]
        model.weights[k].start_eval()
        model.bias[k] += model.biasdelta[k]
        model.bias[k].start_eval()
    # Periodically report accuracy and wall-clock time per report interval.
    if j % (lazy * num_gpu) == 0:
        print_training_accuracy(out, label, minibatch_size)
        print "time: %s" % (time.time() - last)
        last = time.time()

if __name__ == '__main__':
    # Entry point: create CPU + num_gpu GPU devices and train AlexNet.
    owl.initialize(sys.argv)
    owl.create_cpu_device()
    for i in range(num_gpu):
        gpu_array.append(owl.create_gpu_device(i))
    owl.set_device(gpu_array[0])
    model = AlexModel()
    model.init_random()
    train_network(model)
# NOTE(review): fragment -- tail of an LSTM forward pass used for test-set
# perplexity evaluation; the per-sentence/per-timestep loop headers are not
# visible here.
act_og[t] = ele.sigm(act_og[t])
if tanhC_version:
    Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
else:
    Hout[t] = ele.mult(act_og[t], C[t])
Y = softmax(model.decoder_weights * Hout[t] + model.decoder_bias)

# evaluation
output = Y.to_numpy()
# Can directly get a single element from Y
# print output[0, sent[t]]
# Accumulate log2-likelihood; clamp at 1e-20 to avoid log(0).
sent_ll += math.log(max(output[0, sent[t]], 1e-20), 2)

test_ll += sent_ll
# Perplexity = 2 ** (negative average log2-likelihood per word).
test_ent = test_ll * (-1) / words
test_ppl = 2 ** test_ent
print "Test PPL =", test_ppl

if __name__ == '__main__':
    # Entry point: init owl on GPU 1, train the LSTM for 5 passes, then test.
    owl.initialize(sys.argv)
    gpu = owl.create_gpu_device(1)
    owl.set_device(gpu)
    model, train_sents, test_sents, train_words, test_words = LSTM_init()
    learning_rate = 0.1
    for i in range(5):
        model, learning_rate = LSTM_train(model, train_sents, train_words, learning_rate, 1)
    LSTM_test(model, test_sents, test_words)
import time
import pickle
import gzip
from operator import mul
import matplotlib.pyplot as plt

#The file containing the data in the format of one vector per line space separated floats
DATAFILE = "????"

if __name__ == "__main__":
    #Setup minerva
    cpu = owl.create_cpu_device()
    # Prefer a GPU when one is available; otherwise fall back to the CPU device.
    if owl.get_gpu_device_count() > 0:
        dev = owl.create_gpu_device(0)
    else:
        dev = cpu
    owl.set_device(dev)
    # load data
    gzfile = gzip.GzipFile('/home/jlovitt/storage/mnist/mnist.dat','rb')
    #discard stored variable name
    pickle.load(gzfile)
    data = pickle.load(gzfile)
    #data = np.loadtxt(DATAFILE,dtype=np.float32, delimiter=" ")
    #data = data - np.mean(data, 0)
    #data = data / np.var(data, 0)
    # Scale pixel values from [0, 255] down to [0, 1].
    data = data/255.0
    # training parameters
import owl
import sys

# Initialize owl, create a CPU device plus two GPU devices, and make
# GPU 0 the active compute device.
owl.initialize(sys.argv)
owl.create_cpu_device()
gpu0 = owl.create_gpu_device(0)
gpu1 = owl.create_gpu_device(1)
owl.set_device(gpu0)
# Device bootstrap: create a local CPU device, then use every available GPU
# (plus MPI peers, if built with MPI support) or fall back to the CPU.
cpu = owl.create_cpu_device()
print "owl: local CPU creation in rank {} with id {}".format(owl.rank(), cpu)
sys.stdout.flush()
# ASCII-art banner (collapsed onto one line in this source).
print ''' __ __ _ __ _ _____ ____ _ _ ___ / | / | | | | \\ | | | ___| | _ \\ | | / / / | / |/ | | | | \\| | | |__ | |_| | | | / / / /| | / /| /| | | | | | | __| | / | |/ / / /_| | / / | / | | | | | |\\ | | |___ | |\\ \\ | / / ___ | /_/ |_/ |_| |_| |_| \\__| |_____| |_| \\_\\ |__/ /_/ |_| '''
if owl.has_cuda():
    print owl.get_gpu_device_count()
    # One owl device per physical GPU; GPU 0 becomes the active device.
    gpu = [owl.create_gpu_device(i) for i in range(owl.get_gpu_device_count())]
    print '[INFO] You have %d GPU devices' % len(gpu)
    print '[INFO] Set device to gpu[0]'
    owl.set_device(gpu[0])
    if owl.has_mpi():
        # Create one MPI CPU device on each non-root rank.
        n = owl.get_mpi_node_count()
        for i in range(1,n):
            id = owl.create_mpi_device(i,0)
            print "owl: created mpi cpu device on rank {} with id {}".format(i, id)
else:
    print '[INFO] CUDA disabled'
    print '[INFO] Set device to cpu'
    owl.set_device(cpu)
print "\nREADY FOR INPUT\n"
#print z.to_numpy()
def __init__(self, solver_file, snapshot=0, num_gpu=1, sync_freq=1):
    """Store solver settings and allocate one owl GPU device per GPU.

    sync_freq controls how often the solver synchronizes (stored as-is).
    """
    self.solver_file = solver_file
    self.snapshot = snapshot
    self.num_gpu = num_gpu
    self.sync_freq = sync_freq
    self.gpu = list(map(owl.create_gpu_device, range(num_gpu)))
analy_grad = npgrad[position] / owl_net.batch_size / len(gpu) print all_loss print ori_all_loss num_grad = (all_loss - ori_all_loss) / h diff = np.abs(analy_grad - num_grad) info = "analy: %f num: %f ratio: %f" % (analy_grad, num_grad, analy_grad / num_grad) print info if __name__ == "__main__": owl.initialize(sys.argv) gpu = [] gpu.append(owl.create_gpu_device(0)) gpu1 = owl.create_gpu_device(1) owl.set_device(gpu0) #prepare the net and solver builder = CaffeNetBuilder(sys.argv[1], sys.argv[2]) owl_net = net.Net() builder.build_net(owl_net) builder.init_net_from_file(owl_net, sys.argv[3]) accunitname = sys.argv[4] last = time.time() beg_time = last losslayer = get_loss_layer(owl_net) checklayer = owl_net.get_units_by_name(sys.argv[5])[0]
# NOTE(review): fragment -- tail of a multi-GPU training routine; the loop
# headers and definitions of model/gpu/lr/num_samples are not visible here.
# NOTE(review): the attribute is spelled `weightdelta` below (not
# `weightsdelta` as in sibling scripts) -- confirm the model defines it.
model.biasdelta[k] = mom * model.biasdelta[k] - lr / num_samples / len(gpu) * multi_gpu_merge(biasgrads, 0, k)
model.weights[k] += model.weightdelta[k]
model.bias[k] += model.biasdelta[k]
if count % (len(gpu) * lazy_cycle) == 0:
    print_training_accuracy(out, label, num_samples, 'Training')
print '---End of Epoch #', i, 'time:', time.time() - last
# do test
out, _, _ = bpprop(model, test_samples, test_labels)
print_training_accuracy(out, test_labels, num_test_samples, 'Testing')


def multi_gpu_merge(l, base, layer):
    # Tree-reduce one layer's gradients across the per-GPU gradient lists,
    # pairing halves recursively; the merge add is issued on device `base`.
    # NOTE(review): `base` is an int index offset here -- confirm
    # owl.set_device accepts a device id rather than a device object.
    if len(l) == 1:
        return l[0][layer]
    left = multi_gpu_merge(l[:len(l) / 2], base, layer)
    right = multi_gpu_merge(l[len(l) / 2:], base + len(l) / 2, layer)
    owl.set_device(base)
    return left + right


if __name__ == '__main__':
    # Entry point: parse GPU count, create devices, train the MNIST CNN.
    parser = argparse.ArgumentParser(description='MNIST CNN')
    parser.add_argument('-n', '--num', help='number of GPUs to use', action='store', type=int, default=1)
    (args, remain) = parser.parse_known_args()
    assert(1 <= args.num)
    print 'Using %d GPU(s)' % args.num
    owl.initialize(sys.argv)
    gpu = [owl.create_gpu_device(i) for i in range(args.num)]
    owl.set_device(gpu[0])
    model = MNISTCNNModel()
    model.init_random()
    train_network(model)
# NOTE(review): fragment -- tail of an LSTM forward pass used for test-set
# perplexity evaluation; the per-sentence/per-timestep loop headers are not
# visible here.
if tanhC_version:
    Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
else:
    Hout[t] = ele.mult(act_og[t], C[t])
Y = softmax(model.decoder_weights * Hout[t] + model.decoder_bias)

# evaluation
output = Y.to_numpy()
# Can directly get a single element from Y
# print output[0, sent[t]]
# Accumulate log2-likelihood; clamp at 1e-20 to avoid log(0).
sent_ll += math.log(max(output[0, sent[t]], 1e-20), 2)

test_ll += sent_ll
# Perplexity = 2 ** (negative average log2-likelihood per word).
test_ent = test_ll * (-1) / words
test_ppl = 2**test_ent
print "Test PPL =", test_ppl

if __name__ == '__main__':
    # Entry point: init owl on GPU 1, train the LSTM for 5 passes, then test.
    owl.initialize(sys.argv)
    gpu = owl.create_gpu_device(1)
    owl.set_device(gpu)
    model, train_sents, test_sents, train_words, test_words = LSTM_init()
    learning_rate = 0.1
    for i in range(5):
        model, learning_rate = LSTM_train(model, train_sents, train_words, learning_rate, 1)
    LSTM_test(model, test_sents, test_words)
import owl.net as net from net_helper import CaffeNetBuilder def get_weights_id(owl_net): weights_id = [] for i in xrange(len(owl_net.units)): if isinstance(owl_net.units[i], net.WeightedComputeUnit): weights_id.append(i) return weights_id if __name__ == "__main__": owl.initialize(sys.argv) gpu = [None] * 4 gpu[0] = owl.create_gpu_device(0) gpu[1] = owl.create_gpu_device(1) gpu[2] = owl.create_gpu_device(2) gpu[3] = owl.create_gpu_device(3) #prepare the net and solver builder = CaffeNetBuilder(sys.argv[1], sys.argv[2]) owl_net = net.Net() builder.build_net(owl_net) builder.init_net_from_file(owl_net, sys.argv[3]) accunitname = sys.argv[4] last = time.time() wunits = get_weights_id(owl_net) print len(wunits) wgrad = [[] for i in xrange(4)]
def __init__(self, solver_file, snapshot, gpu_idx=0):
    """Configure the solver and select the GPU given by gpu_idx.

    Creates the owl device for gpu_idx, stores it on self.gpu, and makes
    it the active compute device.
    """
    self.solver_file = solver_file
    self.snapshot = snapshot
    self.gpu = owl.create_gpu_device(gpu_idx)
    # All subsequent owl ops run on the device chosen above.
    owl.set_device(self.gpu)
# NOTE(review): fragment of a multi-GPU training loop; the enclosing function
# header and definitions of count/num_gpu/gpu_array are not visible here.
owl.set_device(gpu_array[count])
# Synthetic data stand-in for a real minibatch (random 227x227x3 images).
data = owl.randn([227, 227, 3, minibatch_size], 0, 1)
label = owl.randn([1, minibatch_size], 0, 1)
out = train_one_mb(model, data, label, weightsgrad[count], biasgrad[count], dropout_rate)
# Kick off asynchronous evaluation of every gradient on this GPU.
for k in weightsgrad[count]:
    k.start_eval()
for k in biasgrad[count]:
    k.start_eval()
if count == 0:
    # Barrier: block until every GPU's gradients have finished evaluating,
    # then report elapsed wall-clock time for the round.
    for k in range(0, num_gpu):
        for l in weightsgrad[k]:
            l.wait_for_eval()
        for l in biasgrad[k]:
            l.wait_for_eval()
    print "time: %s" % (time.time() - last)
    last = time.time()

if __name__ == '__main__':
    # Entry point: create CPU device plus either one chosen GPU or all
    # num_gpu GPUs, then train AlexNet.
    owl.initialize(sys.argv)
    owl.create_cpu_device()
    if num_gpu == 1:
        gpu_array.append(owl.create_gpu_device(one_gpu_id))
    else:
        for i in range(num_gpu):
            gpu_array.append(owl.create_gpu_device(i))
    owl.set_device(gpu_array[0])
    model = AlexModel()
    model.init_random()
    train_network(model)