def test_momentum_cuda(self):
    lr = 0.1
    n, m = 2, 2
    p1 = np.random.rand(n, m)
    p2 = np.random.rand(n, m)
    g1 = np.random.rand(n, m) * 0.01
    g2 = np.random.rand(n, m) * 0.01
    v1 = np.zeros((n, m))
    v2 = np.zeros((n, m))
    t1 = tensor.from_numpy(p1)
    t2 = tensor.from_numpy(p2)
    tg1 = tensor.from_numpy(g1)
    tg2 = tensor.from_numpy(g2)
    for t in range(1, 4):
        np_momentum([p1, p2], [g1, g2], [v1, v2], lr, t)
    momentum = opt.SGD(lr, momentum=0.9)
    self.to_cuda()
    for t in range(1, 4):
        momentum.apply(0, tg1, t1, 'p1', t)
        momentum.apply(0, tg2, t2, 'p2', t)
    t1 = tensor.to_numpy(t1)
    t2 = tensor.to_numpy(t2)
    for t, p in zip([t1, t2], [p1, p2]):
        for i in range(n):
            for j in range(m):
                self.assertAlmostEqual(t[i, j], p[i, j], 2)
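The test above checks SINGA's SGD-with-momentum against a NumPy reference, np_momentum, which is not shown in this section. A minimal sketch of what such a reference could look like, assuming the classic in-place update v <- momentum * v + lr * g, p <- p - v (the helper in the real test suite may use a different formulation):

import numpy as np

def np_momentum(params, grads, velocities, lr, step, momentum=0.9):
    # `step` is accepted for API parity with the caller but unused
    # in this basic rule
    for p, g, v in zip(params, grads, velocities):
        v *= momentum       # decay the running velocity
        v += lr * g         # accumulate the scaled gradient
        p -= v              # apply the update in place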
def test_sgd(self):
    lr = 0.1
    sgd = opt.SGD(lr)
    sgd.apply(0, self.g, self.W, 'w')
    w = tensor.to_numpy(self.W)
    for i in range(self.W.size()):
        self.assertAlmostEqual(w[i], self.np_W[i] - lr * self.np_g[i])
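test_sgd relies on fixtures (self.W, self.g and their NumPy shadows) created elsewhere in the test case. A plausible setUp, assuming a small 1-D parameter since the assertions index w[i] with a single subscript; the shapes and values here are illustrative:

def setUp(self):
    # NumPy copies are kept so the test can compute the expected update
    self.np_W = np.random.rand(4).astype(np.float32)
    self.W = tensor.from_numpy(self.np_W)
    self.np_g = (np.random.rand(4) * 0.01).astype(np.float32)
    self.g = tensor.from_numpy(self.np_g)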
def train(data_dir, net, num_epoch=20, batch_size=250):
    print('Start initialization............')
    cuda = device.create_cuda_gpu()
    net.to_device(cuda)
    opt = optimizer.SGD(momentum=0.9, weight_decay=0.04)
    for (p, specs) in zip(net.param_values(), net.param_specs()):
        filler = specs.filler
        if filler.type == 'gaussian':
            initializer.gaussian(p, filler.mean, filler.std)
        else:
            p.set_value(0)
        opt.register(p, specs)
        print(specs.name, filler.type, p.l1())

    print('Loading data ..................')
    train_x, train_y = load_dataset(data_dir, 1)
    test_x, test_y = load_dataset(data_dir, 2)
    tx = tensor.Tensor((batch_size, 3), cuda)
    ty = tensor.Tensor((batch_size,), cuda, core_pb2.kInt)
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)
    # renamed from `id`, which shadows the Python builtin
    test_idx = np.arange(test_x.shape[0], dtype=np.int32)
    for epoch in range(num_epoch):
        np.random.shuffle(idx)
        loss, acc = 0.0, 0.0
        print('Epoch %d' % epoch)
        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            grads, (l, a) = net.train(tx, ty)
            loss += l
            acc += a
            for (s, p, g) in zip(net.param_specs(), net.param_values(),
                                 grads):
                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s.name))
            # update progress bar
            utils.update_progress(b * 1.0 / num_train_batch,
                                  'training loss = %f, accuracy = %f' % (l, a))
        info = '\ntraining loss = %f, training accuracy = %f' \
            % (loss / num_train_batch, acc / num_train_batch)
        print(info)

        loss, acc = 0.0, 0.0
        # note: this shuffle has no effect, since the test batches below
        # slice test_x/test_y sequentially rather than through test_idx
        np.random.shuffle(test_idx)
        for b in range(num_test_batch):
            x = test_x[b * batch_size:(b + 1) * batch_size]
            y = test_y[b * batch_size:(b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            l, a = net.evaluate(tx, ty)
            loss += l
            acc += a
        print('test loss = %f, test accuracy = %f'
              % (loss / num_test_batch, acc / num_test_batch))
    net.save('model.bin')  # save model params into checkpoint file
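get_lr(epoch) is referenced by the update loop above but not defined in this snippet. A hedged example of a step-decay schedule it could implement; the constants are illustrative, not the script's actual values:

def get_lr(epoch):
    # step decay: start at 0.01 and divide by 10 every 8 epochs
    return 0.01 * (0.1 ** (epoch // 8))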
def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
          use_cpu=False):
    print('Start initialization............')
    if use_cpu:
        print('Using CPU')
        dev = device.get_default_device()
    else:
        print('Using GPU')
        dev = device.create_cuda_gpu()

    net.to_device(dev)
    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
    for (p, specs) in zip(net.param_names(), net.param_specs()):
        opt.register(p, specs)

    tx = tensor.Tensor((batch_size, 3, 32, 32), dev)
    ty = tensor.Tensor((batch_size, ), dev, core_pb2.kInt)
    train_x, train_y, test_x, test_y = data
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)
    for epoch in range(max_epoch):
        np.random.shuffle(idx)
        loss, acc = 0.0, 0.0
        print('Epoch %d' % epoch)
        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            grads, (l, a) = net.train(tx, ty)
            loss += l
            acc += a
            for (s, p, g) in zip(net.param_names(), net.param_values(),
                                 grads):
                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s), b)
            # update progress bar
            utils.update_progress(
                b * 1.0 / num_train_batch,
                'training loss = %f, accuracy = %f' % (l, a))
        info = '\ntraining loss = %f, training accuracy = %f, lr = %f' \
            % ((loss / num_train_batch), (acc / num_train_batch),
               get_lr(epoch))
        print(info)

        loss, acc = 0.0, 0.0
        for b in range(num_test_batch):
            x = test_x[b * batch_size:(b + 1) * batch_size]
            y = test_y[b * batch_size:(b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            l, a = net.evaluate(tx, ty)
            loss += l
            acc += a
        print('test loss = %f, test accuracy = %f' %
              ((loss / num_test_batch), (acc / num_test_batch)))
    net.save('model', 20)  # save model params into checkpoint file
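A sketch of how this train function might be invoked. The load_data and create_net helpers and the schedule constants are assumptions of this example, not part of the script; only the argument tuple shape (train_x, train_y, test_x, test_y) is implied by the code above:

if __name__ == '__main__':
    data = load_data('cifar-10-batches-py')  # hypothetical loader
    net = create_net(use_cpu=False)          # hypothetical model builder
    train(data, net, max_epoch=50,
          get_lr=lambda epoch: 0.01 * (0.1 ** (epoch // 25)),
          weight_decay=5e-4, batch_size=100)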
print("train_label_shape:", label.shape) inputs = Tensor(data=data) target = Tensor(data=label) w0 = Tensor(shape=(2, 3), requires_grad=True, stores_grad=True) w0.gaussian(0.0, 0.1) b0 = Tensor(shape=(1, 3), requires_grad=True, stores_grad=True) b0.set_value(0.0) w1 = Tensor(shape=(3, 2), requires_grad=True, stores_grad=True) w1.gaussian(0.0, 0.1) b1 = Tensor(shape=(1, 2), requires_grad=True, stores_grad=True) b1.set_value(0.0) sgd = optimizer.SGD(0.05) # training process for i in range(1001): x = autograd.matmul(inputs, w0) x = autograd.add_bias(x, b0) x = autograd.relu(x) x = autograd.matmul(x, w1) x = autograd.add_bias(x, b1) x = autograd.softmax(x) loss = autograd.cross_entropy(x, target) for p, gp in autograd.backward(loss): sgd.apply(0, gp, p, "") if i % 100 == 0: print("training loss = ", tensor.to_numpy(loss)[0])
def train(data_file, use_gpu, num_epoch=10, batch_size=100):
    print('Start initialization............')
    lr = 0.1  # learning rate
    weight_decay = 0.0002
    hdim = 1000
    vdim = 784

    tweight = tensor.Tensor((vdim, hdim))
    tweight.gaussian(0.0, 0.1)
    tvbias = tensor.from_numpy(np.zeros(vdim, dtype=np.float32))
    thbias = tensor.from_numpy(np.zeros(hdim, dtype=np.float32))
    opt = optimizer.SGD(momentum=0.5, weight_decay=weight_decay)

    print('Loading data ..................')
    train_x, valid_x = load_train_data(data_file)

    if use_gpu:
        dev = device.create_cuda_gpu()
    else:
        dev = device.get_default_device()

    for t in [tweight, tvbias, thbias]:
        t.to_device(dev)

    num_train_batch = train_x.shape[0] // batch_size
    print("num_train_batch = %d " % (num_train_batch))
    for epoch in range(num_epoch):
        trainerrorsum = 0.0
        print('Epoch %d' % epoch)
        for b in range(num_train_batch):
            # positive phase
            tdata = tensor.from_numpy(
                train_x[(b * batch_size):((b + 1) * batch_size), :])
            tdata.to_device(dev)
            tposhidprob = tensor.mult(tdata, tweight)
            tposhidprob = tposhidprob + thbias
            tposhidprob = tensor.sigmoid(tposhidprob)
            tposhidrandom = tensor.Tensor(tposhidprob.shape, dev)
            tposhidrandom.uniform(0.0, 1.0)
            tposhidsample = tensor.gt(tposhidprob, tposhidrandom)

            # negative phase
            tnegdata = tensor.mult(tposhidsample, tweight.T())
            tnegdata = tnegdata + tvbias
            tnegdata = tensor.sigmoid(tnegdata)

            tneghidprob = tensor.mult(tnegdata, tweight)
            tneghidprob = tneghidprob + thbias
            tneghidprob = tensor.sigmoid(tneghidprob)
            error = tensor.sum(tensor.square((tdata - tnegdata)))
            trainerrorsum = error + trainerrorsum

            tgweight = tensor.mult(tnegdata.T(), tneghidprob) \
                - tensor.mult(tdata.T(), tposhidprob)
            tgvbias = tensor.sum(tnegdata, 0) - tensor.sum(tdata, 0)
            tghbias = tensor.sum(tneghidprob, 0) - tensor.sum(tposhidprob, 0)

            opt.apply_with_lr(epoch, lr / batch_size, tgweight, tweight, 'w')
            opt.apply_with_lr(epoch, lr / batch_size, tgvbias, tvbias, 'vb')
            opt.apply_with_lr(epoch, lr / batch_size, tghbias, thbias, 'hb')

        print('training error average = %f' %
              (tensor.to_numpy(trainerrorsum) / train_x.shape[0]))

        tvaliddata = tensor.from_numpy(valid_x)
        tvaliddata.to_device(dev)
        tvalidposhidprob = tensor.mult(tvaliddata, tweight)
        tvalidposhidprob = tvalidposhidprob + thbias
        tvalidposhidprob = tensor.sigmoid(tvalidposhidprob)
        tvalidposhidrandom = tensor.Tensor(tvalidposhidprob.shape, dev)
        initializer.uniform(tvalidposhidrandom, 0.0, 1.0)
        tvalidposhidsample = tensor.gt(tvalidposhidprob, tvalidposhidrandom)
        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.T())
        tvalidnegdata = tvalidnegdata + tvbias
        tvalidnegdata = tensor.sigmoid(tvalidnegdata)
        validerrorsum = tensor.sum(tensor.square((tvaliddata - tvalidnegdata)))
        print('valid error average = %f' %
              (tensor.to_numpy(validerrorsum) / valid_x.shape[0]))
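load_train_data is defined elsewhere. Since vdim is 784, the input is presumably MNIST-style; a rough sketch assuming a pickled (train, valid, test) tuple in the classic mnist.pkl.gz layout, which the real loader may or may not match:

import gzip
import pickle

import numpy as np

def load_train_data(data_file):
    # assumed layout: ((train_x, train_y), (valid_x, valid_y), (test_x, test_y))
    with gzip.open(data_file, 'rb') as f:
        train_set, valid_set, _ = pickle.load(f, encoding='latin1')
    train_x = train_set[0].astype(np.float32)  # shape (n, 784)
    valid_x = valid_set[0].astype(np.float32)
    return train_x, valid_x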
def train(inputfolder, outputfolder, visfolder, trainratio, validationratio,
          testratio, dev, agent, max_epoch, use_cpu, batch_size=100):
    opt = optimizer.SGD(momentum=0.9, weight_decay=0.01)
    agent.push(MsgType.kStatus, 'Downloading data...')
    all_feature, all_label = get_data(
        os.path.join(inputfolder, 'features.txt'),
        os.path.join(inputfolder, 'label.txt'))  # put the data on/to dbsystem
    agent.push(MsgType.kStatus, 'Finish downloading data')
    n_folds = 5
    print("all_label shape: ", all_label.shape)
    all_label = all_label[:, 1]
    # for i, (train_index, test_index) in enumerate(StratifiedKFold(
    #         all_label.reshape(all_label.shape[0]), n_folds=n_folds)):
    for i in range(3):
        train_index = np.arange(0, 1404)
        train_feature, train_label = all_feature[train_index], \
            all_label[train_index]
        if i == 0:
            print("fold: ", i)
            break
    print("train label sum: ", train_label.sum())

    in_shape = np.array([1, 12, 375])
    trainx = tensor.Tensor(
        (batch_size, int(in_shape[0]), int(in_shape[1]), int(in_shape[2])),
        dev)
    trainy = tensor.Tensor((batch_size, ), dev, tensor.int32)
    num_train_batch = train_feature.shape[0] // batch_size
    idx = np.arange(train_feature.shape[0], dtype=np.int32)

    # hyper-parameters: height, width, kernel_y, kernel_x, stride_y, stride_x
    hyperpara = np.array([12, 375, 3, 10, 1, 3])
    height, width, kernel_y, kernel_x, stride_y, stride_x = hyperpara
    print('kernel_y: ', kernel_y)
    print('kernel_x: ', kernel_x)
    print('stride_y: ', stride_y)
    print('stride_x: ', stride_x)

    net = model.create_net(in_shape, hyperpara, use_cpu)
    net.to_device(dev)

    test_epoch = 10
    occlude_test_epoch = 100
    for epoch in range(max_epoch):
        if handle_cmd(agent):
            break
        np.random.seed(10)
        np.random.shuffle(idx)
        train_feature, train_label = train_feature[idx], train_label[idx]
        print('Epoch %d' % epoch)

        loss, acc = 0.0, 0.0
        # the first half of the batches doubles as a validation estimate
        val_loss, val_acc = 0.0, 0.0
        for b in range(num_train_batch):
            x = train_feature[b * batch_size:(b + 1) * batch_size]
            y = train_label[b * batch_size:(b + 1) * batch_size]
            x = x.reshape((batch_size, in_shape[0], in_shape[1], in_shape[2]))
            trainx.copy_from_numpy(x)
            trainy.copy_from_numpy(y)
            grads, (l, a), probs = net.train(trainx, trainy)
            loss += l
            acc += a
            if b < num_train_batch // 2:
                val_loss += l
                val_acc += a
            for (s, p, g) in zip(net.param_specs(), net.param_values(),
                                 grads):
                opt.apply_with_lr(epoch, 0.005, g, p, str(s.name))
            info = 'training loss = %f, training accuracy = %f' % (l, a)
            utils.update_progress(b * 1.0 / num_train_batch, info)

        # put training status info into a shared queue
        info = dict(phase='train', step=epoch,
                    accuracy=acc / num_train_batch,
                    loss=loss / num_train_batch,
                    timestamp=time.time())
        agent.push(MsgType.kInfoMetric, info)
        info = 'training loss = %f, training accuracy = %f' \
            % (loss / num_train_batch, acc / num_train_batch)
        print(info)
        val_info = 'validation loss = %f, validation accuracy = %f' \
            % (val_loss / (num_train_batch // 2),
               val_acc / (num_train_batch // 2))
        print(val_info)

        if epoch == (max_epoch - 1):
            print('final val_loss: ', val_loss / (num_train_batch // 2))
            np.savetxt(outputfolder + '/final_results.txt',
                       np.full((1), val_loss / (num_train_batch // 2)),
                       delimiter=",")
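get_data is not shown in this section. A plausible sketch assuming whitespace-delimited text files, with one flattened 1x12x375 sample per row of features.txt; label.txt must have at least two columns, since the caller takes all_label[:, 1]. The column layout here is an assumption:

import numpy as np

def get_data(feature_file, label_file):
    # hypothetical reader: ndmin=2 keeps both arrays 2-D even for
    # single-row files, matching the all_label[:, 1] access above
    all_feature = np.loadtxt(feature_file, dtype=np.float32, ndmin=2)
    all_label = np.loadtxt(label_file, dtype=np.int32, ndmin=2)
    return all_feature, all_label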
def train(inputfolder, outputfolder, visfolder, sampleid, dev, agent,
          max_epoch, use_cpu, batch_size=100):
    opt = optimizer.SGD(momentum=0.8, weight_decay=0.01)
    agent.push(MsgType.kStatus, 'Downloading data...')
    all_feature, all_label = get_data(
        os.path.join(inputfolder, 'features_inference.txt'),
        os.path.join(inputfolder, 'label_inference.txt'))  # put the data on/to dbsystem
    agent.push(MsgType.kStatus, 'Finish downloading data')
    n_folds = 5
    all_label = all_label[:, 1]
    for i, (train_index, test_index) in enumerate(
            StratifiedKFold(all_label.reshape(all_label.shape[0]),
                            n_folds=n_folds)):
        test_index = np.arange(0, 351)
        test_feature, test_label = all_feature[test_index], \
            all_label[test_index]
        if i == 0:
            print("fold: ", i)
            break
    print("test label sum: ", test_label.sum())

    in_shape = np.array([1, 12, 375])
    testx = tensor.Tensor(
        (test_feature.shape[0], in_shape[0], in_shape[1], in_shape[2]), dev)
    testy = tensor.Tensor((test_feature.shape[0], ), dev, core_pb2.kInt)

    # hyper-parameters: height, width, kernel_y, kernel_x, stride_y, stride_x
    hyperpara = np.array([12, 375, 3, 10, 1, 3])
    height, width, kernel_y, kernel_x, stride_y, stride_x = hyperpara

    net = model.create_net(in_shape, hyperpara, use_cpu)
    model_dir = os.path.join(os.environ.get('GEMINI_HOME'),
                             'model/readmission_CNN_code/')
    checkpoint = os.path.join(model_dir, 'parameter_last240')
    print("checkpoint path: ", checkpoint)
    net.load(checkpoint, 20)
    net.to_device(dev)
    # fixed: the original iterated `zip(net.param_names())`, which yields
    # 1-tuples instead of names
    for name in net.param_names():
        print("init names: ", name)

    test_epoch = 10
    occlude_test_epoch = 100
    for epoch in range(max_epoch):
        if handle_cmd(agent):
            break
        np.random.seed(10)
        print('Epoch %d' % epoch)
        # (training loop omitted: this script only evaluates a pretrained
        # model loaded from the checkpoint above)
        if epoch % test_epoch == 0 or epoch == (max_epoch - 1):
            loss, acc = 0.0, 0.0
            x, y = np.copy(test_feature), np.copy(test_label)
            x = x.reshape((x.shape[0], in_shape[0], in_shape[1], in_shape[2]))
            testx.copy_from_numpy(x)
            testy.copy_from_numpy(y)
            l, a, probs = net.evaluate(testx, testy)
            loss += l
            acc += a
            print('testing loss = %f, accuracy = %f' % (loss, acc))
            # put test status info into a shared queue
            info = dict(phase='test', step=epoch, accuracy=acc, loss=loss,
                        timestamp=time.time())
            agent.push(MsgType.kInfoMetric, info)

            y_scores = softmax(tensor.to_numpy(probs))[:, 1].reshape(-1, 1)
            y_true = y.reshape(-1, 1)
            print('self-calculated test auc = %f' % auroc(y_scores, y_true))
            print('self-calculated test accuracy = %f' %
                  cal_accuracy(y_scores, y_true))

            cnn_metric_dict = {}  # for output to json
            cnn_metric_dict['Number of Samples: '] = y.shape[0]
            cnn_sensitivity, cnn_specificity, cnn_harmonic = \
                HealthcareMetrics(y_scores, y_true, 0.25)
            cnn_metric_dict['AUC: '] = auroc(y_scores, y_true)
            cnn_metric_dict['Sensitivity: '] = cnn_sensitivity
            cnn_metric_dict['Specificity: '] = cnn_specificity
            try:
                with open(os.path.join(visfolder, 'cnn_metric_info.json'),
                          'w') as cnn_metric_info_writer:
                    cnn_metric_info_writer.write('[')
                    cnn_metric_info_writer.write(
                        '"Number of Patients: %d", ' % (y.shape[0]))
                    cnn_metric_info_writer.write('"AUC: %s", ' % (
                        str(int(100 * round(auroc(y_scores, y_true), 2)))
                        + '%'))
                    cnn_metric_info_writer.write(
                        '"Sensitivity: %s", ' %
                        (str(int(100 * round(cnn_sensitivity, 2))) + '%'))
                    cnn_metric_info_writer.write(
                        '"Specificity: %s" ' %
                        (str(int(100 * round(cnn_specificity, 2))) + '%'))
                    cnn_metric_info_writer.write(']')
            except Exception as e:
                os.remove(os.path.join(visfolder, 'cnn_metric_info.json'))
                print('output cnn_metric_info.json failed: ', e)

            if epoch == (max_epoch - 1):
                np.savetxt(os.path.join(model_dir, 'readmitted_prob.csv'),
                           softmax(tensor.to_numpy(probs))[:, 1],
                           fmt='%6f', delimiter=",")

        # occlusion test; note this condition is never true inside
        # `range(max_epoch)` (the last epoch is max_epoch - 1), so this
        # block is effectively disabled as written
        if epoch == (max_epoch):
            print("occlude test")
            # occlude test data
            height_dim = (height - kernel_y) // stride_y + 1
            width_dim = (width - kernel_x) // stride_x + 1
            meta_data = np.array([height_dim, height, kernel_y, stride_y,
                                  width_dim, width, kernel_x, stride_x])
            np.savetxt(os.path.join(outputfolder, 'meta_data.csv'),
                       meta_data, fmt='%6f', delimiter=",")
            true_label_prob_matrix = np.zeros([(height_dim * width_dim), 1])
            for height_idx in range(height_dim):
                for width_idx in range(width_dim):
                    occlude_test_feature, occlude_test_label = \
                        get_occlude_data(np.copy(test_feature),
                                         np.copy(test_label),
                                         height, width, height_idx, width_idx,
                                         kernel_y, kernel_x,
                                         stride_y, stride_x)
                    loss, acc = 0.0, 0.0
                    x, y = occlude_test_feature, occlude_test_label
                    x = x.reshape((x.shape[0], in_shape[0], in_shape[1],
                                   in_shape[2]))
                    testx.copy_from_numpy(x)
                    testy.copy_from_numpy(y)
                    l, a, probs = net.evaluate(testx, testy)
                    # y_scores[i] is the predicted probability of label 1
                    y_scores = softmax(tensor.to_numpy(probs))[:, 1]
                    # average probability assigned to the true label
                    sum_true_label_prob = 0.0
                    for i in range(0, x.shape[0]):
                        if y[i] == 1:
                            sum_true_label_prob += y_scores[i]
                        elif y[i] == 0:
                            sum_true_label_prob += (1 - y_scores[i])
                    true_label_prob_matrix[
                        height_idx * width_dim + width_idx, 0] = \
                        sum_true_label_prob / x.shape[0]
            print("occlude x shape: ", x.shape)
            np.savetxt(os.path.join(model_dir, 'true_label_prob_matrix.csv'),
                       true_label_prob_matrix, fmt='%6f', delimiter=",")

            print("begin explain")
            print("begin explain format out")
            top_n = 30
            print("top_n: ", top_n)
            explain_occlude_area_format_out(
                sampleid, visfolder, np.copy(test_feature),
                np.copy(test_label),
                os.path.join(model_dir, 'readmitted_prob.csv'),
                os.path.join(model_dir, 'true_label_prob_matrix.csv'),
                os.path.join(model_dir, 'meta_data.csv'),
                top_n=top_n)
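The evaluation above relies on softmax and cal_accuracy helpers defined elsewhere. A minimal NumPy sketch consistent with how they are called (row-wise softmax over the probs matrix; accuracy from thresholding the positive-class score, with 0.5 as an assumed default threshold):

import numpy as np

def softmax(x):
    # row-wise softmax with max-subtraction for numerical stability
    e = np.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def cal_accuracy(y_scores, y_true, threshold=0.5):
    # fraction of samples whose thresholded positive-class score
    # matches the true binary label
    y_pred = (y_scores >= threshold).astype(np.int32)
    return float(np.mean(y_pred == y_true))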
def train(self, data, max_epoch, model_path='model'):
    if self.use_cpu:
        print('Using CPU')
        self.dev = device.get_default_device()
    else:
        print('Using GPU')
        self.dev = device.create_cuda_gpu()

    self.net.to_device(self.dev)
    opt = optimizer.SGD(momentum=0.9, weight_decay=1e-4)
    # opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))
    for (p, n) in zip(self.net.param_values(), self.net.param_names()):
        if 'var' in n:
            p.set_value(1.0)
        elif 'gamma' in n:
            p.uniform(0, 1)
        elif 'weight' in n:
            p.gaussian(0, 0.01)
        else:
            p.set_value(0.0)
        print(n, p.shape, p.l1())

    tx = tensor.Tensor((self.batch_size, self.maxlen, self.vocab_size),
                       self.dev)
    ty = tensor.Tensor((self.batch_size, ), self.dev, core_pb2.kInt)
    train_x, train_y, test_x, test_y = data
    num_train_batch = train_x.shape[0] // self.batch_size
    num_test_batch = test_x.shape[0] // self.batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)
    for epoch in range(max_epoch):
        np.random.shuffle(idx)
        loss, acc = 0.0, 0.0
        print('\nEpoch %d' % epoch)
        start = time()
        for b in range(num_train_batch):
            batch_loss, batch_acc = 0.0, 0.0
            grads = []
            # x.shape = (batch_size, maxlen); y.shape = (batch_size,)
            x = train_x[idx[b * self.batch_size:(b + 1) * self.batch_size]]
            y = train_y[idx[b * self.batch_size:(b + 1) * self.batch_size]]
            # convert the input to shape (batch_size, max_len, vocab_size)
            sam_arrs = convert_samples(x, x.shape[1], self.vocab_size,
                                       self.dev)
            tx.copy_from_numpy(sam_arrs)
            ty.copy_from_numpy(np.array(y, dtype='int32'))
            grads, (batch_loss, batch_acc) = self.net.train(tx, ty)
            for (s, p, g) in zip(self.net.param_names(),
                                 self.net.param_values(), grads):
                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s), b)
            # update progress bar
            utils.update_progress(
                b * 1.0 / num_train_batch,
                'training loss = %f, accuracy = %f' %
                (batch_loss, batch_acc))
            loss += batch_loss
            acc += batch_acc
        print("\ntraining time = ", time() - start)
        info = 'training loss = %f, training accuracy = %f, lr = %f' \
            % (loss / num_train_batch, acc / num_train_batch, get_lr(epoch))
        print(info)

        loss, acc = 0.0, 0.0
        start = time()
        for b in range(num_test_batch):
            batch_loss, batch_acc = 0.0, 0.0
            x = test_x[b * self.batch_size:(b + 1) * self.batch_size]
            y = test_y[b * self.batch_size:(b + 1) * self.batch_size]
            sam_arrs = convert_samples(x, x.shape[1], self.vocab_size,
                                       self.dev)
            tx.copy_from_numpy(sam_arrs)
            ty.copy_from_numpy(np.array(y, dtype='int32'))
            # net.train is reused here only to compute loss/accuracy;
            # the returned gradients are discarded, not applied
            grads, (batch_loss, batch_acc) = self.net.train(tx, ty)
            loss += batch_loss
            acc += batch_acc
        print("evaluation time = ", time() - start)
        print('test loss = %f, test accuracy = %f \n'
              % (loss / num_test_batch, acc / num_test_batch))

        if (epoch % 2) == 1 or epoch + 1 == max_epoch:
            # checkpoint the model into a file
            with open('%s_%d.bin' % (model_path, epoch), 'wb') as fd:
                print('saving model to %s_%d.bin' % (model_path, epoch))
                d = {}
                for name, w in zip(self.net.param_names(),
                                   self.net.param_values()):
                    w.to_host()
                    d[name] = tensor.to_numpy(w)
                    w.to_device(self.dev)
                pickle.dump(d, fd)
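convert_samples turns the integer-encoded batches of shape (batch_size, maxlen) into the one-hot (batch_size, maxlen, vocab_size) layout that tx expects. A hedged NumPy sketch; the real helper may handle padding or device placement differently:

import numpy as np

def convert_samples(x, seq_len, vocab_size, dev):
    # one-hot encode each token id; `dev` is unused in this sketch,
    # since the caller copies the array onto the device itself
    out = np.zeros((x.shape[0], seq_len, vocab_size), dtype=np.float32)
    for i in range(x.shape[0]):
        for j in range(seq_len):
            out[i, j, int(x[i, j])] = 1.0
    return out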
def train(lr, ssfolder, meta_train, meta_test, data, net, mean, max_epoch,
          get_lr, weight_decay, input_shape, batch_size=100, use_cpu=False):
    print('Start initialization............')
    if use_cpu:
        print('Using CPU')
        dev = device.get_default_device()
    else:
        print('Using GPU')
        dev = device.create_cuda_gpu()

    net.to_device(dev)
    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
    for (p, specs) in zip(net.param_names(), net.param_specs()):
        opt.register(p, specs)

    dl_train = dt.MImageBatchIter(meta_train, batch_size, dt.load_from_img,
                                  shuffle=True, delimiter=' ',
                                  image_folder=data, capacity=10)
    dl_train.start()
    dl_test = dt.MImageBatchIter(meta_test, batch_size, dt.load_from_img,
                                 shuffle=False, delimiter=' ',
                                 image_folder=data, capacity=10)
    dl_test.start()
    num_train = dl_train.num_samples
    num_train_batch = num_train // batch_size
    num_test = dl_test.num_samples
    num_test_batch = num_test // batch_size
    remainder = num_test % batch_size

    best_acc = 0.0
    best_loss = 0.0
    nb_epoch_for_best_acc = 0
    tx = tensor.Tensor((batch_size, ) + input_shape, dev)
    ty = tensor.Tensor((batch_size, ), dev, core_pb2.kInt)
    for epoch in range(max_epoch):
        loss, acc = 0.0, 0.0
        print('Epoch %d' % epoch)
        for b in range(num_train_batch):
            t1 = time.time()
            x, y = dl_train.next()
            x -= mean
            t2 = time.time()
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            grads, (l, a) = net.train(tx, ty)
            loss += l
            acc += a
            for (s, p, g) in zip(net.param_names(), net.param_values(),
                                 grads):
                opt.apply_with_lr(epoch, lr, g, p, str(s), b)
            t3 = time.time()
            info = datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
                + ', batch %d: training loss = %f, accuracy = %f, ' \
                  'load_time = %.4f, training_time = %.4f' \
                  % (b, l, a, t2 - t1, t3 - t2)
            print(info)
        disp = datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
            + ', epoch %d: training loss = %f, training accuracy = %f, ' \
              'lr = %f' \
              % (epoch, loss / num_train_batch, acc / num_train_batch, lr)
        logging.info(disp)
        print(disp)

        if epoch % 50 == 0 and epoch > 0:
            try:
                net.save(os.path.join(ssfolder, 'model-%d' % epoch),
                         buffer_size=200)
            except Exception as e:
                print(e)
                net.save(os.path.join(ssfolder, 'model-%d' % epoch),
                         buffer_size=300)
            sinfo = datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
                + ', epoch %d: save model in %s' \
                  % (epoch, os.path.join(ssfolder, 'model-%d.bin' % epoch))
            logging.info(sinfo)
            print(sinfo)

        loss, acc = 0.0, 0.0
        for b in range(num_test_batch):
            x, y = dl_test.next()
            x -= mean
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            l, a = net.evaluate(tx, ty)
            loss += l * batch_size
            acc += a * batch_size
        if remainder > 0:
            # evaluate the last partial batch with right-sized tensors
            x, y = dl_test.next()
            x -= mean
            tx_rmd = tensor.Tensor((remainder, ) + input_shape, dev)
            ty_rmd = tensor.Tensor((remainder, ), dev, core_pb2.kInt)
            tx_rmd.copy_from_numpy(x[0:remainder, :, :])
            ty_rmd.copy_from_numpy(y[0:remainder, ])
            l, a = net.evaluate(tx_rmd, ty_rmd)
            loss += l * remainder
            acc += a * remainder
        acc /= num_test
        loss /= num_test
        disp = datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
            + ', epoch %d: test loss = %f, test accuracy = %f' \
              % (epoch, loss, acc)
        logging.info(disp)
        print(disp)

        # early stopping with learning-rate decay on plateau
        if acc > best_acc + 0.005:
            best_acc = acc
            best_loss = loss
            nb_epoch_for_best_acc = 0
        else:
            nb_epoch_for_best_acc += 1
            if nb_epoch_for_best_acc > 8:
                break
            elif nb_epoch_for_best_acc % 4 == 0:
                lr /= 10
                logging.info("Decay the learning rate from %f to %f" %
                             (lr * 10, lr))

    try:
        net.save(str(os.path.join(ssfolder, 'model')), buffer_size=200)
    except Exception as e:
        net.save(str(os.path.join(ssfolder, 'model')), buffer_size=300)
    sinfo = datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
        + ', save final model in %s' % os.path.join(ssfolder, 'model.bin')
    logging.info(sinfo)
    print(sinfo)

    dl_train.end()
    dl_test.end()
    return (best_acc, best_loss)