def train_mnist():
    epoch_num = 10
    if args.benchmark:
        epoch_num = 1
    BATCH_SIZE = 32
    with fluid.dygraph.guard():
        mnist = MNIST("mnist")
        # adam = AdamOptimizer(learning_rate=0.001)
        adam = MomentumOptimizer(learning_rate=0.01, momentum=0.5)
        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=BATCH_SIZE, drop_last=True)
        eval_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=10, drop_last=True)
        for epoch in range(epoch_num):
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            losses = AverageMeter('Loss', ':.4e')
            progress = ProgressMeter(
                len(list(train_reader())) - 1,
                batch_time,
                data_time,
                losses,
                prefix="epoch: [{}]".format(epoch))
            end = Tools.time()
            for batch_id, data in enumerate(train_reader()):
                data_time.update(Tools.time() - end)
                dy_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                dy_x_data = normalize(dy_x_data, 0.1307, 0.3081)
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(BATCH_SIZE, 1)
                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True
                cost, acc = mnist(img, label)
                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)
                mnist.clear_gradients()
                batch_time.update(Tools.time() - end)
                dy_out = avg_loss.numpy()[0]
                losses.update(dy_out, BATCH_SIZE)
                if batch_id % 10 == 0:
                    progress.print(batch_id)
                end = Tools.time()
                # if batch_id % 100 == 0:
                #     print("Loss at epoch {} step {}: {:}".format(
                #         epoch, batch_id, avg_loss.numpy()))
            mnist.eval()
            test_cost, test_acc = test_train(test_reader, mnist, BATCH_SIZE)
            test_p(eval_reader, mnist, 10)
            mnist.train()
            print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
                epoch, test_cost, test_acc))
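# NOTE: every training loop in this file leans on small profiling helpers
# (AverageMeter, ProgressMeter, Tools.time()) and, in train_mnist above, a
# normalize() function, none of which are defined in this excerpt. The sketch
# below is only an assumption about their interface, reconstructed from how
# they are called here; the benchmark's real utilities may differ.
import time


class AverageMeter(object):
    """Tracks the latest value and running average of a metric (sketch only)."""

    def __init__(self, name, fmt=':f'):
        self.name, self.fmt = name, fmt
        self.val = self.sum = self.count = self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        return ('{name} {val' + self.fmt + '} ({avg' + self.fmt + '})').format(
            name=self.name, val=self.val, avg=self.avg)


class ProgressMeter(object):
    """Prints all attached meters for a given batch index (sketch only)."""

    def __init__(self, num_batches, *meters, prefix=""):
        self.num_batches, self.meters, self.prefix = num_batches, meters, prefix

    def print(self, batch):
        print(self.prefix + ' [{}/{}]  '.format(batch, self.num_batches) +
              '\t'.join(str(m) for m in self.meters))


class Tools(object):
    """Thin wrapper so the timing source can be swapped in one place."""
    time = staticmethod(time.time)


def normalize(imgs, mean, std):
    """Standard (x - mean) / std normalization applied to the MNIST inputs."""
    return (imgs - mean) / std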
def train():
    with fluid.dygraph.guard(place):
        if args.benchmark:
            args.epoch = 1
        processor = reader.SentaProcessor(
            data_dir=args.data_dir,
            vocab_path=args.vocab_path,
            random_seed=args.random_seed)
        num_labels = len(processor.get_labels())
        num_train_examples = processor.get_num_examples(phase="train")
        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
        train_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='train',
            epoch=args.epoch,
            shuffle=True)
        eval_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='dev',
            epoch=args.epoch,
            shuffle=False)
        cnn_net = nets.CNN("cnn_net", args.vocab_size, args.batch_size,
                           args.padding_size)
        sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        length = len(list(enumerate(train_data_generator())))
        for eop in range(args.epoch):
            time_begin = time.time()
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(
                length, batch_time, data_time, prefix="epoch: [{}]".format(eop))
            end = Tools.time()
            for batch_id, data in enumerate(train_data_generator()):
                data_time.update(Tools.time() - end)
                steps += 1
                doc = to_variable(
                    np.array([
                        np.pad(x[0][0:args.padding_size],
                               (0, args.padding_size - len(x[0][0:args.padding_size])),
                               'constant',
                               constant_values=(args.vocab_size))
                        for x in data
                    ]).astype('int64').reshape(-1, 1))
                label = to_variable(
                    np.array([x[1] for x in data]).astype('int64').reshape(
                        args.batch_size, 1))
                cnn_net.train()
                avg_cost, prediction, acc = cnn_net(doc, label)
                avg_cost.backward()
                batch_time.update(Tools.time() - end)
                np_mask = (doc.numpy() != args.vocab_size).astype('int32')
                word_num = np.sum(np_mask)
                sgd_optimizer.minimize(avg_cost)
                cnn_net.clear_gradients()
                total_cost.append(avg_cost.numpy() * word_num)
                total_acc.append(acc.numpy() * word_num)
                total_num_seqs.append(word_num)
                if steps % args.skip_steps == 0:
                    time_end = time.time()
                    used_time = time_end - time_begin
                    progress.print(batch_id + 1)
                    # print("step: %d, ave loss: %f, "
                    #       "ave acc: %f, speed: %f steps/s" %
                    #       (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                    #        np.sum(total_acc) / np.sum(total_num_seqs),
                    #        args.skip_steps / used_time))
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()
                if steps % args.validation_steps == 0:
                    total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
                    cnn_net.eval()
                    eval_steps = 0
                    for eval_batch_id, eval_data in enumerate(
                            eval_data_generator()):
                        eval_np_doc = np.array([
                            np.pad(x[0][0:args.padding_size],
                                   (0, args.padding_size - len(x[0][0:args.padding_size])),
                                   'constant',
                                   constant_values=(args.vocab_size))
                            for x in eval_data
                        ]).astype('int64').reshape(1, -1)
                        eval_label = to_variable(
                            np.array([x[1] for x in eval_data]).astype('int64')
                            .reshape(args.batch_size, 1))
                        eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
                        eval_avg_cost, eval_prediction, eval_acc = cnn_net(
                            eval_doc, eval_label)
                        eval_np_mask = (
                            eval_np_doc != args.vocab_size).astype('int32')
                        eval_word_num = np.sum(eval_np_mask)
                        total_eval_cost.append(eval_avg_cost.numpy() * eval_word_num)
                        total_eval_acc.append(eval_acc.numpy() * eval_word_num)
                        total_eval_num_seqs.append(eval_word_num)
                        eval_steps += 1
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("Final validation result: step: %d, ave loss: %f, "
                          "ave acc: %f, speed: %f steps/s" %
                          (steps,
                           np.sum(total_eval_cost) / np.sum(total_eval_num_seqs),
                           np.sum(total_eval_acc) / np.sum(total_eval_num_seqs),
                           eval_steps / used_time))
                    time_begin = time.time()
                # if steps % args.save_steps == 0:
                #     save_path = "save_dir_" + str(steps)
                #     print('save model to: ' + save_path)
                #     fluid.dygraph.save_persistables(cnn_net.state_dict(),
                #                                     save_path)
                end = Tools.time()
def train(): """ train models :return: """ trainer_count = fluid.dygraph.parallel.Env().nranks place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \ if args.use_data_parallel else fluid.CUDAPlace(0) with fluid.dygraph.guard(place): if args.use_data_parallel: strategy = fluid.dygraph.parallel.prepare_context() transformer = TransFormer( 'transformer', ModelHyperParams.src_vocab_size, ModelHyperParams.trg_vocab_size, ModelHyperParams.max_length + 1, ModelHyperParams.n_layer, ModelHyperParams.n_head, ModelHyperParams.d_key, ModelHyperParams.d_value, ModelHyperParams.d_model, ModelHyperParams.d_inner_hid, ModelHyperParams.prepostprocess_dropout, ModelHyperParams.attention_dropout, ModelHyperParams.relu_dropout, ModelHyperParams.preprocess_cmd, ModelHyperParams.postprocess_cmd, ModelHyperParams.weight_sharing, TrainTaskConfig.label_smooth_eps) optimizer = fluid.optimizer.SGD(learning_rate=0.003) if args.use_data_parallel: transformer = fluid.dygraph.parallel.DataParallel( transformer, strategy) reader = paddle.batch(wmt16.train(ModelHyperParams.src_vocab_size, ModelHyperParams.trg_vocab_size), batch_size=TrainTaskConfig.batch_size) if args.use_data_parallel: reader = fluid.contrib.reader.distributed_batch_reader(reader) for i in range(200): dy_step = 0 batch_time = AverageMeter('Time', ':6.3f') data_time = AverageMeter('Data', ':6.3f') progress = ProgressMeter(len(list(reader())) - 1, batch_time, data_time, prefix="epoch: [{}]".format(i)) end = Tools.time() for batch in reader(): data_time.update(Tools.time() - end) np_values = prepare_batch_input(batch, ModelHyperParams.src_pad_idx, ModelHyperParams.trg_pad_idx, ModelHyperParams.n_head) enc_inputs, dec_inputs, label, weights = create_data(np_values) dy_sum_cost, dy_avg_cost, dy_predict, dy_token_num = transformer( enc_inputs, dec_inputs, label, weights) if args.use_data_parallel: dy_avg_cost = transformer.scale_loss(dy_avg_cost) dy_avg_cost.backward() transformer.apply_collective_grads() else: dy_avg_cost.backward() optimizer.minimize(dy_avg_cost) transformer.clear_gradients() batch_time.update(Tools.time() - end) dy_step = dy_step + 1 if dy_step % 1 == 0: progress.print(dy_step) print("pass num : {}, batch_id: {}, dy_graph avg loss: {}". format(i, dy_step, dy_avg_cost.numpy())) end = Tools.time() print("pass : {} finished".format(i))
def train_mnist(args):
    epoch_num = 5
    BATCH_SIZE = 256
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        mnist = MNIST("mnist")
        adam = AdamOptimizer(learning_rate=0.001)
        if args.use_data_parallel:
            mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_sampler(
                paddle.dataset.mnist.train(), batch_size=BATCH_SIZE)
        else:
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(),
                batch_size=BATCH_SIZE,
                drop_last=True)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=BATCH_SIZE, drop_last=True)
        for epoch in range(epoch_num):
            # define eval
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            losses = AverageMeter('Loss', ':.4e')
            progress = ProgressMeter(
                len(list(train_reader())) - 1,
                batch_time,
                data_time,
                losses,
                prefix="epoch: [{}]".format(epoch))
            end = Tools.time()
            for batch_id, data in enumerate(train_reader()):
                data_time.update(Tools.time() - end)
                dy_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(-1, 1)
                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True
                cost, acc = mnist(img, label)
                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)
                if args.use_data_parallel:
                    avg_loss = mnist.scale_loss(avg_loss)
                    avg_loss.backward()
                    mnist.apply_collective_grads()
                else:
                    avg_loss.backward()
                adam.minimize(avg_loss)
                # save checkpoint
                mnist.clear_gradients()
                batch_time.update(Tools.time() - end)
                dy_out = avg_loss.numpy()[0]
                losses.update(dy_out, BATCH_SIZE)
                if batch_id % 10 == 0:
                    progress.print(batch_id)
                end = Tools.time()
            mnist.eval()
            test_cost, test_acc = test_mnist(test_reader, mnist, BATCH_SIZE)
            mnist.train()
            print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
                epoch, test_cost, test_acc))
        fluid.dygraph.save_persistables(mnist.state_dict(), "save_dir")
        print("checkpoint saved")
        inference_mnist()
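# NOTE: test_mnist() and inference_mnist() are not shown in this excerpt. A
# minimal evaluation helper matching the call test_mnist(test_reader, mnist,
# BATCH_SIZE) -- a sketch assuming it simply averages loss and accuracy over
# the test reader, not necessarily the benchmark's actual implementation:
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable


def test_mnist(reader, model, batch_size):
    """Return (mean loss, mean accuracy) over one pass of the reader."""
    acc_set = []
    avg_loss_set = []
    for batch_id, data in enumerate(reader()):
        dy_x_data = np.array(
            [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
        y_data = np.array(
            [x[1] for x in data]).astype('int64').reshape(batch_size, 1)
        img = to_variable(dy_x_data)
        label = to_variable(y_data)
        label.stop_gradient = True
        prediction, acc = model(img, label)
        loss = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_loss = fluid.layers.mean(loss)
        acc_set.append(float(acc.numpy()))
        avg_loss_set.append(float(avg_loss.numpy()))
    return np.mean(avg_loss_set), np.mean(acc_set)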
def train_resnet():
    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        resnet = ResNet("resnet")
        optimizer = optimizer_setting()
        if args.use_data_parallel:
            resnet = fluid.dygraph.parallel.DataParallel(resnet, strategy)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_sampler(
                paddle.dataset.flowers.train(use_xmap=False),
                batch_size=batch_size * trainer_count)
        else:
            train_reader = paddle.batch(
                paddle.dataset.flowers.train(use_xmap=False),
                batch_size=batch_size)
        test_reader = paddle.batch(
            paddle.dataset.flowers.test(use_xmap=False), batch_size=batch_size)
        # file_name = './model/epoch_0.npz'
        # model_data = np.load(file_name)
        total_pass = len(list(train_reader())) - 1
        # total_batch_size = sum(1 for _ in train_reader())
        # total_batch_size = 10000
        for eop in range(epoch):
            resnet.train()
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0
            # dict_state = resnet.state_dict()
            # resnet.load_dict(model_data)
            # print("load finished")
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(
                total_pass, batch_time, data_time,
                prefix="epoch: [{}]".format(eop))
            end = Tools.time()
            for batch_id, data in enumerate(train_reader()):
                data_time.update(Tools.time() - end)
                dy_x_data = np.array(
                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
                if len(np.array([x[1] for x in data]).astype('int64')) != batch_size:
                    continue
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(-1, 1)
                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label._stop_gradient = True
                out = resnet(img)
                loss = fluid.layers.cross_entropy(input=out, label=label)
                avg_loss = fluid.layers.mean(x=loss)
                acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
                acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
                dy_out = avg_loss.numpy()
                if args.use_data_parallel:
                    avg_loss = resnet.scale_loss(avg_loss)
                    avg_loss.backward()
                    resnet.apply_collective_grads()
                else:
                    avg_loss.backward()
                optimizer.minimize(avg_loss)
                resnet.clear_gradients()
                batch_time.update(Tools.time() - end)
                total_loss += dy_out
                total_acc1 += acc_top1.numpy()
                total_acc5 += acc_top5.numpy()
                total_sample += 1
                # print("epoch id: %d, batch step: %d, loss: %f" % (eop, batch_id, dy_out))
                if batch_id % 1 == 0:
                    progress.print(batch_id)
                    print("epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
                          (eop, batch_id, total_loss / total_sample, \
                           total_acc1 / total_sample, total_acc5 / total_sample))
                end = Tools.time()
            print("epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
                  (eop, batch_id, total_loss / total_sample, \
                   total_acc1 / total_sample, total_acc5 / total_sample))
def train_ptb_lm():
    args = parse_args()
    model_type = args.model_type
    vocab_size = 10000
    if model_type == "test":
        num_layers = 1
        batch_size = 2
        hidden_size = 10
        num_steps = 3
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 1
        max_epoch = 1
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "small":
        num_layers = 2
        batch_size = 20
        hidden_size = 200
        num_steps = 20
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 4
        max_epoch = 13
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "medium":
        num_layers = 2
        batch_size = 20
        hidden_size = 650
        num_steps = 35
        init_scale = 0.05
        max_grad_norm = 5.0
        epoch_start_decay = 6
        max_epoch = 39
        dropout = 0.5
        lr_decay = 0.8
        base_learning_rate = 1.0
    elif model_type == "large":
        num_layers = 2
        batch_size = 20
        hidden_size = 1500
        num_steps = 35
        init_scale = 0.04
        max_grad_norm = 10.0
        epoch_start_decay = 14
        max_epoch = 55
        dropout = 0.65
        lr_decay = 1.0 / 1.15
        base_learning_rate = 1.0
    else:
        print("model type not supported")
        return

    with fluid.dygraph.guard(core.CUDAPlace(0)):
        fluid.default_main_program().random_seed = 33
        fluid.default_startup_program().random_seed = 33
        np.random.seed(33)
        ptb_model = PtbModel(
            "ptb_model",
            hidden_size=hidden_size,
            vocab_size=vocab_size,
            num_layers=num_layers,
            num_steps=num_steps,
            init_scale=init_scale,
            dropout=dropout)
        dy_param_updated = dict()
        dy_param_init = dict()
        dy_loss = None
        last_hidden = None
        last_cell = None
        data_path = args.data_path
        print("begin to load data")
        raw_data = reader.ptb_raw_data(data_path)
        print("finished load data")
        train_data, valid_data, test_data, _ = raw_data

        batch_len = len(train_data) // batch_size
        total_batch_size = (batch_len - 1) // num_steps
        log_interval = total_batch_size // 100

        bd = []
        lr_arr = [1.0]
        for i in range(1, max_epoch):
            bd.append(total_batch_size * i)
            new_lr = base_learning_rate * (lr_decay**
                                           max(i + 1 - epoch_start_decay, 0.0))
            lr_arr.append(new_lr)
        sgd = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=lr_arr))

        def eval(model, data):
            print("begin to eval")
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            model.eval()
            train_data_iter = reader.get_data_iter(data, batch_size, num_steps)
            for batch_id, batch in enumerate(train_data_iter):
                x_data, y_data = batch
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
                                                            init_cell)
                out_loss = dy_loss.numpy()
                init_hidden_data = last_hidden.numpy()
                init_cell_data = last_cell.numpy()
                total_loss += out_loss
                iters += num_steps
            print("eval finished")
            ppl = np.exp(total_loss / iters)
            print("ppl ", batch_id, ppl[0])

        grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(max_grad_norm)
        for epoch_id in range(max_epoch):
            ptb_model.train()
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            train_data_iter = reader.get_data_iter(train_data, batch_size,
                                                   num_steps)
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            losses = AverageMeter('Loss', ':.4e')
            progress = ProgressMeter(
                total_batch_size,
                batch_time,
                data_time,
                prefix="epoch: [{}]".format(epoch_id))
            start_time = time.time()
            end = Tools.time()
            for batch_id, batch in enumerate(train_data_iter):
                data_time.update(Tools.time() - end)
                x_data, y_data = batch
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
                                                            init_cell)
                out_loss = dy_loss.numpy()
                init_hidden_data = last_hidden.numpy()
                init_cell_data = last_cell.numpy()
                dy_loss.backward()
                sgd.minimize(dy_loss, grad_clip=grad_clip)
                ptb_model.clear_gradients()
                batch_time.update(Tools.time() - end)
                # losses.update(out_loss, batch_size)
                total_loss += out_loss
                iters += num_steps
                if batch_id > 0 and batch_id % log_interval == 0:
                    progress.print(batch_id)
                    ppl = np.exp(total_loss / iters)
                    print(epoch_id, "ppl ", batch_id, ppl[0],
                          sgd._global_learning_rate().numpy())
                end = Tools.time()
            print("one epoch finished", epoch_id)
            print("time cost ", time.time() - start_time)
            ppl = np.exp(total_loss / iters)
            print("ppl ", epoch_id, ppl[0])
            eval(ptb_model, valid_data)
        eval(ptb_model, test_data)
def train_mnist(args):
    epoch_num = args.epoch
    BATCH_SIZE = 32
    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        mnist = MNIST("mnist")
        adam = AdamOptimizer(learning_rate=0.001)
        if args.use_data_parallel:
            mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)
        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=BATCH_SIZE, drop_last=True)
        for epoch in range(epoch_num):
            total_loss = 0.0
            total_acc = 0.0
            total_sample = 0
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            losses = AverageMeter('Loss', ':.4e')
            progress = ProgressMeter(
                len(list(train_reader())) - 1,
                batch_time,
                data_time,
                losses,
                prefix="epoch: [{}]".format(epoch))
            end = Tools.time()
            for batch_id, data in enumerate(train_reader()):
                data_time.update(Tools.time() - end)
                dy_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(-1, 1)
                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True
                cost, acc = mnist(img, label)
                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)
                if args.use_data_parallel:
                    avg_loss = mnist.scale_loss(avg_loss)
                    avg_loss.backward()
                    mnist.apply_collective_grads()
                else:
                    avg_loss.backward()
                adam.minimize(avg_loss)
                # save checkpoint
                mnist.clear_gradients()
                batch_time.update(Tools.time() - end)
                total_loss += avg_loss.numpy()
                total_acc += acc.numpy()
                total_sample += 1
                dy_out = avg_loss.numpy()[0]
                losses.update(dy_out, BATCH_SIZE)
                if batch_id % 10 == 0:
                    progress.print(batch_id)
                    print("epoch %d | batch step %d, loss %0.3f acc %0.3f" % \
                          (epoch, batch_id, total_loss / total_sample,
                           total_acc / total_sample))
                if batch_id % 100 == 0:
                    print("Loss at epoch {} step {}: {:}".format(
                        epoch, batch_id, avg_loss.numpy()))
                end = Tools.time()
            mnist.eval()
            test_cost, test_acc = test_mnist(test_reader, mnist, BATCH_SIZE)
            mnist.train()
            if args.ce:
                print("kpis\ttest_acc\t%s" % test_acc)
                print("kpis\ttest_cost\t%s" % test_cost)
            print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
                epoch, test_cost, test_acc))
def train(args):
    with fluid.dygraph.guard():
        backward_strategy = fluid.dygraph.BackwardStrategy()
        backward_strategy.sort_sum_gradient = True
        ocr_attention = OCRAttention("ocr_attention")

        if Config.learning_rate_decay == "piecewise_decay":
            learning_rate = fluid.layers.piecewise_decay(
                [50000], [Config.LR, Config.LR * 0.01])
        else:
            learning_rate = Config.LR
        optimizer = fluid.optimizer.Adam(learning_rate=0.001)
        dy_param_init_value = {}
        grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(5.0)

        train_reader = data_reader.data_reader(
            Config.batch_size,
            cycle=args.total_step > 0,
            shuffle=True,
            data_type='train')
        infer_image = './data/data/test_images/'
        infer_files = './data/data/test.list'
        test_reader = data_reader.data_reader(
            Config.batch_size, cycle=False, data_type="test")

        def eval():
            ocr_attention.eval()
            total_loss = 0.0
            total_step = 0.0
            equal_size = 0
            for data in test_reader():
                data_dict = get_attention_feeder_data(data)
                label_in = to_variable(data_dict["label_in"])
                label_out = to_variable(data_dict["label_out"])
                label_out._stop_gradient = True
                label_out.trainable = False
                img = to_variable(data_dict["pixel"])
                prediction = ocr_attention(img, label_in)
                prediction = fluid.layers.reshape(
                    prediction,
                    [label_out.shape[0] * label_out.shape[1], -1],
                    inplace=False)
                score, topk = layers.topk(prediction, 1)
                seq = topk.numpy()
                seq = seq.reshape((args.batch_size, -1))
                mask = data_dict['mask'].reshape((args.batch_size, -1))
                seq_len = np.sum(mask, -1)
                trans_ref = data_dict["label_out"].reshape((args.batch_size, -1))
                for i in range(args.batch_size):
                    length = int(seq_len[i] - 1)
                    trans = seq[i][:length - 1]
                    ref = trans_ref[i][:length - 1]
                    if np.array_equal(trans, ref):
                        equal_size += 1
                total_step += args.batch_size
            print("eval cost", equal_size / total_step)

        total_step = 0
        epoch_num = 20
        if args.benchmark:
            epoch_num = 1
        j = 0
        for i in train_reader():
            j += 1
            if j % 100 == 0:
                print(j)
        print(j)
        # total_pass = len(list(train_reader()))
        # print(total_pass)
        for epoch in range(epoch_num):
            batch_id = 0
            total_loss = 0.0
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(
                399425, batch_time, data_time,
                prefix="epoch: [{}]".format(epoch))
            end = Tools.time()
            for data in train_reader():
                data_time.update(Tools.time() - end)
                total_step += 1
                data_dict = get_attention_feeder_data(data)
                label_in = to_variable(data_dict["label_in"])
                label_out = to_variable(data_dict["label_out"])
                label_out._stop_gradient = True
                label_out.trainable = False
                img = to_variable(data_dict["pixel"])
                prediction = ocr_attention(img, label_in)
                prediction = fluid.layers.reshape(
                    prediction,
                    [label_out.shape[0] * label_out.shape[1], -1],
                    inplace=False)
                label_out = fluid.layers.reshape(
                    label_out, [-1, 1], inplace=False)
                loss = fluid.layers.cross_entropy(
                    input=prediction, label=label_out)
                mask = to_variable(data_dict["mask"])
                loss = layers.elementwise_mul(loss, mask, axis=0)
                avg_loss = fluid.layers.reduce_sum(loss)
                total_loss += avg_loss.numpy()
                avg_loss.backward()
                optimizer.minimize(avg_loss, grad_clip=grad_clip)
                ocr_attention.clear_gradients()
                batch_time.update(Tools.time() - end)
                framework._dygraph_tracer()._clear_ops()
                if batch_id > 0 and batch_id % 50 == 0:
                    progress.print(batch_id)
                    print("epoch: {}, batch_id: {}, loss {}".format(
                        epoch, batch_id, total_loss / args.batch_size / 50))
                    total_loss = 0.0
                if total_step > 0 and total_step % 2000 == 0:
                    ocr_attention.eval()
                    eval()
                    ocr_attention.train()
                batch_id += 1
                end = Tools.time()
def train():
    epoch_num = train_parameters["num_epochs"]
    if args.ce:
        epoch_num = args.epoch
    batch_size = train_parameters["batch_size"]
    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 90
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        se_resnext = SeResNeXt("se_resnext")
        optimizer = optimizer_setting(train_parameters)
        if args.use_data_parallel:
            se_resnext = fluid.dygraph.parallel.DataParallel(se_resnext,
                                                             strategy)
        train_reader = paddle.batch(
            paddle.dataset.flowers.train(use_xmap=False),
            batch_size=batch_size,
            drop_last=True)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)
        test_reader = paddle.batch(
            paddle.dataset.flowers.test(use_xmap=False), batch_size=32)
        for epoch_id in range(epoch_num):
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(
                len(list(train_reader())) - 1,
                batch_time,
                data_time,
                prefix="epoch: [{}]".format(epoch_id))
            end = Tools.time()
            for batch_id, data in enumerate(train_reader()):
                data_time.update(Tools.time() - end)
                dy_x_data = np.array(
                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(batch_size, 1)
                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True
                out = se_resnext(img)
                softmax_out = fluid.layers.softmax(out, use_cudnn=False)
                loss = fluid.layers.cross_entropy(
                    input=softmax_out, label=label)
                avg_loss = fluid.layers.mean(x=loss)
                acc_top1 = fluid.layers.accuracy(
                    input=softmax_out, label=label, k=1)
                acc_top5 = fluid.layers.accuracy(
                    input=softmax_out, label=label, k=5)
                dy_out = avg_loss.numpy()
                if args.use_data_parallel:
                    avg_loss = se_resnext.scale_loss(avg_loss)
                    avg_loss.backward()
                    se_resnext.apply_collective_grads()
                else:
                    avg_loss.backward()
                optimizer.minimize(avg_loss)
                se_resnext.clear_gradients()
                batch_time.update(Tools.time() - end)
                lr = optimizer._global_learning_rate().numpy()
                total_loss += dy_out
                total_acc1 += acc_top1.numpy()
                total_acc5 += acc_top5.numpy()
                total_sample += 1
                if batch_id % 1 == 0:
                    progress.print(batch_id)
                    print("epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f lr %0.5f" % \
                          (epoch_id, batch_id, total_loss / total_sample, \
                           total_acc1 / total_sample, total_acc5 / total_sample, lr))
                end = Tools.time()
            if args.ce:
                print("kpis\ttrain_acc1\t%0.3f" % (total_acc1 / total_sample))
                print("kpis\ttrain_acc5\t%0.3f" % (total_acc5 / total_sample))
                print("kpis\ttrain_loss\t%0.3f" % (total_loss / total_sample))
            print("epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
                  (epoch_id, batch_id, total_loss / total_sample, \
                   total_acc1 / total_sample, total_acc5 / total_sample))
            se_resnext.eval()
            eval(se_resnext, test_reader)
            se_resnext.train()
def train(args):
    with fluid.dygraph.guard():
        max_images_num = data_reader.max_images_num()
        shuffle = True
        data_shape = [-1] + data_reader.image_shape()
        # print(data_shape)
        A_pool = ImagePool()
        B_pool = ImagePool()
        A_reader = paddle.batch(
            data_reader.a_reader(shuffle=shuffle), args.batch_size)()
        B_reader = paddle.batch(
            data_reader.b_reader(shuffle=shuffle), args.batch_size)()
        A_test_reader = data_reader.a_test_reader()
        B_test_reader = data_reader.b_test_reader()
        cycle_gan = Cycle_Gan("cycle_gan", istrain=True)
        losses = [[], []]
        t_time = 0
        optimizer1 = optimizer_setting()
        optimizer2 = optimizer_setting()
        optimizer3 = optimizer_setting()

        for epoch in range(args.epoch):
            pro_batch_time = AverageMeter('Time', ':6.3f')
            pro_data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(
                max_images_num, pro_batch_time, pro_data_time,
                prefix="epoch: [{}]".format(epoch))
            end = Tools.time()
            batch_id = 0
            for i in range(max_images_num):
                data_A = next(A_reader)
                data_B = next(B_reader)
                pro_data_time.update(Tools.time() - end)
                s_time = time.time()
                data_A = np.array(
                    [data_A[0].reshape(3, 256, 256)]).astype("float32")
                data_B = np.array(
                    [data_B[0].reshape(3, 256, 256)]).astype("float32")
                data_A = to_variable(data_A)
                data_B = to_variable(data_B)

                # optimize the g_A network
                fake_A, fake_B, cyc_A, cyc_B, g_A_loss, g_B_loss, idt_loss_A, \
                    idt_loss_B, cyc_A_loss, cyc_B_loss, g_loss = cycle_gan(
                        data_A, data_B, True, False, False)
                g_loss_out = g_loss.numpy()
                g_loss.backward()
                vars_G = []
                for param in cycle_gan.parameters():
                    if param.name[:52] == "cycle_gan/Cycle_Gan_0/build_generator_resnet_9blocks":
                        vars_G.append(param)
                optimizer1.minimize(g_loss, parameter_list=vars_G)
                cycle_gan.clear_gradients()

                fake_pool_B = B_pool.pool_image(fake_B).numpy()
                fake_pool_B = np.array(
                    [fake_pool_B[0].reshape(3, 256, 256)]).astype("float32")
                fake_pool_B = to_variable(fake_pool_B)
                fake_pool_A = A_pool.pool_image(fake_A).numpy()
                fake_pool_A = np.array(
                    [fake_pool_A[0].reshape(3, 256, 256)]).astype("float32")
                fake_pool_A = to_variable(fake_pool_A)

                # optimize the d_A network
                rec_B, fake_pool_rec_B = cycle_gan(data_B, fake_pool_B, False,
                                                   True, False)
                d_loss_A = (fluid.layers.square(fake_pool_rec_B) +
                            fluid.layers.square(rec_B - 1)) / 2.0
                d_loss_A = fluid.layers.reduce_mean(d_loss_A)
                d_loss_A.backward()
                vars_da = []
                for param in cycle_gan.parameters():
                    if param.name[:47] == "cycle_gan/Cycle_Gan_0/build_gen_discriminator_0":
                        vars_da.append(param)
                optimizer2.minimize(d_loss_A, parameter_list=vars_da)
                cycle_gan.clear_gradients()

                # optimize the d_B network
                rec_A, fake_pool_rec_A = cycle_gan(data_A, fake_pool_A, False,
                                                   False, True)
                d_loss_B = (fluid.layers.square(fake_pool_rec_A) +
                            fluid.layers.square(rec_A - 1)) / 2.0
                d_loss_B = fluid.layers.reduce_mean(d_loss_B)
                d_loss_B.backward()
                vars_db = []
                for param in cycle_gan.parameters():
                    if param.name[:47] == "cycle_gan/Cycle_Gan_0/build_gen_discriminator_1":
                        vars_db.append(param)
                optimizer3.minimize(d_loss_B, parameter_list=vars_db)
                cycle_gan.clear_gradients()

                batch_time = time.time() - s_time
                t_time += batch_time
                pro_batch_time.update(Tools.time() - end)
                # print("epoch{}; batch{}; g_loss:{}; d_A_loss: {}; d_B_loss:{};\n"
                #       " g_A_loss: {}; g_A_cyc_loss: {}; g_A_idt_loss: {};"
                #       " g_B_loss: {}; g_B_cyc_loss: {}; g_B_idt_loss: {};"
                #       " Batch_time_cost: {:.2f}".format(
                #           epoch, batch_id, g_loss_out[0], d_loss_A.numpy()[0],
                #           d_loss_B.numpy()[0], g_A_loss.numpy()[0],
                #           cyc_A_loss.numpy()[0], idt_loss_A.numpy()[0],
                #           g_B_loss.numpy()[0], cyc_B_loss.numpy()[0],
                #           idt_loss_B.numpy()[0], batch_time))
                with open('logging_train.txt', 'a') as log_file:
                    now = time.strftime("%c")
                    log_file.write(
                        "time: {}; epoch{}; batch{}; d_A_loss: {}; g_A_loss: {}; "
                        "g_A_cyc_loss: {}; g_A_idt_loss: {}; d_B_loss: {}; "
                        "g_B_loss: {}; g_B_cyc_loss: {}; g_B_idt_loss: {}; "
                        "Batch_time_cost: {:.2f}\n".format(
                            now, epoch, batch_id, d_loss_A.numpy()[0],
                            g_A_loss.numpy()[0], cyc_A_loss.numpy()[0],
                            idt_loss_A.numpy()[0], d_loss_B.numpy()[0],
                            g_B_loss.numpy()[0], cyc_B_loss.numpy()[0],
                            idt_loss_B.numpy()[0], batch_time))
                losses[0].append(g_A_loss.numpy()[0])
                losses[1].append(d_loss_A.numpy()[0])
                sys.stdout.flush()
                batch_id += 1
                if batch_id % 10 == 0:
                    progress.print(batch_id)
                    print("epoch{}; | batch step{}; g_A_loss:{}; d_A_loss:{}"
                          .format(epoch, batch_id, g_A_loss.numpy()[0],
                                  d_loss_A.numpy()[0]))
                end = Tools.time()
            if args.save_checkpoints:
                fluid.dygraph.save_persistables(
                    cycle_gan.state_dict(),
                    args.output + "/checkpoints/{}".format(epoch))
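# NOTE: ImagePool is not defined in this excerpt. In CycleGAN training it is
# conventionally a history buffer of previously generated images that the
# discriminators are updated against, which stabilizes training. The sketch
# below illustrates that idea and is only an assumption about this repo's
# implementation.
import random


class ImagePool(object):
    """Pool of up to pool_size previously generated images (sketch only)."""

    def __init__(self, pool_size=50):
        self.pool_size = pool_size
        self.images = []

    def pool_image(self, image):
        # Fill the pool first; afterwards, with probability 0.5 swap the new
        # image for a stored one and return the stored image instead.
        if len(self.images) < self.pool_size:
            self.images.append(image)
            return image
        if random.random() > 0.5:
            idx = random.randint(0, self.pool_size - 1)
            stored, self.images[idx] = self.images[idx], image
            return stored
        return image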