def train_mnist(args): epoch_num = args.epoch BATCH_SIZE = 64 place = fluid.CPUPlace() with fluid.dygraph.guard(place): seed = 33 np.random.seed(seed) fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed if args.use_data_parallel: strategy = fluid.dygraph.parallel.prepare_context() mnist = MNIST("mnist") adam = AdamOptimizer(learning_rate=0.001) if args.use_data_parallel: mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy) train_reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True) if args.use_data_parallel: train_reader = fluid.contrib.reader.distributed_batch_reader( train_reader) test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=BATCH_SIZE, drop_last=True) for epoch in range(epoch_num): for batch_id, data in enumerate(train_reader()): dy_x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array([x[1] for x in data ]).astype('int64').reshape(-1, 1) img = to_variable(dy_x_data) label = to_variable(y_data) label.stop_gradient = True cost, acc = mnist(img, label) loss = fluid.layers.cross_entropy(cost, label) avg_loss = fluid.layers.mean(loss) if args.use_data_parallel: avg_loss = mnist.scale_loss(avg_loss) avg_loss.backward() mnist.apply_collective_grads() else: avg_loss.backward() adam.minimize(avg_loss) # save checkpoint mnist.clear_gradients() if batch_id % 100 == 0: print("Loss at epoch {} step {}: {:}".format( epoch, batch_id, avg_loss.numpy())) print("checkpoint saved")
def train_mnist(args, model, tokens=None): epoch_num = args.epoch BATCH_SIZE = 64 adam = AdamOptimizer(learning_rate=0.001, parameter_list=model.parameters()) train_reader = paddle.fluid.io.batch(paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True) if args.use_data_parallel: train_reader = fluid.contrib.reader.distributed_batch_reader( train_reader) for epoch in range(epoch_num): for batch_id, data in enumerate(train_reader()): dy_x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array([x[1] for x in data]).astype('int64').reshape(-1, 1) img = to_variable(dy_x_data) label = to_variable(y_data) label.stop_gradient = True cost, acc = model.forward(img, label, tokens=tokens) loss = fluid.layers.cross_entropy(cost, label) avg_loss = fluid.layers.mean(loss) if args.use_data_parallel: avg_loss = model.scale_loss(avg_loss) avg_loss.backward() model.apply_collective_grads() else: avg_loss.backward() adam.minimize(avg_loss) # save checkpoint model.clear_gradients() if batch_id % 1 == 0: print("Loss at epoch {} step {}: {:}".format( epoch, batch_id, avg_loss.numpy())) model.eval() test_acc = test_mnist(model, tokens=tokens) model.train() print("Loss at epoch {} , acc is: {}".format(epoch, test_acc)) save_parameters = (not args.use_data_parallel) or ( args.use_data_parallel and fluid.dygraph.parallel.Env().local_rank == 0) if save_parameters: fluid.save_dygraph(model.state_dict(), "save_temp") print("checkpoint saved")
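The loop above evaluates with a test_mnist helper that is not shown in this section. The following is only a minimal sketch under the assumed signature test_mnist(model, tokens=None); it reuses the same batching and the (cost, acc) model interface seen in the training loop, and everything else is an assumption.

# Hypothetical evaluation helper matching the call test_mnist(model, tokens=tokens) above.
import numpy as np
import paddle
from paddle.fluid.dygraph import to_variable


def test_mnist(model, tokens=None, batch_size=64):
    test_reader = paddle.fluid.io.batch(
        paddle.dataset.mnist.test(), batch_size=batch_size, drop_last=True)
    acc_set = []
    for batch_id, data in enumerate(test_reader()):
        dy_x_data = np.array([x[0].reshape(1, 28, 28)
                              for x in data]).astype('float32')
        y_data = np.array([x[1] for x in data]).astype('int64').reshape(-1, 1)
        img = to_variable(dy_x_data)
        label = to_variable(y_data)
        label.stop_gradient = True
        # the model above returns (cost, acc) for a labelled batch
        cost, acc = model.forward(img, label, tokens=tokens)
        acc_set.append(float(acc.numpy()))
    return np.mean(acc_set)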
def finetune(args):
    ernie = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    with fluid.dygraph.guard():
        dataset = hub.dataset.ChnSentiCorp()
        tc = TransformerClassifier(num_classes=dataset.num_labels,
                                   transformer=ernie)
        adam = AdamOptimizer(learning_rate=1e-5, parameter_list=tc.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            tc.load_dict(state_dict)

        reader = hub.reader.ClassifyReader(
            dataset=dataset,
            vocab_path=ernie.get_vocab_path(),
            max_seq_len=args.max_seq_len,
            sp_model_path=ernie.get_spm_path(),
            word_dict_path=ernie.get_word_dict_path())
        train_reader = reader.data_generator(batch_size=args.batch_size,
                                             phase='train')

        loss_sum = acc_sum = cnt = 0
        # run num_epoch rounds of training
        for epoch in range(args.num_epoch):
            # read the training data and train
            for batch_id, data in enumerate(train_reader()):
                input_ids = np.array(data[0][0]).astype(np.int64)
                position_ids = np.array(data[0][1]).astype(np.int64)
                segment_ids = np.array(data[0][2]).astype(np.int64)
                input_mask = np.array(data[0][3]).astype(np.float32)
                labels = np.array(data[0][4]).astype(np.int64)
                pred = tc(input_ids, position_ids, segment_ids, input_mask)
                acc = fluid.layers.accuracy(pred, to_variable(labels))
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # update the parameters
                adam.minimize(avg_loss)

                loss_sum += avg_loss.numpy() * labels.shape[0]
                acc_sum += acc.numpy() * labels.shape[0]
                cnt += labels.shape[0]
                if batch_id % args.log_interval == 0:
                    print('epoch {}: loss {}, acc {}'.format(
                        epoch, loss_sum / cnt, acc_sum / cnt))
                    loss_sum = acc_sum = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = tc.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
def test_train(self): main_prog = fluid.Program() with fluid.program_guard(main_prog): mnist = MNIST() adam = AdamOptimizer(learning_rate=0.001, parameter_list=mnist.parameters()) exe = fluid.Executor(self.place) start = time() img = fluid.data(name='img', shape=[None, 1, 28, 28], dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') label.stop_gradient = True prediction, acc, avg_loss = mnist(img, label) adam.minimize(avg_loss) exe.run(fluid.default_startup_program()) for epoch in range(self.epoch_num): for batch_id, data in enumerate(self.train_reader()): dy_x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array([x[1] for x in data ]).astype('int64').reshape(-1, 1) out = exe.run(main_prog, fetch_list=[avg_loss, acc], feed={ 'img': dy_x_data, 'label': y_data }) if batch_id % 100 == 0: print( "Loss at epoch {} step {}: loss: {:}, acc: {}, cost: {}" .format(epoch, batch_id, np.array(out[0]), np.array(out[1]), time() - start)) if batch_id == 300: # The accuracy of mnist should converge over 0.9 after 300 batch. accuracy = np.array(out[1]) self.assertGreater( accuracy, 0.9, msg= "The accuracy {} of mnist should converge over 0.9 after 300 batch." .format(accuracy)) break
def finetune(args):
    with fluid.dygraph.guard():
        resnet50_vd_10w = hub.Module(name="resnet50_vd_10w")
        dataset = hub.dataset.Flowers()
        resnet = ResNet50(num_classes=dataset.num_labels,
                          backbone=resnet50_vd_10w)
        adam = AdamOptimizer(learning_rate=0.001,
                             parameter_list=resnet.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            resnet.load_dict(state_dict)

        reader = hub.reader.ImageClassificationReader(
            image_width=resnet50_vd_10w.get_expected_image_width(),
            image_height=resnet50_vd_10w.get_expected_image_height(),
            images_mean=resnet50_vd_10w.get_pretrained_images_mean(),
            images_std=resnet50_vd_10w.get_pretrained_images_std(),
            dataset=dataset)
        train_reader = reader.data_generator(batch_size=args.batch_size,
                                             phase='train')

        loss_sum = acc_sum = cnt = 0
        # run num_epoch rounds of training
        for epoch in range(args.num_epoch):
            # read the training data and train
            for batch_id, data in enumerate(train_reader()):
                imgs = np.array(data[0][0])
                labels = np.array(data[0][1])
                pred = resnet(imgs)
                acc = fluid.layers.accuracy(pred, to_variable(labels))
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # update the parameters
                adam.minimize(avg_loss)

                loss_sum += avg_loss.numpy() * imgs.shape[0]
                acc_sum += acc.numpy() * imgs.shape[0]
                cnt += imgs.shape[0]
                if batch_id % args.log_interval == 0:
                    print('epoch {}: loss {}, acc {}'.format(
                        epoch, loss_sum / cnt, acc_sum / cnt))
                    loss_sum = acc_sum = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = resnet.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
def train(self, to_static=False): prog_trans = ProgramTranslator() prog_trans.enable(to_static) loss_data = [] with fluid.dygraph.guard(self.place): fluid.default_main_program().random_seed = SEED fluid.default_startup_program().random_seed = SEED mnist = MNIST() adam = AdamOptimizer(learning_rate=0.001, parameter_list=mnist.parameters()) for epoch in range(self.epoch_num): start = time() for batch_id, data in enumerate(self.train_reader()): dy_x_data = np.array([ x[0].reshape(1, 28, 28) for x in data ]).astype('float32') y_data = np.array([x[1] for x in data ]).astype('int64').reshape(-1, 1) img = to_variable(dy_x_data) label = to_variable(y_data) label.stop_gradient = True prediction, acc, avg_loss = mnist(img, label=label) avg_loss.backward() adam.minimize(avg_loss) loss_data.append(avg_loss.numpy()[0]) # save checkpoint mnist.clear_gradients() if batch_id % 10 == 0: print( "Loss at epoch {} step {}: loss: {:}, acc: {}, cost: {}" .format(epoch, batch_id, avg_loss.numpy(), acc.numpy(), time() - start)) start = time() if batch_id == 50: mnist.eval() prediction, acc, avg_loss = mnist(img, label) loss_data.append(avg_loss.numpy()[0]) # new save load check self.check_jit_save_load(mnist, [dy_x_data], [img], to_static, prediction) break return loss_data
def main(args): place = set_device(args.device) fluid.enable_dygraph(place) if args.dynamic else None inputs = [ Input([None, args.max_seq_len], 'int64', name='words'), Input([None, args.max_seq_len], 'int64', name='target'), Input([None], 'int64', name='length') ] labels = [Input([None, args.max_seq_len], 'int64', name='labels')] feed_list = None if args.dynamic else [ x.forward() for x in inputs + labels ] dataset = LacDataset(args) train_path = os.path.join(args.data, "train.tsv") test_path = os.path.join(args.data, "test.tsv") train_generator = create_lexnet_data_generator(args, reader=dataset, file_name=train_path, place=place, mode="train") test_generator = create_lexnet_data_generator(args, reader=dataset, file_name=test_path, place=place, mode="test") train_dataset = create_dataloader(train_generator, place, feed_list=feed_list) test_dataset = create_dataloader(test_generator, place, feed_list=feed_list) vocab_size = dataset.vocab_size num_labels = dataset.num_labels model = SeqTagging(args, vocab_size, num_labels) optim = AdamOptimizer(learning_rate=args.base_learning_rate, parameter_list=model.parameters()) model.prepare(optim, LacLoss(), ChunkEval(num_labels), inputs=inputs, labels=labels, device=args.device) if args.resume is not None: model.load(args.resume) model.fit(train_dataset, test_dataset, epochs=args.epoch, batch_size=args.batch_size, eval_freq=args.eval_freq, save_freq=args.save_freq, save_dir=args.save_dir)
def main():
    # Step 0: preparation
    #place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard():
        # Step 1: Define training dataloader
        image_folder = "work/dummy_data"
        image_list_file = "work/dummy_data/list.txt"
        transform = TrainAugmentation(224)
        data = BasicDataLoader(image_folder, image_list_file, transform=transform)
        # TODO: create dataloader
        train_dataloader = fluid.io.DataLoader.from_generator(capacity=2,
                                                              return_list=True)
        train_dataloader.set_sample_generator(data, args.batch_size)
        total_batch = len(data) // args.batch_size

        # Step 2: Create model
        if args.net == "basic":
            # TODO: create basic model
            model = PSPNet()
        else:
            raise NotImplementedError(f"args.net: {args.net} is not supported!")

        # Step 3: Define criterion and optimizer
        criterion = Basic_SegLoss
        # create optimizer
        optimizer = AdamOptimizer(learning_rate=args.lr,
                                  parameter_list=model.parameters())

        # Step 4: Training
        for epoch in range(1, args.num_epochs + 1):
            train_loss = train(train_dataloader, model, criterion, optimizer,
                               epoch, total_batch)
            print(f"----- Epoch[{epoch}/{args.num_epochs}] Train Loss: {train_loss:.4f}")

            if epoch % args.save_freq == 0 or epoch == args.num_epochs:
                model_path = os.path.join(args.checkpoint_folder,
                                          f"{args.net}-Epoch-{epoch}")
                # TODO: save model and optimizer states
                model_dict = model.state_dict()
                fluid.save_dygraph(model_dict, model_path)
                optim_dict = optimizer.state_dict()
                fluid.save_dygraph(optim_dict, model_path)
                print(f'----- Save model: {model_path}.pdparams')
                print(f'----- Save optimizer: {model_path}.pdopt')
def main():
    # Step 0: preparation
    place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        # Step 1: Define training dataloader
        basic_augmentation = TrainAugmentation(image_size=256)
        basic_dataloader = BasicDataLoader(image_folder=args.image_folder,
                                           image_list_file=args.image_list_file,
                                           transform=basic_augmentation,
                                           shuffle=True)
        train_dataloader = fluid.io.DataLoader.from_generator(capacity=10,
                                                              use_multiprocess=True)
        train_dataloader.set_sample_generator(basic_dataloader,
                                              batch_size=args.batch_size,
                                              places=place)
        total_batch = int(len(basic_dataloader) / args.batch_size)

        # Step 2: Create model
        if args.net == "basic":
            model = BasicModel()
        else:
            raise NotImplementedError(f"args.net: {args.net} is not supported!")

        # Step 3: Define criterion and optimizer
        criterion = Basic_SegLoss
        optimizer = AdamOptimizer(learning_rate=args.lr,
                                  parameter_list=model.parameters())  # create optimizer

        # Step 4: Training
        for epoch in range(1, args.num_epochs + 1):
            train_loss = train(train_dataloader, model, criterion, optimizer,
                               epoch, total_batch)
            print(f"----- Epoch[{epoch}/{args.num_epochs}] Train Loss: {train_loss:.4f}")

            if epoch % args.save_freq == 0 or epoch == args.num_epochs:
                model_path = os.path.join(args.checkpoint_folder,
                                          f"{args.net}-Epoch-{epoch}-Loss-{train_loss}")
                # TODO: save model and optimizer states
                model_dict = model.state_dict()
                fluid.save_dygraph(model_dict, model_path)
                optimizer_dict = optimizer.state_dict()
                fluid.save_dygraph(optimizer_dict, model_path)
                print(f'----- Save model: {model_path}.pdparams')
                print(f'----- Save optimizer: {model_path}.pdopt')
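The segmentation training scripts in this section call a per-epoch train() helper that is not defined here. Below is only a minimal sketch under the assumed signature train(dataloader, model, criterion, optimizer, epoch, total_batch); the (image, label) batch layout and the criterion(pred, label) interface are assumptions taken from the surrounding code.

# Hypothetical per-epoch training helper matching the calls above; returns the mean loss.
def train(dataloader, model, criterion, optimizer, epoch, total_batch):
    model.train()
    loss_sum = 0.0
    num_batches = 0
    for batch_id, data in enumerate(dataloader()):
        image, label = data
        pred = model(image)
        loss = criterion(pred, label)
        loss.backward()
        optimizer.minimize(loss)
        model.clear_gradients()
        loss_sum += float(loss.numpy())
        num_batches += 1
        if batch_id % 10 == 0:
            print(f"Epoch[{epoch:03d}] Step[{batch_id:04d}/{total_batch:04d}] "
                  f"Loss: {float(loss.numpy()):.4f}")
    return loss_sum / max(num_batches, 1)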
def train_mnist(args): epoch_num = args.epoch BATCH_SIZE = 64 seed = 33 np.random.seed(seed) start_prog = fluid.Program() main_prog = fluid.Program() start_prog.random_seed = seed main_prog.random_seed = seed with fluid.program_guard(main_prog, start_prog): exe = fluid.Executor(fluid.CPUPlace()) mnist = MNIST("mnist") adam = AdamOptimizer(learning_rate=0.001) train_reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True) img = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') cost = mnist(img) loss = fluid.layers.cross_entropy(cost, label) avg_loss = fluid.layers.mean(loss) adam.minimize(avg_loss) out = exe.run(fluid.default_startup_program()) for epoch in range(epoch_num): for batch_id, data in enumerate(train_reader()): static_x_data = np.array( [x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array([x[1] for x in data ]).astype('int64').reshape([BATCH_SIZE, 1]) fetch_list = [avg_loss.name] out = exe.run(fluid.default_main_program(), feed={ "pixel": static_x_data, "label": y_data }, fetch_list=fetch_list) static_out = out[0] if batch_id % 100 == 0: print("epoch: {}, batch_id: {}, loss: {}".format( epoch, batch_id, static_out))
def test_save_load_same_result(self): program_translator = ProgramTranslator() x_data = np.random.randn(30, 10, 32).astype('float32') batch_num = 3 with fluid.dygraph.guard(place): program_translator.enable(True) x = fluid.dygraph.to_variable(x_data) net = Linear(32, 64) adam = AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) for i in range(batch_num): static_out, static_loss = net(x) # Update parameters static_loss.backward() adam.minimize(static_loss) net.clear_gradients() # Save parameters fluid.save_dygraph(net.state_dict(), self.model_path) # minimize() will update parameter, call net() to get output and avg_loss. # Switch into eval mode. net.eval() static_out, static_loss = net(x) # load parameters into dygraph with fluid.dygraph.guard(place): dygraph_net = Linear(32, 64) # Load parameters model_dict, _ = fluid.load_dygraph(self.model_path) dygraph_net.set_dict(model_dict) # Switch into eval mode. dygraph_net.eval() x = fluid.dygraph.to_variable(x_data) # predict output program_translator.enable(False) dygraph_out, dygraph_loss = dygraph_net(x) self.assertTrue(np.allclose(dygraph_out.numpy(), static_out.numpy())) self.assertTrue(np.allclose(dygraph_loss.numpy(), static_loss.numpy()))
def main(args): place = set_device(args.device) fluid.enable_dygraph(place) if args.dynamic else None inputs = [ Input( [None, None], 'int64', name='words'), Input( [None], 'int64', name='length'), Input( [None, None], 'int64', name='target') ] labels = [Input([None, None], 'int64', name='labels')] feed_list = None if args.dynamic else [ x.forward() for x in inputs + labels ] dataset = LacDataset(args) train_dataset = LacDataLoader(args, place, phase="train") vocab_size = dataset.vocab_size num_labels = dataset.num_labels model = SeqTagging(args, vocab_size, num_labels, mode="train") optim = AdamOptimizer( learning_rate=args.base_learning_rate, parameter_list=model.parameters()) model.prepare( optim, LacLoss(), ChunkEval(num_labels), inputs=inputs, labels=labels, device=args.device) if args.init_from_checkpoint: model.load(args.init_from_checkpoint) if args.init_from_pretrain_model: model.load(args.init_from_pretrain_model, reset_optimizer=True) model.fit(train_dataset.dataloader, epochs=args.epoch, batch_size=args.batch_size, eval_freq=args.eval_freq, save_freq=args.save_freq, save_dir=args.save_dir)
def train(self, to_static=False): paddle.seed(SEED) mnist = MNIST() if to_static: print("Successfully to apply @to_static.") mnist = paddle.jit.to_static(mnist) adam = AdamOptimizer(learning_rate=0.001, parameter_list=mnist.parameters()) scaler = paddle.amp.GradScaler(init_loss_scaling=1024) loss_data = [] for epoch in range(self.epoch_num): start = time() for batch_id, data in enumerate(self.train_reader()): dy_x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array([x[1] for x in data ]).astype('int64').reshape(-1, 1) img = paddle.to_tensor(dy_x_data) label = paddle.to_tensor(y_data) label.stop_gradient = True with paddle.amp.auto_cast(): prediction, acc, avg_loss = mnist(img, label=label) scaled = scaler.scale(avg_loss) scaled.backward() scaler.minimize(adam, scaled) loss_data.append(avg_loss.numpy()[0]) # save checkpoint mnist.clear_gradients() if batch_id % 10 == 0: print( "Loss at epoch {} step {}: loss: {:}, acc: {}, cost: {}" .format(epoch, batch_id, avg_loss.numpy(), acc.numpy(), time() - start)) start = time() if batch_id == 50: break return loss_data
def func_out_scale_acc(self): seed = 1000 lr = 0.001 weight_quantize_type = 'abs_max' activation_quantize_type = 'moving_average_abs_max' imperative_out_scale = ImperativeQuantAware( weight_quantize_type=weight_quantize_type, activation_quantize_type=activation_quantize_type) with fluid.dygraph.guard(): np.random.seed(seed) fluid.default_main_program().random_seed = seed fluid.default_startup_program().random_seed = seed lenet = ImperativeLenet() lenet = fix_model_dict(lenet) imperative_out_scale.quantize(lenet) reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=32, drop_last=True) adam = AdamOptimizer(learning_rate=lr, parameter_list=lenet.parameters()) loss_list = train_lenet(lenet, reader, adam) lenet.eval() param_save_path = "test_save_quantized_model/lenet.pdparams" save_dict = lenet.state_dict() paddle.save(save_dict, param_save_path) save_path = "./dynamic_outscale_infer_model/lenet" imperative_out_scale.save_quantized_model( layer=lenet, path=save_path, input_spec=[ paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32') ]) for i in range(len(loss_list) - 1): self.assertTrue(loss_list[i] > loss_list[i + 1], msg='Failed to do the imperative qat.')
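The quantization-aware-training snippets here and below call a train_lenet(lenet, reader, adam) helper that is not defined in this section. The sketch below is only an assumption matching that signature and the returned list of recorded losses that the assertions iterate over.

# Hypothetical training helper matching train_lenet(lenet, reader, adam) above.
import numpy as np
import paddle.fluid as fluid


def train_lenet(lenet, reader, optimizer):
    loss_list = []
    lenet.train()
    for batch_id, data in enumerate(reader()):
        x_data = np.array([x[0].reshape(1, 28, 28)
                           for x in data]).astype('float32')
        y_data = np.array([x[1] for x in data]).astype('int64').reshape(-1, 1)
        img = fluid.dygraph.to_variable(x_data)
        label = fluid.dygraph.to_variable(y_data)
        out = lenet(img)
        loss = fluid.layers.cross_entropy(out, label)
        avg_loss = fluid.layers.mean(loss)
        avg_loss.backward()
        optimizer.minimize(avg_loss)
        lenet.clear_gradients()
        # record the loss periodically; the tests above compare successive entries
        if batch_id % 100 == 0:
            loss_list.append(float(avg_loss.numpy()))
        if batch_id > 500:  # keep the run short, as the other snippets do
            break
    return loss_list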
def main():
    #Place = paddle.fluid.CPUPlace()
    Place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard(Place):
        transform = Transform(256)
        dataload = Dataloader(args.image_folder, args.image_list_file,
                              transform, True)
        train_load = fluid.io.DataLoader.from_generator(capacity=1,
                                                        use_multiprocess=False)
        train_load.set_sample_generator(dataload,
                                        batch_size=args.batch_size,
                                        places=Place)
        total_batch = int(len(dataload) / args.batch_size)

        if args.net == 'deeplab':
            model = DeepLab(59)
        else:
            print("Other models are not implemented yet.")

        costFunc = SegLoss
        adam = AdamOptimizer(learning_rate=args.lr,
                             parameter_list=model.parameters())

        for epoch in range(1, args.num_epochs + 1):
            train_loss = train(train_load, model, costFunc, adam, epoch,
                               total_batch)
            print(f"----- Epoch[{epoch}/{args.num_epochs}] Train Loss: {train_loss}")

            if epoch % args.save_freq == 0 or epoch == args.num_epochs:
                model_path = os.path.join(
                    args.checkpoint_folder,
                    f"{args.net}-Epoch-{epoch}-Loss-{train_loss}")
                model_dict = model.state_dict()
                fluid.save_dygraph(model_dict, model_path)
                optimizer_dict = adam.state_dict()
                fluid.save_dygraph(optimizer_dict, model_path)
                print(f'----- Save model: {model_path}.pdparams')
                print(f'----- Save optimizer: {model_path}.pdopt')
def test_save_quantized_model(self): lr = 0.001 load_param_path = "test_save_quantized_model/lenet.pdparams" save_path = "./dynamic_outscale_infer_model_from_checkpoint/lenet" weight_quantize_type = 'abs_max' activation_quantize_type = 'moving_average_abs_max' imperative_out_scale = ImperativeQuantAware( weight_quantize_type=weight_quantize_type, activation_quantize_type=activation_quantize_type) with fluid.dygraph.guard(): lenet = ImperativeLenet() load_dict = paddle.load(load_param_path) imperative_out_scale.quantize(lenet) lenet.set_dict(load_dict) reader = paddle.batch( paddle.dataset.mnist.test(), batch_size=32, drop_last=True) adam = AdamOptimizer( learning_rate=lr, parameter_list=lenet.parameters()) loss_list = train_lenet(lenet, reader, adam) lenet.eval() imperative_out_scale.save_quantized_model( layer=lenet, path=save_path, input_spec=[ paddle.static.InputSpec( shape=[None, 1, 28, 28], dtype='float32') ]) for i in range(len(loss_list) - 1): self.assertTrue( loss_list[i] > loss_list[i + 1], msg='Failed to do the imperative qat.')
def train_mnist(args): epoch_num = 5 BATCH_SIZE = 256 place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \ if args.use_data_parallel else fluid.CUDAPlace(0) with fluid.dygraph.guard(place): if args.use_data_parallel: strategy = fluid.dygraph.parallel.prepare_context() mnist = MNIST("mnist") adam = AdamOptimizer(learning_rate=0.001) if args.use_data_parallel: mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy) if args.use_data_parallel: train_reader = fluid.contrib.reader.distributed_sampler( paddle.dataset.mnist.train(), batch_size=BATCH_SIZE) else: train_reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True) test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=BATCH_SIZE, drop_last=True) for epoch in range(epoch_num): # define eval batch_time = AverageMeter('Time', ':6.3f') data_time = AverageMeter('Data', ':6.3f') losses = AverageMeter('Loss', ':.4e') progress = ProgressMeter(len(list(train_reader())) - 1, batch_time, data_time, losses, prefix="epoch: [{}]".format(epoch)) end = Tools.time() for batch_id, data in enumerate(train_reader()): data_time.update(Tools.time() - end) dy_x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array([x[1] for x in data ]).astype('int64').reshape(-1, 1) img = to_variable(dy_x_data) label = to_variable(y_data) label.stop_gradient = True cost, acc = mnist(img, label) loss = fluid.layers.cross_entropy(cost, label) avg_loss = fluid.layers.mean(loss) if args.use_data_parallel: avg_loss = mnist.scale_loss(avg_loss) avg_loss.backward() mnist.apply_collective_grads() else: avg_loss.backward() adam.minimize(avg_loss) # save checkpoint mnist.clear_gradients() batch_time.update(Tools.time() - end) dy_out = avg_loss.numpy()[0] losses.update(dy_out, BATCH_SIZE) if batch_id % 10 == 0: progress.print(batch_id) end = Tools.time() mnist.eval() test_cost, test_acc = test_mnist(test_reader, mnist, BATCH_SIZE) mnist.train() print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format( epoch, test_cost, test_acc)) fluid.dygraph.save_persistables(mnist.state_dict(), "save_dir") print("checkpoint saved") inference_mnist()
def func_out_scale_acc(self): paddle.disable_static() seed = 1000 lr = 0.1 qat = ImperativeQuantAware() np.random.seed(seed) reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=512, drop_last=True) lenet = ImperativeLenetWithSkipQuant() lenet = fix_model_dict(lenet) qat.quantize(lenet) adam = AdamOptimizer(learning_rate=lr, parameter_list=lenet.parameters()) dynamic_loss_rec = [] lenet.train() loss_list = train_lenet(lenet, reader, adam) lenet.eval() path = "./save_dynamic_quant_infer_model/lenet" save_dir = "./save_dynamic_quant_infer_model" qat.save_quantized_model(layer=lenet, path=path, input_spec=[ paddle.static.InputSpec( shape=[None, 1, 28, 28], dtype='float32') ]) paddle.enable_static() if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) else: place = core.CPUPlace() exe = fluid.Executor(place) [inference_program, feed_target_names, fetch_targets] = (fluid.io.load_inference_model( dirname=save_dir, executor=exe, model_filename="lenet" + INFER_MODEL_SUFFIX, params_filename="lenet" + INFER_PARAMS_SUFFIX)) model_ops = inference_program.global_block().ops conv2d_count, matmul_count = 0, 0 conv2d_skip_count, matmul_skip_count = 0, 0 find_conv2d = False find_matmul = False for i, op in enumerate(model_ops): if op.type == 'conv2d': find_conv2d = True if op.has_attr("skip_quant"): conv2d_skip_count += 1 if conv2d_count > 0: self.assertTrue( 'fake_quantize_dequantize' in model_ops[i - 1].type) else: self.assertTrue( 'fake_quantize_dequantize' not in model_ops[i - 1].type) conv2d_count += 1 if op.type == 'matmul': find_matmul = True if op.has_attr("skip_quant"): matmul_skip_count += 1 if matmul_count > 0: self.assertTrue( 'fake_quantize_dequantize' in model_ops[i - 1].type) else: self.assertTrue( 'fake_quantize_dequantize' not in model_ops[i - 1].type) matmul_count += 1 if find_conv2d: self.assertTrue(conv2d_skip_count == 1) if find_matmul: self.assertTrue(matmul_skip_count == 1)
def func_qat(self): self.set_vars() imperative_qat = ImperativeQuantAware( weight_quantize_type=self.weight_quantize_type, activation_quantize_type=self.activation_quantize_type, fuse_conv_bn=self.fuse_conv_bn) with fluid.dygraph.guard(): # For CI coverage conv1 = Conv2D( in_channels=3, out_channels=2, kernel_size=3, stride=1, padding=1, padding_mode='replicate') quant_conv1 = QuantizedConv2D(conv1) data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32') quant_conv1(fluid.dygraph.to_variable(data)) conv_transpose = Conv2DTranspose(4, 6, (3, 3)) quant_conv_transpose = QuantizedConv2DTranspose(conv_transpose) x_var = paddle.uniform( (2, 4, 8, 8), dtype='float32', min=-1.0, max=1.0) quant_conv_transpose(x_var) seed = 1 np.random.seed(seed) fluid.default_main_program().random_seed = seed fluid.default_startup_program().random_seed = seed lenet = ImperativeLenet() lenet = fix_model_dict(lenet) imperative_qat.quantize(lenet) adam = AdamOptimizer( learning_rate=0.001, parameter_list=lenet.parameters()) train_reader = paddle.batch( paddle.dataset.mnist.train(), batch_size=32, drop_last=True) test_reader = paddle.batch( paddle.dataset.mnist.test(), batch_size=32) epoch_num = 1 for epoch in range(epoch_num): lenet.train() for batch_id, data in enumerate(train_reader()): x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array( [x[1] for x in data]).astype('int64').reshape(-1, 1) img = fluid.dygraph.to_variable(x_data) label = fluid.dygraph.to_variable(y_data) out = lenet(img) acc = fluid.layers.accuracy(out, label) loss = fluid.layers.cross_entropy(out, label) avg_loss = fluid.layers.mean(loss) avg_loss.backward() adam.minimize(avg_loss) lenet.clear_gradients() if batch_id % 100 == 0: _logger.info( "Train | At epoch {} step {}: loss = {:}, acc= {:}". format(epoch, batch_id, avg_loss.numpy(), acc.numpy())) if batch_id == 500: # For shortening CI time break lenet.eval() eval_acc_top1_list = [] for batch_id, data in enumerate(test_reader()): x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array( [x[1] for x in data]).astype('int64').reshape(-1, 1) img = fluid.dygraph.to_variable(x_data) label = fluid.dygraph.to_variable(y_data) out = lenet(img) acc_top1 = fluid.layers.accuracy( input=out, label=label, k=1) acc_top5 = fluid.layers.accuracy( input=out, label=label, k=5) if batch_id % 100 == 0: eval_acc_top1_list.append(float(acc_top1.numpy())) _logger.info( "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}". format(epoch, batch_id, acc_top1.numpy(), acc_top5.numpy())) # check eval acc eval_acc_top1 = sum(eval_acc_top1_list) / len( eval_acc_top1_list) print('eval_acc_top1', eval_acc_top1) self.assertTrue( eval_acc_top1 > 0.9, msg="The test acc {%f} is less than 0.9." 
% eval_acc_top1) # test the correctness of `paddle.jit.save` data = next(test_reader()) test_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array( [x[1] for x in data]).astype('int64').reshape(-1, 1) test_img = fluid.dygraph.to_variable(test_data) label = fluid.dygraph.to_variable(y_data) lenet.eval() fp32_out = lenet(test_img) fp32_acc = fluid.layers.accuracy(fp32_out, label).numpy() with tempfile.TemporaryDirectory(prefix="qat_save_path_") as tmpdir: # save inference quantized model imperative_qat.save_quantized_model( layer=lenet, path=os.path.join(tmpdir, "lenet"), input_spec=[ paddle.static.InputSpec( shape=[None, 1, 28, 28], dtype='float32') ], onnx_format=self.onnx_format) print('Quantized model saved in %s' % tmpdir) if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) else: place = core.CPUPlace() exe = fluid.Executor(place) [inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model( dirname=tmpdir, executor=exe, model_filename="lenet" + INFER_MODEL_SUFFIX, params_filename="lenet" + INFER_PARAMS_SUFFIX) quant_out, = exe.run(inference_program, feed={feed_target_names[0]: test_data}, fetch_list=fetch_targets) paddle.disable_static() quant_out = fluid.dygraph.to_variable(quant_out) quant_acc = fluid.layers.accuracy(quant_out, label).numpy() paddle.enable_static() delta_value = fp32_acc - quant_acc self.assertLess(delta_value, self.diff_threshold)
def finetune(args): module = hub.Module(name="ernie", max_seq_len=args.max_seq_len) # Use the appropriate tokenizer to preprocess the data set # For ernie_tiny, it will do word segmentation to get subword. More details: https://www.jiqizhixin.com/articles/2019-11-06-9 if module.name == "ernie_tiny": tokenizer = hub.ErnieTinyTokenizer( vocab_file=module.get_vocab_path(), spm_path=module.get_spm_path(), word_dict_path=module.get_word_dict_path(), ) else: tokenizer = hub.BertTokenizer(vocab_file=module.get_vocab_path()) dataset = hub.dataset.ChnSentiCorp(tokenizer=tokenizer, max_seq_len=args.max_seq_len) with fluid.dygraph.guard(): tc = TransformerClassifier(num_classes=dataset.num_labels, transformer=module) adam = AdamOptimizer(learning_rate=1e-5, parameter_list=tc.parameters()) state_dict_path = os.path.join(args.checkpoint_dir, 'dygraph_state_dict') if os.path.exists(state_dict_path + '.pdparams'): state_dict, _ = fluid.load_dygraph(state_dict_path) tc.load_dict(state_dict) loss_sum = acc_sum = cnt = 0 for epoch in range(args.num_epoch): for batch_id, data in enumerate( dataset.batch_records_generator( phase="train", batch_size=args.batch_size, shuffle=True, pad_to_batch_max_seq_len=False)): batch_size = len(data["input_ids"]) input_ids = np.array(data["input_ids"]).astype( np.int64).reshape([batch_size, -1, 1]) position_ids = np.array(data["position_ids"]).astype( np.int64).reshape([batch_size, -1, 1]) segment_ids = np.array(data["segment_ids"]).astype( np.int64).reshape([batch_size, -1, 1]) input_mask = np.array(data["input_mask"]).astype( np.float32).reshape([batch_size, -1, 1]) labels = np.array(data["label"]).astype(np.int64).reshape( [batch_size, 1]) pred = tc(input_ids, position_ids, segment_ids, input_mask) acc = fluid.layers.accuracy(pred, to_variable(labels)) loss = fluid.layers.cross_entropy(pred, to_variable(labels)) avg_loss = fluid.layers.mean(loss) avg_loss.backward() adam.minimize(avg_loss) loss_sum += avg_loss.numpy() * labels.shape[0] acc_sum += acc.numpy() * labels.shape[0] cnt += labels.shape[0] if batch_id % args.log_interval == 0: print('epoch {}: loss {}, acc {}'.format( epoch, loss_sum / cnt, acc_sum / cnt)) loss_sum = acc_sum = cnt = 0 if batch_id % args.save_interval == 0: state_dict = tc.state_dict() fluid.save_dygraph(state_dict, state_dict_path)
def train_mnist(args): epoch_num = args.epoch BATCH_SIZE = 32 trainer_count = fluid.dygraph.parallel.Env().nranks place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \ if args.use_data_parallel else fluid.CUDAPlace(0) with fluid.dygraph.guard(place): if args.ce: print("ce mode") seed = 33 np.random.seed(seed) fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed if args.use_data_parallel: strategy = fluid.dygraph.parallel.prepare_context() mnist = MNIST("mnist") adam = AdamOptimizer(learning_rate=0.001) if args.use_data_parallel: mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy) train_reader = paddle.batch( paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True) if args.use_data_parallel: train_reader = fluid.contrib.reader.distributed_batch_reader( train_reader) test_reader = paddle.batch( paddle.dataset.mnist.test(), batch_size=BATCH_SIZE, drop_last=True) for epoch in range(epoch_num): total_loss = 0.0 total_acc = 0.0 total_sample = 0 batch_time = AverageMeter('Time', ':6.3f') data_time = AverageMeter('Data', ':6.3f') losses = AverageMeter('Loss', ':.4e') progress = ProgressMeter(len(list(train_reader())) - 1, batch_time, data_time, losses, prefix="epoch: [{}]".format(epoch)) end = Tools.time() for batch_id, data in enumerate(train_reader()): data_time.update(Tools.time() - end) dy_x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array( [x[1] for x in data]).astype('int64').reshape(-1, 1) img = to_variable(dy_x_data) label = to_variable(y_data) label.stop_gradient = True cost, acc = mnist(img, label) loss = fluid.layers.cross_entropy(cost, label) avg_loss = fluid.layers.mean(loss) if args.use_data_parallel: avg_loss = mnist.scale_loss(avg_loss) avg_loss.backward() mnist.apply_collective_grads() else: avg_loss.backward() adam.minimize(avg_loss) # save checkpoint mnist.clear_gradients() batch_time.update(Tools.time() - end) total_loss += avg_loss.numpy() total_acc += acc.numpy() total_sample += 1 dy_out = avg_loss.numpy()[0] losses.update(dy_out, BATCH_SIZE) if batch_id % 10 == 0: progress.print(batch_id) print("epoch %d | batch step %d, loss %0.3f acc %0.3f" % \ (epoch, batch_id, total_loss / total_sample, total_acc / total_sample)) if batch_id % 100 == 0: print("Loss at epoch {} step {}: {:}".format( epoch, batch_id, avg_loss.numpy())) end = Tools.time() mnist.eval() test_cost, test_acc = test_mnist(test_reader, mnist, BATCH_SIZE) mnist.train() if args.ce: print("kpis\ttest_acc\t%s" % test_acc) print("kpis\ttest_cost\t%s" % test_cost) print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format( epoch, test_cost, test_acc))
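The progress-logging variants above rely on an AverageMeter (and a companion ProgressMeter) that are not shown in this section. A small sketch of the running-average meter, assuming only the constructor arguments and the update(value, n)/.avg interface used above, could look like this:

# Hypothetical running-average meter matching the usage above (assumed interface).
class AverageMeter(object):
    """Tracks the latest value and a running average of everything passed to update()."""

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(name=self.name, val=self.val, avg=self.avg)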
def test_qat_acc(self): def _build_static_lenet(main, startup, is_test=False, seed=1000): with fluid.unique_name.guard(): with fluid.program_guard(main, startup): main.random_seed = seed startup.random_seed = seed img = fluid.layers.data( name='image', shape=[1, 28, 28], dtype='float32') label = fluid.layers.data( name='label', shape=[1], dtype='int64') prediction = StaticLenet(img) if not is_test: loss = fluid.layers.cross_entropy( input=prediction, label=label) avg_loss = fluid.layers.mean(loss) else: avg_loss = prediction return img, label, avg_loss reader = paddle.batch( paddle.dataset.mnist.test(), batch_size=32, drop_last=True) weight_quantize_type = 'abs_max' activation_quant_type = 'moving_average_abs_max' param_init_map = {} seed = 1000 lr = 0.001 # imperative train _logger.info( "--------------------------dynamic graph qat--------------------------" ) imperative_qat = ImperativeQuantAware( weight_quantize_type=weight_quantize_type, activation_quantize_type=activation_quant_type, quantizable_layer_type=[ 'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh', 'Swish' ]) with fluid.dygraph.guard(): np.random.seed(seed) fluid.default_main_program().random_seed = seed fluid.default_startup_program().random_seed = seed lenet = ImperativeLenet() fixed_state = {} for name, param in lenet.named_parameters(): p_shape = param.numpy().shape p_value = param.numpy() if name.endswith("bias"): value = np.zeros_like(p_value).astype('float32') else: value = np.random.normal( loc=0.0, scale=0.01, size=np.product(p_shape)).reshape( p_shape).astype('float32') fixed_state[name] = value param_init_map[param.name] = value lenet.set_dict(fixed_state) imperative_qat.quantize(lenet) adam = AdamOptimizer( learning_rate=lr, parameter_list=lenet.parameters()) dynamic_loss_rec = [] lenet.train() for batch_id, data in enumerate(reader()): x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array( [x[1] for x in data]).astype('int64').reshape(-1, 1) img = fluid.dygraph.to_variable(x_data) label = fluid.dygraph.to_variable(y_data) out = lenet(img) loss = fluid.layers.cross_entropy(out, label) avg_loss = fluid.layers.mean(loss) avg_loss.backward() adam.minimize(avg_loss) lenet.clear_gradients() dynamic_loss_rec.append(avg_loss.numpy()[0]) if batch_id % 100 == 0: _logger.info('{}: {}'.format('loss', avg_loss.numpy())) if batch_id > 500: break lenet.eval() paddle.jit.save( layer=lenet, path="./dynamic_mnist/model", input_spec=[ paddle.static.InputSpec( shape=[None, 1, 28, 28], dtype='float32') ]) # static graph train _logger.info( "--------------------------static graph qat--------------------------" ) static_loss_rec = [] if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) else: place = core.CPUPlace() exe = fluid.Executor(place) main = fluid.Program() infer = fluid.Program() startup = fluid.Program() static_img, static_label, static_loss = _build_static_lenet( main, startup, False, seed) infer_img, _, infer_pre = _build_static_lenet(infer, startup, True, seed) with fluid.unique_name.guard(): with fluid.program_guard(main, startup): opt = AdamOptimizer(learning_rate=lr) opt.minimize(static_loss) scope = core.Scope() with fluid.scope_guard(scope): exe.run(startup) for param in main.all_parameters(): param_tensor = scope.var(param.name).get_tensor() param_tensor.set(param_init_map[param.name], place) main_graph = IrGraph(core.Graph(main.desc), for_test=False) infer_graph = IrGraph(core.Graph(infer.desc), for_test=True) transform_pass = QuantizationTransformPass( 
scope=scope, place=place, activation_quantize_type=activation_quant_type, weight_quantize_type=weight_quantize_type, quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']) add_quant_dequant_pass = AddQuantDequantPass( scope=scope, place=place, quantizable_op_type=[ 'relu', 'leaky_relu', 'relu6', 'tanh', 'swish' ]) transform_pass.apply(main_graph) transform_pass.apply(infer_graph) add_quant_dequant_pass.apply(main_graph) add_quant_dequant_pass.apply(infer_graph) build_strategy = fluid.BuildStrategy() build_strategy.fuse_all_reduce_ops = False binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel( loss_name=static_loss.name, build_strategy=build_strategy) feeder = fluid.DataFeeder( feed_list=[static_img, static_label], place=place) with fluid.scope_guard(scope): for batch_id, data in enumerate(reader()): loss_v, = exe.run(binary, feed=feeder.feed(data), fetch_list=[static_loss]) static_loss_rec.append(loss_v[0]) if batch_id % 100 == 0: _logger.info('{}: {}'.format('loss', loss_v)) save_program = infer_graph.to_program() with fluid.scope_guard(scope): fluid.io.save_inference_model("./static_mnist", [infer_img.name], [infer_pre], exe, save_program) rtol = 1e-08 atol = 1e-10 for i, (loss_d, loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)): diff = np.abs(loss_d - loss_s) if diff > (atol + rtol * np.abs(loss_s)): _logger.info( "diff({}) at {}, dynamic loss = {}, static loss = {}". format(diff, i, loss_d, loss_s)) break self.assertTrue( np.allclose( np.array(dynamic_loss_rec), np.array(static_loss_rec), rtol=rtol, atol=atol, equal_nan=True), msg='Failed to do the imperative qat.')
layer.clear_gradients() if batch_id % 200 == 0: print("Loss at step {}: {:}".format(batch_id, avg_loss.numpy())) return avg_loss ''' Part 3. Train & Save ''' # enable dygraph mode place = fluid.CUDAPlace(0) if USE_CUDA else fluid.CPUPlace() fluid.enable_dygraph(place) # create network mnist = MNIST() adam = AdamOptimizer(learning_rate=0.001, parameter_list=mnist.parameters()) # create train data loader train_reader = paddle.batch(reader_decorator(paddle.dataset.mnist.train()), batch_size=BATCH_SIZE, drop_last=True) train_loader = fluid.io.DataLoader.from_generator(capacity=5) train_loader.set_sample_list_generator(train_reader, places=place) # train for epoch in range(EPOCH_NUM): train_one_epoch(mnist, train_loader) # save fluid.dygraph.jit.save(layer=mnist, model_path=MODEL_PATH) ''' Part 4. Load & Inference ''' # load model by jit.load & inference
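The loader above wraps the raw MNIST reader with a reader_decorator that is not shown in this snippet (the DataLoader-based example further down assumes the same helper). A minimal sketch: it reshapes each sample into a (1, 28, 28) float32 image and an int64 label so set_sample_list_generator can feed (img, label) pairs directly.

# Minimal sketch of the assumed reader_decorator helper.
import numpy as np


def reader_decorator(reader):
    def _reader():
        for item in reader():
            img = np.array(item[0]).astype('float32').reshape(1, 28, 28)
            label = np.array(item[1]).astype('int64').reshape(1)
            yield img, label

    return _reader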
if label is not None: acc = fluid.layers.accuracy(input=x, label=label) return x, acc else: return x place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) with fluid.dygraph.guard(place): epoch_num = 5 BATCH_SIZE = 64 strategy = fluid.dygraph.parallel.prepare_context() mnist = MNIST("mnist") # sgd = SGDOptimizer(learning_rate=0.001) sgd = AdamOptimizer(learning_rate=0.001) # sgd = MomentumOptimizer(learning_rate=0.001, momentum=0.9) mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy) train_reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True) train_reader = fluid.contrib.reader.distributed_batch_reader(train_reader) for epoch in range(epoch_num): for batch_id, data in enumerate(train_reader()): dy_x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array([x[1] for x in data]).astype('int64').reshape(-1, 1)
def train(): place = fluid.CPUPlace() with fluid.dygraph.guard(place): pix2pix_gan = build_pix2pix_gan('pix2pix_gan') discriminator_optimizer = AdamOptimizer(learning_rate=2e-4, beta1=0.5) generator_optimizer = AdamOptimizer(learning_rate=2e-4, beta1=0.5) real_dataset, input_dataset = prepare_dataset(data_dir, is_train=True) real_test, input_test = prepare_dataset(data_dir, is_train=False) epoch = 0 if os.path.exists('./model'): print('load prev checkpoint...') model, _ = fluid.dygraph.load_persistables('./model') pix2pix_gan.load_dict(model) checkpoint = open("./checkpoint.txt", "r") epoch = int(checkpoint.read()) + 1 checkpoint.close() while epoch < num_epochs: print("Epoch id: ", epoch) total_loss_gen = 0 total_loss_disc = 0 seed = np.random.randint(1000) np.random.seed(seed) np.random.shuffle(real_dataset) np.random.seed(seed) np.random.shuffle(input_dataset) for tar, inpt in batch_generator(real_dataset, input_dataset, batch_size): target = to_variable(tar) input_image = to_variable(inpt) gen_loss, disc_generated = pix2pix_gan(input_image, target, None, True) gen_loss.backward() vars_G = [] for parm in pix2pix_gan.parameters(): if parm.name[:43] == 'pix2pix_gan/build_pix2pix_gan_0/generator_0': vars_G.append(parm) generator_optimizer.minimize(gen_loss, parameter_list=vars_G) pix2pix_gan.clear_gradients() disc_loss = pix2pix_gan(input_image, target, disc_generated, False) disc_loss.backward() vars_D = [] for parm in pix2pix_gan.parameters(): if parm.name[:47] == 'pix2pix_gan/build_pix2pix_gan_0/discriminator_0': vars_D.append(parm) discriminator_optimizer.minimize(disc_loss, parameter_list=vars_D) pix2pix_gan.clear_gradients() total_loss_gen += gen_loss.numpy()[0] total_loss_disc += disc_loss.numpy()[0] print("Total generator loss: ", total_loss_gen) print("Total discriminator loss: ", total_loss_disc) if epoch % 10 == 0: # save checkpoint fluid.dygraph.save_persistables(pix2pix_gan.state_dict(), "./model") checkpoint = open("./checkpoint.txt", "w") checkpoint.write(str(epoch)) checkpoint.close() input_image = to_variable(input_test) generate_and_save_images(pix2pix_gan, input_image, epoch) epoch += 1
def test_gnn_float32(self): seed = 90 startup = fluid.Program() startup.random_seed = seed main = fluid.Program() main.random_seed = seed scope = fluid.core.Scope() with new_program_scope(main=main, startup=startup, scope=scope): features = fluid.layers.data(name='features', shape=[1, 100, 50], dtype='float32', append_batch_size=False) # Use selected rows when it's supported. adj = fluid.layers.data(name='adj', shape=[1, 100, 100], dtype='float32', append_batch_size=False) labels = fluid.layers.data(name='labels', shape=[100, 1], dtype='int64', append_batch_size=False) model = GCN('test_gcn', 50) logits = model(features, adj) logits = fluid.layers.reshape(logits, logits.shape[1:]) # In other example, it's nll with log_softmax. However, paddle's # log_loss only supports binary classification now. loss = fluid.layers.softmax_with_cross_entropy(logits, labels) loss = fluid.layers.reduce_sum(loss) adam = AdamOptimizer(learning_rate=1e-3) adam.minimize(loss) exe = fluid.Executor(fluid.CPUPlace( ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) exe.run(startup) static_loss = exe.run(feed={ 'features': np.ones([1, 100, 50], dtype=np.float32), 'adj': np.ones([1, 100, 100], dtype=np.float32), 'labels': np.ones([100, 1], dtype=np.int64) }, fetch_list=[loss])[0] static_weight = np.array( scope.find_var(model.gc.weight.name).get_tensor()) with fluid.dygraph.guard(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed features = np.ones([1, 100, 50], dtype=np.float32) # Use selected rows when it's supported. adj = np.ones([1, 100, 100], dtype=np.float32) labels = np.ones([100, 1], dtype=np.int64) model = GCN('test_gcn', 50) logits = model(to_variable(features), to_variable(adj)) logits = fluid.layers.reshape(logits, logits.shape[1:]) # In other example, it's nll with log_softmax. However, paddle's # log_loss only supports binary classification now. loss = fluid.layers.softmax_with_cross_entropy( logits, to_variable(labels)) loss = fluid.layers.reduce_sum(loss) loss.backward() adam = AdamOptimizer(learning_rate=1e-3) adam.minimize(loss) model.clear_gradients() loss_value = loss.numpy() model_gc_weight_value = model.gc.weight.numpy() with fluid.dygraph.guard(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed features2 = np.ones([1, 100, 50], dtype=np.float32) # Use selected rows when it's supported. adj2 = np.ones([1, 100, 100], dtype=np.float32) labels2 = np.ones([100, 1], dtype=np.int64) model2 = GCN('test_gcn', 50) logits2 = model2(to_variable(features2), to_variable(adj2)) logits2 = fluid.layers.reshape(logits2, logits2.shape[1:]) # In other example, it's nll with log_softmax. However, paddle's # log_loss only supports binary classification now. loss2 = fluid.layers.softmax_with_cross_entropy( logits2, to_variable(labels2)) loss2 = fluid.layers.reduce_sum(loss2) loss2.backward() adam2 = AdamOptimizer(learning_rate=1e-3) adam2.minimize(loss2) model2.clear_gradients() loss2_value = loss2.numpy() model2_gc_weight_value = model2.gc.weight.numpy() self.assertEqual(static_loss, loss_value) self.assertTrue(np.allclose(static_weight, model_gc_weight_value)) self.assertEqual(static_loss, loss2_value) self.assertTrue(np.allclose(static_weight, model2_gc_weight_value)) sys.stderr.write('%s %s\n' % (static_loss, loss_value))
def test_qat_save(self): imperative_qat = ImperativeQuantAware( weight_quantize_type='abs_max', activation_quantize_type='moving_average_abs_max', quantizable_layer_type=[ 'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh', 'Swish' ]) with fluid.dygraph.guard(): lenet = ImperativeLenet() imperative_qat.quantize(lenet) adam = AdamOptimizer( learning_rate=0.001, parameter_list=lenet.parameters()) train_reader = paddle.batch( paddle.dataset.mnist.train(), batch_size=32, drop_last=True) test_reader = paddle.batch( paddle.dataset.mnist.test(), batch_size=32) epoch_num = 1 for epoch in range(epoch_num): lenet.train() for batch_id, data in enumerate(train_reader()): x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array( [x[1] for x in data]).astype('int64').reshape(-1, 1) img = fluid.dygraph.to_variable(x_data) label = fluid.dygraph.to_variable(y_data) out = lenet(img) acc = fluid.layers.accuracy(out, label) loss = fluid.layers.cross_entropy(out, label) avg_loss = fluid.layers.mean(loss) avg_loss.backward() adam.minimize(avg_loss) lenet.clear_gradients() if batch_id % 100 == 0: _logger.info( "Train | At epoch {} step {}: loss = {:}, acc= {:}". format(epoch, batch_id, avg_loss.numpy(), acc.numpy())) lenet.eval() for batch_id, data in enumerate(test_reader()): x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array( [x[1] for x in data]).astype('int64').reshape(-1, 1) img = fluid.dygraph.to_variable(x_data) label = fluid.dygraph.to_variable(y_data) out = lenet(img) acc_top1 = fluid.layers.accuracy( input=out, label=label, k=1) acc_top5 = fluid.layers.accuracy( input=out, label=label, k=5) if batch_id % 100 == 0: _logger.info( "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}". format(epoch, batch_id, acc_top1.numpy(), acc_top5.numpy())) # save weights model_dict = lenet.state_dict() fluid.save_dygraph(model_dict, "save_temp") # test the correctness of `paddle.jit.save` data = next(test_reader()) test_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') test_img = fluid.dygraph.to_variable(test_data) lenet.eval() before_save = lenet(test_img) # save inference quantized model path = "./qat_infer_model/lenet" save_dir = "./qat_infer_model" paddle.jit.save( layer=lenet, path=path, input_spec=[ paddle.static.InputSpec( shape=[None, 1, 28, 28], dtype='float32') ]) if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) else: place = core.CPUPlace() exe = fluid.Executor(place) [inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model( dirname=save_dir, executor=exe, model_filename="lenet" + INFER_MODEL_SUFFIX, params_filename="lenet" + INFER_PARAMS_SUFFIX) after_save, = exe.run(inference_program, feed={feed_target_names[0]: test_data}, fetch_list=fetch_targets) self.assertTrue( np.allclose(after_save, before_save.numpy()), msg='Failed to save the inference quantized model.')
def finetune(args): module = hub.Module(name="ernie", max_seq_len=args.max_seq_len) # Use the appropriate tokenizer to preprocess the data set tokenizer = hub.BertTokenizer(vocab_file=module.get_vocab_path()) dataset = hub.dataset.MSRA_NER(tokenizer=tokenizer, max_seq_len=args.max_seq_len) with fluid.dygraph.guard(): ts = TransformerSeqLabeling(num_classes=dataset.num_labels, transformer=module) adam = AdamOptimizer(learning_rate=1e-5, parameter_list=ts.parameters()) state_dict_path = os.path.join(args.checkpoint_dir, 'dygraph_state_dict') if os.path.exists(state_dict_path + '.pdparams'): state_dict, _ = fluid.load_dygraph(state_dict_path) ts.load_dict(state_dict) loss_sum = total_infer = total_label = total_correct = cnt = 0 for epoch in range(args.num_epoch): for batch_id, data in enumerate( dataset.batch_records_generator( phase="train", batch_size=args.batch_size, shuffle=True, pad_to_batch_max_seq_len=False)): batch_size = len(data["input_ids"]) input_ids = np.array(data["input_ids"]).astype( np.int64).reshape([batch_size, -1, 1]) position_ids = np.array(data["position_ids"]).astype( np.int64).reshape([batch_size, -1, 1]) segment_ids = np.array(data["segment_ids"]).astype( np.int64).reshape([batch_size, -1, 1]) input_mask = np.array(data["input_mask"]).astype( np.float32).reshape([batch_size, -1, 1]) labels = np.array(data["label"]).astype(np.int64).reshape( -1, 1) seq_len = np.array(data["seq_len"]).astype(np.int64).reshape( -1, 1) pred, ret_infers = ts(input_ids, position_ids, segment_ids, input_mask) loss = fluid.layers.cross_entropy(pred, to_variable(labels)) avg_loss = fluid.layers.mean(loss) avg_loss.backward() adam.minimize(avg_loss) loss_sum += avg_loss.numpy() * labels.shape[0] label_num, infer_num, correct_num = chunk_eval( labels, ret_infers.numpy(), seq_len, dataset.num_labels, 1) cnt += labels.shape[0] total_infer += infer_num total_label += label_num total_correct += correct_num if batch_id % args.log_interval == 0: precision, recall, f1 = calculate_f1( total_label, total_infer, total_correct) print('epoch {}: loss {}, f1 {} recall {} precision {}'. format(epoch, loss_sum / cnt, f1, recall, precision)) loss_sum = total_infer = total_label = total_correct = cnt = 0 if batch_id % args.save_interval == 0: state_dict = ts.state_dict() fluid.save_dygraph(state_dict, state_dict_path)
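The sequence-labeling loop above accumulates chunk counts with chunk_eval and turns them into metrics with a calculate_f1 helper that is not defined here. A minimal sketch under the assumed signature, deriving precision, recall and F1 from the gold-chunk, predicted-chunk and correctly-predicted-chunk counts (the handling of empty counts is an assumption):

# Hypothetical calculate_f1 matching the call calculate_f1(total_label, total_infer, total_correct).
def calculate_f1(total_label, total_infer, total_correct):
    precision = total_correct / total_infer if total_infer else 0.0
    recall = total_correct / total_label if total_label else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if precision + recall else 0.0)
    return precision, recall, f1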
def train_mnist(args): epoch_num = args.epoch BATCH_SIZE = 64 place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \ if args.use_data_parallel else fluid.CUDAPlace(0) with fluid.dygraph.guard(place): if args.ce: print("ce mode") seed = 33 np.random.seed(seed) fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed if args.use_data_parallel: strategy = fluid.dygraph.parallel.prepare_context() mnist = MNIST() adam = AdamOptimizer(learning_rate=0.001, parameter_list=mnist.parameters()) if args.use_data_parallel: mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy) train_reader = paddle.batch(reader_decorator( paddle.dataset.mnist.train()), batch_size=BATCH_SIZE, drop_last=True) if args.use_data_parallel: train_reader = fluid.contrib.reader.distributed_batch_reader( train_reader) test_reader = paddle.batch(reader_decorator( paddle.dataset.mnist.test()), batch_size=BATCH_SIZE, drop_last=True) train_loader = fluid.io.DataLoader.from_generator( capacity=10, use_multiprocess=True) train_loader.set_sample_list_generator(train_reader, places=place) test_loader = fluid.io.DataLoader.from_generator(capacity=10, use_multiprocess=True) test_loader.set_sample_list_generator(test_reader, places=place) total_train_time = 0 for epoch in range(epoch_num): stime = time.time() for batch_id, data in enumerate(train_loader()): img, label = data label.stop_gradient = True cost, acc = mnist(img, label) loss = fluid.layers.cross_entropy(cost, label) avg_loss = fluid.layers.mean(loss) if args.use_data_parallel: avg_loss = mnist.scale_loss(avg_loss) avg_loss.backward() mnist.apply_collective_grads() else: avg_loss.backward() adam.minimize(avg_loss) # save checkpoint mnist.clear_gradients() if batch_id % 100 == 0: print("Loss at epoch {} step {}: {:}".format( epoch, batch_id, avg_loss.numpy())) total_train_time += (time.time() - stime) mnist.eval() test_cost, test_acc = test_mnist(test_loader, mnist, BATCH_SIZE) mnist.train() if args.ce: print("kpis\ttest_acc\t%s" % test_acc) print("kpis\ttest_cost\t%s" % test_cost) print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format( epoch, test_cost, test_acc)) save_parameters = (not args.use_data_parallel) or ( args.use_data_parallel and fluid.dygraph.parallel.Env().local_rank == 0) if save_parameters: fluid.save_dygraph(mnist.state_dict(), "save_temp") print("checkpoint saved") inference_mnist()
def main(): # Step 0: preparation writer = LogWriter(logdir="./log/scalar") place = paddle.fluid.CUDAPlace(0) with fluid.dygraph.guard(place): # Step 1: Define training dataloader image_folder = "" image_list_file = "dummy_data/fabric_list.txt" transform = Transform() #Normalize2() # [0,255]-->[0,1] x_data = DataLoader(image_folder, image_list_file, transform=transform) x_dataloader = fluid.io.DataLoader.from_generator(capacity=2, return_list=True) x_dataloader.set_sample_generator(x_data, args.batch_size) total_batch = len(x_data) // args.batch_size # Step 2: Create model if args.net == "basic": D = Discriminator() G = Generator() E = Invertor() else: raise NotImplementedError( f"args.net: {args.net} is not Supported!") # Step 3: Define criterion and optimizer criterion = Basic_Loss D_optim = AdamOptimizer(learning_rate=args.lr, parameter_list=D.parameters()) G_optim = AdamOptimizer(learning_rate=args.lr, parameter_list=G.parameters()) E_optim = AdamOptimizer(learning_rate=args.lr, parameter_list=E.parameters()) G_loss_meter = AverageMeter() D_loss_meter = AverageMeter() E_loss_meter = AverageMeter() D.train() G.train() E.train() # Step 4: Slight Training iteration = -1 is_slight_Train = True for epoch in range(1, args.epoch_num + 1): #optim Discriminator for (x, x_labels) in x_dataloader(): n = x.shape[0] if is_slight_Train: iteration += 1 x = fluid.layers.cast(x, dtype="float32") x = fluid.layers.transpose(x, perm=[0, 3, 1, 2]) preds_x = D(x) preds_x_array = preds_x.numpy() #print("D(x),1",preds_array.shape, np.mean(preds_array)) writer.add_scalar(tag="D(x)=1", step=iteration, value=np.mean(preds_x_array)) if np.mean(preds_x_array) >= 0.98: is_slight_Train = False z = np.random.rand(n, 64) zeros = np.zeros((n, 1)) z = to_variable(z) zeros = to_variable(zeros) z = fluid.layers.cast(z, dtype="float32") zeros = fluid.layers.cast(zeros, dtype="int64") preds_fx = D(G(z)) preds_fx_array = preds_fx.numpy() writer.add_scalar(tag="D(G(z))=0", step=iteration, value=np.mean(preds_fx_array)) D_loss = criterion(preds_x, x_labels) + criterion( preds_fx, zeros) D_loss.backward() D_optim.minimize(D_loss) D.clear_gradients() D_loss_meter.update(D_loss.numpy()[0], n) writer.add_scalar(tag="D_loss", step=iteration, value=D_loss_meter.avg) print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " + f"STEP{iteration}, " + f"Average D Loss: {D_loss_meter.avg:4f}, ") z = np.random.rand(n, 64) ones = np.ones((n, 1)) z = to_variable(z) ones = to_variable(ones) z = fluid.layers.cast(z, dtype="float32") ones = fluid.layers.cast(ones, dtype="int64") preds = D(G(z)) preds_array = preds.numpy() writer.add_scalar(tag="D(G(z))=1", step=iteration, value=np.mean(preds_array)) G_loss = criterion(preds, ones) G_loss.backward() G_optim.minimize(G_loss) G.clear_gradients() G_loss_meter.update(G_loss.numpy()[0], n) writer.add_scalar(tag="G_loss", step=iteration, value=G_loss_meter.avg) print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " + f"STEP{iteration}, " + f"Average G Loss: {G_loss_meter.avg:4f}") if epoch % args.save_freq == 0 or epoch == args.epoch_num or not is_slight_Train: D_model_path = os.path.join(args.checkpoint_folder, f"D_{args.net}-Epoch-{epoch}") G_model_path = os.path.join(args.checkpoint_folder, f"G_{args.net}-Epoch-{epoch}") # save model and optmizer states model_dict = D.state_dict() fluid.save_dygraph(model_dict, D_model_path) optim_dict = D_optim.state_dict() fluid.save_dygraph(optim_dict, D_model_path) model_dict = G.state_dict() fluid.save_dygraph(model_dict, G_model_path) optim_dict = G_optim.state_dict() 
fluid.save_dygraph(optim_dict, G_model_path) print( f'----- Save model: {D_model_path}.pdparams, {G_model_path}.pdparams' ) if not is_slight_Train: break # Step 5: full training for Generator and Discriminator D_optim = AdamOptimizer(learning_rate=args.lr * 10, parameter_list=D.parameters()) G_optim = AdamOptimizer(learning_rate=args.lr * 10, parameter_list=G.parameters()) G_loss_meter = AverageMeter() D_loss_meter = AverageMeter() for epoch in range(1, args.epoch_num + 1): for (x, x_labels) in x_dataloader(): n = x.shape[0] iteration += 1 x = fluid.layers.cast(x, dtype="float32") x = fluid.layers.transpose(x, perm=[0, 3, 1, 2]) preds1 = D(x) preds_array = preds1.numpy() writer.add_scalar(tag="D(x)=1", step=iteration, value=np.mean(preds_array)) z = np.random.rand(n, 64) zeros = np.zeros((n, 1)) z = to_variable(z) zeros = to_variable(zeros) z = fluid.layers.cast(z, dtype="float32") zeros = fluid.layers.cast(zeros, dtype="int64") preds2 = D(G(z)) preds_array = preds2.numpy() #print("DG(z),0:",preds_array.shape, np.mean(preds_array)) writer.add_scalar(tag="D(G(z))=0", step=iteration, value=np.mean(preds_array)) D_loss = criterion(preds1, x_labels) + criterion(preds2, zeros) D_loss.backward() D_optim.minimize(D_loss) D.clear_gradients() D_loss_meter.update(D_loss.numpy()[0], n) writer.add_scalar(tag="D_loss", step=iteration, value=D_loss_meter.avg) print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " + f"STEP{iteration}, " + f"Average D Loss: {D_loss_meter.avg:4f} ") z = np.random.rand(n, 64) ones = np.ones((n, 1)) z = to_variable(z) ones = to_variable(ones) z = fluid.layers.cast(z, dtype="float32") ones = fluid.layers.cast(ones, dtype="int64") preds = D(G(z)) preds_array = preds.numpy() #print("DG(z),1:",preds_array.shape, np.mean(preds_array)) writer.add_scalar(tag="D(G(z))=1", step=iteration, value=np.mean(preds_array)) G_loss = criterion(preds, ones) G_loss.backward() G_optim.minimize(G_loss) G.clear_gradients() G_loss_meter.update(G_loss.numpy()[0], n) writer.add_scalar(tag="G_loss", step=iteration, value=G_loss_meter.avg) print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " + f"STEP{iteration}, " + f"Average G Loss: {G_loss_meter.avg:4f}") if epoch % args.save_freq == 0 or epoch == args.epoch_num: D_model_path = os.path.join(args.checkpoint_folder, f"D_{args.net}-Epoch-{epoch}") G_model_path = os.path.join(args.checkpoint_folder, f"G_{args.net}-Epoch-{epoch}") # save model and optmizer states model_dict = D.state_dict() fluid.save_dygraph(model_dict, D_model_path) optim_dict = D_optim.state_dict() fluid.save_dygraph(optim_dict, D_model_path) model_dict = G.state_dict() fluid.save_dygraph(model_dict, G_model_path) optim_dict = G_optim.state_dict() fluid.save_dygraph(optim_dict, G_model_path) print( f'----- Save model: {D_model_path}.pdparams, {G_model_path}.pdparams' ) # Step 6: full training for Inverter E_optim = AdamOptimizer(learning_rate=args.lr * 10, parameter_list=E.parameters()) E_loss_meter = AverageMeter() for epoch in range(1, args.epoch_num + 1): for (x, x_labels) in x_dataloader(): n = x.shape[0] iteration += 1 x = fluid.layers.cast(x, dtype="float32") image = x.numpy()[0] * 255 writer.add_image(tag="x", step=iteration, img=image) x = fluid.layers.transpose(x, perm=[0, 3, 1, 2]) invert_x = G(E(x)) invert_image = fluid.layers.transpose(invert_x, perm=[0, 2, 3, 1]) invert_image = invert_image.numpy()[0] * 255 #print("D(x),1",preds_array.shape, np.mean(preds_array)) writer.add_image(tag="invert_x", step=iteration, img=invert_image) print(np.max(invert_image), np.min(invert_image)) 
E_loss = fluid.layers.mse_loss(invert_x, x) print("E_loss shape:", E_loss.numpy().shape) E_loss.backward() E_optim.minimize(E_loss) E.clear_gradients() E_loss_meter.update(E_loss.numpy()[0], n) writer.add_scalar(tag="E_loss", step=iteration, value=E_loss_meter.avg) print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " + f"STEP{iteration}, " + f"Average E Loss: {E_loss_meter.avg:4f}, ") if epoch % args.save_freq == 0 or epoch == args.epoch_num: E_model_path = os.path.join(args.checkpoint_folder, f"E_{args.net}-Epoch-{epoch}") # save model and optimizer states model_dict = E.state_dict() fluid.save_dygraph(model_dict, E_model_path) optim_dict = E_optim.state_dict() fluid.save_dygraph(optim_dict, E_model_path) print( f'----- Save model: {E_model_path}.pdparams, {E_model_path}.pdopt' )