def save_parameters(checkpoint_dir, iteration, model, optimizer=None):
    """Checkpoint the latest trained model parameters.

    Args:
        checkpoint_dir (str): the directory where checkpoint is saved.
        iteration (int): the latest iteration number.
        model (Layer): model to be checkpointed.
        optimizer (Optimizer, optional): optimizer to be checkpointed.
            Defaults to None.

    Returns:
        None
    """
    checkpoint_path = os.path.join(checkpoint_dir, "step-{}".format(iteration))

    model_dict = model.state_dict()
    params_path = checkpoint_path + ".pdparams"
    paddle.save(model_dict, params_path)
    print("[checkpoint] Saved model to {}".format(params_path))

    if optimizer:
        opt_dict = optimizer.state_dict()
        optimizer_path = checkpoint_path + ".pdopt"
        paddle.save(opt_dict, optimizer_path)
        print("[checkpoint] Saved optimizer state to {}".format(optimizer_path))

    _save_checkpoint(checkpoint_dir, iteration)
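# Usage sketch for save_parameters (illustrative: assumes this module's
# _save_checkpoint helper is importable; the Linear layer stands in for a
# real model and the paths are hypothetical).
import os
import paddle

model = paddle.nn.Linear(10, 2)
optimizer = paddle.optimizer.Adam(parameters=model.parameters())
os.makedirs("checkpoints", exist_ok=True)
# Writes checkpoints/step-1000.pdparams and checkpoints/step-1000.pdopt.
save_parameters("checkpoints", 1000, model, optimizer)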
def convert_weights(
    model_type: str,
    from_model: str,
    from_path: str,
    config_path: str,
    to_model: str,
    dump_path: str,
):
    model_class = MODEL_CLASSES[to_model][model_type]
    config = ConfigBase(config_path)
    model = model_class(config)
    load_weights_fct = LOAD_WEIGHTS_MAPS[to_model][model_type][from_model]
    if to_model == "tf":
        # Build the TF model's variables by running a dummy forward pass.
        input_ids = tf.ones([3, 4], dtype=tf.int32)
        model(input_ids)
    load_weights_fct(model, config, from_path)

    if to_model == "pt":
        torch.save(model.state_dict(), dump_path)
    elif to_model == "tf":
        model.save_weights(dump_path)
    elif to_model == "ms":
        mindspore.save_checkpoint(model, dump_path)
    elif to_model == "of":
        flow.save(model.state_dict(), dump_path)
    elif to_model == "pd":
        paddle.save(model.state_dict(), dump_path)
    print("Save {} model to {}".format(to_model, dump_path))
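# Usage sketch for convert_weights (argument values are illustrative; the
# MODEL_CLASSES / LOAD_WEIGHTS_MAPS tables and the config file are assumed
# to exist in the calling context).
convert_weights(
    model_type="bert",
    from_model="pt",
    from_path="pytorch_model.bin",
    config_path="config.json",
    to_model="pd",
    dump_path="model_state.pdparams",
)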
def _train(model, train_dataloader):
    """Train the model for cmd_args.num_epochs epochs, saving the model and
    optimizer state after every epoch."""
    n_epoch = cmd_args.num_epochs
    clip_grad = nn.ClipGradByNorm(clip_norm=cmd_args.clip_grad)
    optimizer = paddle.optimizer.Adam(parameters=model.parameters(),
                                      learning_rate=cmd_args.learning_rate,
                                      grad_clip=clip_grad)
    # start to train
    for epoch in range(n_epoch):
        # kl_weight = kl_annealer(epoch)
        kl_weight = cmd_args.kl_coeff
        print('##########################################################################################', flush=True)
        print('EPOCH:%d' % epoch, flush=True)
        postfix = _train_epoch(model, train_dataloader, epoch, kl_weight,
                               optimizer=optimizer)

        # save state_dict
        paddle.save(model.state_dict(),
                    cmd_args.save_dir + 'train_model_epoch' + str(epoch))
        paddle.save(optimizer.state_dict(),
                    cmd_args.save_dir + 'train_optimizer_epoch' + str(epoch))

        print('epoch:%d loss:%f kl_loss:%f perplexity_loss:%f' %
              (epoch, postfix['loss'], postfix['kl_loss'],
               postfix['perplexity_loss']), flush=True)
        print('##########################################################################################', flush=True)
        # lr_annealer.step()
    return model
def update_vgg16_params(model_path):
    param_state_dict = paddle.load(model_path)
    # first conv weight name: _conv_block_1._conv_1.weight, shape is [64, 3, 3, 3]
    # first fc weight name: _fc1.weight, shape is [25088, 4096]
    for k, v in param_state_dict.items():
        print(k, v.shape)

    # first conv weight: pad a fourth input channel with zeros
    weight = param_state_dict['_conv_block_1._conv_1.weight']  # [64, 3, 3, 3]
    print('ori shape: ', weight.shape)
    zeros_pad = paddle.zeros((64, 1, 3, 3))
    param_state_dict['_conv_block_1._conv_1.weight'] = paddle.concat(
        [weight, zeros_pad], axis=1)
    print('shape after padding',
          param_state_dict['_conv_block_1._conv_1.weight'].shape)

    # fc1: reshape the fully connected weight into a conv weight
    weight = param_state_dict['_fc1.weight']
    weight = paddle.transpose(weight, [1, 0])
    print('after transpose: ', weight.shape)
    weight = paddle.reshape(weight, (4096, 512, 7, 7))
    print('after reshape: ', weight.shape)
    weight = weight[0:512, :, 2:5, 2:5]
    print('after crop: ', weight.shape)
    param_state_dict['_conv_6.weight'] = weight

    del param_state_dict['_fc1.weight']
    del param_state_dict['_fc1.bias']
    del param_state_dict['_fc2.weight']
    del param_state_dict['_fc2.bias']
    del param_state_dict['_out.weight']
    del param_state_dict['_out.bias']
    paddle.save(param_state_dict, 'VGG16_pretrained.pdparams')
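# Shape sanity check for the _fc1 -> _conv_6 conversion above (a sketch on
# random data; the real function operates on the loaded VGG16 weights).
import numpy as np

fc1 = np.random.randn(25088, 4096).astype("float32")  # 25088 == 512 * 7 * 7
w = fc1.T.reshape(4096, 512, 7, 7)  # one 512x7x7 filter per fc output unit
w = w[0:512, :, 2:5, 2:5]           # keep 512 filters, crop the center 3x3
assert w.shape == (512, 512, 3, 3)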
def test_dygraph_save_static_load(self):
    inps = np.random.randn(1, IMAGE_SIZE).astype('float32')
    path = 'test_dygraph_save_static_load/dy-static.pdparams'
    paddle.disable_static()
    with paddle.utils.unique_name.guard():
        layer = LinearNet()
        state_dict_dy = layer.state_dict()
        paddle.save(state_dict_dy, path)
    paddle.enable_static()
    with new_program_scope():
        layer = LinearNet()
        data = paddle.static.data(name='x_static_save',
                                  shape=(None, IMAGE_SIZE),
                                  dtype='float32')
        y_static = layer(data)
        program = paddle.static.default_main_program()
        place = fluid.CPUPlace() \
            if not paddle.fluid.core.is_compiled_with_cuda() \
            else fluid.CUDAPlace(0)
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())
        state_dict = paddle.load(path, keep_name_table=True)
        program.set_state_dict(state_dict)
        state_dict_param = program.state_dict("param")
        for name, tensor in state_dict_dy.items():
            self.assertTrue(
                np.array_equal(tensor.numpy(),
                               np.array(state_dict_param[tensor.name])))
def save_pretrained(self, save_directory):
    """
    Save model configuration and related resources (model state) to files
    under `save_directory`.

    Args:
        save_directory (str): Directory to save files into.
    """
    assert os.path.isdir(save_directory), \
        "Saving directory ({}) should be a directory".format(save_directory)
    # save model config
    model_config_file = os.path.join(save_directory, self.model_config_file)
    model_config = self.init_config
    # If init_config contains a Layer, use the layer's init_config to save
    for key, value in model_config.items():
        if key == "init_args":
            args = []
            for arg in value:
                args.append(arg.init_config
                            if isinstance(arg, PretrainedModel) else arg)
            model_config[key] = tuple(args)
        elif isinstance(value, PretrainedModel):
            model_config[key] = value.init_config
    with io.open(model_config_file, "w", encoding="utf-8") as f:
        f.write(json.dumps(model_config, ensure_ascii=False))
    # save model
    file_name = os.path.join(save_directory,
                             list(self.resource_files_names.values())[0])
    paddle.save(self.state_dict(), file_name)
def train_model(self, use_custom_op=False, dy2stat=False):
    # reset random seed
    paddle.seed(self.seed)
    np.random.seed(self.seed)
    # paddle.framework.random._manual_program_seed(SEED)
    net = Net(self.in_dim, self.out_dim, use_custom_op)
    if dy2stat:
        net = paddle.jit.to_static(net, input_spec=[self.x_spec])
    mse_loss = paddle.nn.MSELoss()
    sgd = paddle.optimizer.SGD(learning_rate=0.1,
                               parameters=net.parameters())

    for batch_id in range(self.batch_num):
        x = paddle.to_tensor(self.datas[batch_id])
        y = paddle.to_tensor(self.labels[batch_id])

        out = net(x)
        loss = mse_loss(out, y)
        loss.backward()
        sgd.minimize(loss)
        net.clear_gradients()

    # save inference model
    net.eval()
    if dy2stat:
        paddle.jit.save(net, self.model_dy2stat_path)
    else:
        paddle.save(net.state_dict(),
                    self.model_path_template.format(use_custom_op))
    return out.numpy()
def save_model(net, optimizer, model_path, epoch_id, prefix='rec'):
    model_path = os.path.join(model_path, str(epoch_id))
    _mkdir_if_not_exist(model_path)
    model_prefix = os.path.join(model_path, prefix)
    paddle.save(net.state_dict(), model_prefix + ".pdparams")
    paddle.save(optimizer.state_dict(), model_prefix + ".pdopt")
    logger.info("Already save model in {}".format(model_path))
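# Usage sketch for save_model (illustrative; assumes the module-level
# `logger` and `_mkdir_if_not_exist` helpers).
import paddle

net = paddle.nn.Linear(8, 8)
opt = paddle.optimizer.SGD(learning_rate=0.1, parameters=net.parameters())
# Produces output/5/rec.pdparams and output/5/rec.pdopt.
save_model(net, opt, "output", epoch_id=5)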
def convert_pytorch_checkpoint_to_paddle(pytorch_checkpoint_path,
                                         paddle_dump_path):
    pytorch_state_dict = torch.load(pytorch_checkpoint_path,
                                    map_location="cpu")
    paddle_state_dict = OrderedDict()
    for k, v in pytorch_state_dict.items():
        is_transpose = False
        if k[-7:] == ".weight":
            # Linear weights are stored transposed in paddle relative to torch.
            if ".embeddings." not in k and ".LayerNorm." not in k:
                if v.ndim == 2:
                    v = v.transpose(0, 1)
                    is_transpose = True
        oldk = k
        for huggingface_name, paddle_name in huggingface_to_paddle.items():
            k = k.replace(huggingface_name, paddle_name)
        if "bert." not in k and "cls." not in k and "classifier" not in k:
            k = "bert." + k
        print(f"Converting: {oldk} => {k} | is_transpose {is_transpose}")
        if v.dtype == torch.float16:
            v = torch.tensor(v.data, dtype=torch.float32)
        paddle_state_dict[k] = v.data.numpy()
    paddle.save(paddle_state_dict, paddle_dump_path)
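# Why 2-D ".weight" tensors are transposed above: torch.nn.Linear stores its
# weight as [out_features, in_features] while paddle.nn.Linear stores
# [in_features, out_features]. A minimal check (assumes both frameworks are
# installed):
import torch
import paddle

t = torch.nn.Linear(4, 8)
p = paddle.nn.Linear(4, 8)
print(tuple(t.weight.shape))  # (8, 4)
print(tuple(p.weight.shape))  # (4, 8) -- transposed relative to torch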
def save_model(output_path, model, steps, opt, lr_scheduler, max_ckpt=2):
    if paddle.distributed.get_rank() == 0:
        output_dir = os.path.join(output_path, "model_%d" % steps)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        paddle.save(model.state_dict(),
                    os.path.join(output_dir, "ckpt.pdparams"))
        # paddle.save({"global_step": steps}, os.path.join(output_dir, "step"))
        # paddle.save(opt.state_dict(), os.path.join(output_dir, "opt.pdparams"))
        # paddle.save(lr_scheduler.state_dict(),
        #             os.path.join(output_dir, "lr_scheduler.pdparams"))
        log.info("save model %s" % output_dir)

        # keep only the newest max_ckpt checkpoints
        ckpt_paths = glob.glob(os.path.join(output_path, "model_*"))
        if len(ckpt_paths) > max_ckpt:

            def version(x):
                return int(x.split("_")[-1])

            rm_ckpt_paths = sorted(ckpt_paths, key=version,
                                   reverse=True)[max_ckpt:]
            for ckpt_dir in rm_ckpt_paths:
                if os.path.exists(ckpt_dir):
                    shutil.rmtree(ckpt_dir)
def save(self, filename):
    """
    Saves the model to the specified filename.

    Args:
        filename (`str`): The filename to save to.
    """
    paddle.save(self.state_dict(), filename)
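# Round-trip sketch for the save method above, with a plain Linear layer
# standing in for the real model (the path is illustrative):
import paddle

layer = paddle.nn.Linear(4, 4)
paddle.save(layer.state_dict(), "model.pdparams")    # what save(filename) does
layer.set_state_dict(paddle.load("model.pdparams"))  # restoring the weights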
def test_inference(self):
    if not os.path.exists(self.save_path):
        os.makedirs(self.save_path, exist_ok=True)
    paddle.save(self.faster_tokenizer.state_dict(), self.param_path)
    state_dict = paddle.load(self.param_path)
    self.faster_tokenizer.set_dict(state_dict)

    static_model = paddle.jit.to_static(
        self.faster_tokenizer,
        input_spec=[
            paddle.static.InputSpec(
                shape=[None], dtype=core.VarDesc.VarType.STRINGS),  # texts
        ])
    # Save as a static graph model.
    paddle.jit.save(static_model, self.inference_path)

    predictor = Predictor(self.save_path)
    input_ids, token_type_ids = predictor.predict(self.text)
    encoded_inputs = self.bert_tokenizer(self.text)
    py_input_ids = np.array(encoded_inputs[0]["input_ids"]).reshape([1, -1])
    py_token_type_ids = np.array(
        encoded_inputs[0]["token_type_ids"]).reshape([1, -1])
    self.assertTrue(np.allclose(input_ids, py_input_ids, rtol=0, atol=0.01))
    self.assertTrue(
        np.allclose(token_type_ids, py_token_type_ids, rtol=0, atol=0.01))
def convert_pytorch_checkpoint_to_paddle(pytorch_checkpoint_path,
                                         paddle_dump_path):
    import torch
    import paddle

    pytorch_state_dict = torch.load(pytorch_checkpoint_path,
                                    map_location="cpu")
    paddle_state_dict = OrderedDict()
    for k, v in pytorch_state_dict.items():
        transpose = False
        if k in skip_weights:
            continue
        if k[-7:] == ".weight":
            if not any(w in k for w in dont_transpose):
                if v.ndim == 2:
                    v = v.transpose(0, 1)
                    transpose = True
        oldk = k
        for huggingface_name, paddle_name in huggingface_to_paddle.items():
            k = k.replace(huggingface_name, paddle_name)
        print(f"Converting: {oldk} => {k} | is_transpose {transpose}")
        paddle_state_dict[k] = v.data.numpy()

    paddle.save(paddle_state_dict, paddle_dump_path)
def test_save_and_load(self):
    path = "/tmp/paddle_asp_save_st/"
    param_path = path + "asp.pdparams"
    model_path = path + "asp.pdmodel"

    paddle.save(self.main_program.state_dict(), param_path)
    paddle.save(self.main_program, model_path)

    prog = paddle.load(model_path)
    state_dict = paddle.load(param_path)
    prog.set_state_dict(state_dict)

    feeder = fluid.DataFeeder(feed_list=[self.img, self.label],
                              place=self.place)
    data = (np.random.randn(64, 3, 32, 32),
            np.random.randint(10, size=(64, 1)))
    self.exe.run(prog, feed=feeder.feed([data]))

    for param in prog.global_block().all_parameters():
        if ASPHelper._is_supported_layer(prog, param.name):
            mat = np.array(
                fluid.global_scope().find_var(param.name).get_tensor())
            self.assertTrue(
                paddle.fluid.contrib.sparsity.check_sparsity(mat.T, n=2, m=4))
def train(config, model, loader, optim):
    model.train()
    global_step = 0
    total_loss = 0.0
    start = time.time()
    for epoch in range(config.epochs):
        for step, feed_dict in enumerate(loader()):
            global_step += 1
            feed_dict = data2tensor(feed_dict)
            loss = model(feed_dict)
            loss.backward()
            optim.step()
            optim.clear_grad()
            total_loss += loss.numpy()[0]

            if global_step % config.log_steps == 0:
                avg_loss = total_loss / config.log_steps
                total_loss = 0.0
                sec_per_batch = (time.time() - start) / config.log_steps
                start = time.time()
                log.info(
                    "sec/batch: %.6f | Epoch: %s | step: %s | train_loss: %.6f"
                    % (sec_per_batch, epoch, global_step, avg_loss))

        log.info("saving model in epoch %s" % epoch)
        save_files = os.path.join(config.save_dir, "ckpt.pdparams")
        log.info("Epoch: %s | Saving model in %s" % (epoch, save_files))
        paddle.save(model.state_dict(), save_files)
def evaluate(self, dataloader: DataLoader, mode: str = 'dev'):
    logger.success(f'{mode} stage ...')
    self.model.eval()
    self.metric.reset()
    losses = []
    for batch in dataloader:
        input_ids, token_type_ids, labels = batch
        logits = self.model(input_ids, token_type_ids)
        loss = self.criterion(logits, labels)
        losses.append(num(loss))
        correct = self.metric.compute(logits, labels)
        self.metric.update(correct)
    accu = self.metric.accumulate()
    logger.info("eval loss: %.5f, accuracy: %.5f" % (np.mean(losses), accu))
    self.model.train()
    self.metric.reset()

    self.context_data.eval_step += 1
    self.writer.add_scalar(tag='eval-acc', value=accu,
                           step=self.context_data.eval_step)
    self.writer.add_scalar(tag='eval-loss', value=np.sum(losses),
                           step=self.context_data.eval_step)

    # update the stored best accuracy (and checkpoint) only on improvement;
    # overwriting self.context_data.eval_acc before this comparison would
    # make it always false
    if accu > self.context_data.eval_acc:
        self.context_data.eval_acc = accu
        logger.success('saving the best model ...')
        best_model_file = os.path.join(self.config.output_dir, 'best.pdparams')
        paddle.save(self.model.state_dict(), best_model_file)
def torch2paddle():
    torch_path = "./data/mobilenet_v3_small-047dcff4.pth"
    paddle_path = "./data/mv3_small_paddle.pdparams"
    torch_state_dict = torch.load(torch_path)
    fc_names = ["classifier"]
    paddle_state_dict = {}
    for k in torch_state_dict:
        if "num_batches_tracked" in k:
            continue
        v = torch_state_dict[k].detach().cpu().numpy()
        flag = [i in k for i in fc_names]
        if any(flag) and "weight" in k:  # ignore bias
            new_shape = [1, 0] + list(range(2, v.ndim))
            print(
                f"name: {k}, ori shape: {v.shape}, "
                f"new shape: {v.transpose(new_shape).shape}"
            )
            v = v.transpose(new_shape)
        k = k.replace("running_var", "_variance")
        k = k.replace("running_mean", "_mean")
        paddle_state_dict[k] = v
    paddle.save(paddle_state_dict, paddle_path)
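# The permutation built above generalizes the 2-D transpose: it swaps only
# the first two axes of an N-D array. A quick numpy check on random data:
import numpy as np

v = np.random.randn(1000, 576)               # e.g. a torch classifier weight
new_shape = [1, 0] + list(range(2, v.ndim))  # -> [1, 0] for a 2-D array
assert v.transpose(new_shape).shape == (576, 1000)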
def main(args):
    if not args.use_cuda:
        paddle.set_device("cpu")
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    graph = load(args.dataset)

    model = SkipGramModel(graph.num_nodes,
                          args.embed_size,
                          args.neg_num,
                          sparse=not args.use_cuda)
    model = paddle.DataParallel(model)

    train_steps = int(graph.num_nodes / args.batch_size) * args.epoch
    scheduler = paddle.optimizer.lr.PolynomialDecay(
        learning_rate=args.learning_rate,
        decay_steps=train_steps,
        end_lr=0.0001)
    optim = Adam(learning_rate=scheduler, parameters=model.parameters())

    train_ds = ShardedDataset(graph.nodes)
    collate_fn = BatchRandWalk(graph, args.walk_len, args.win_size,
                               args.neg_num, args.neg_sample_type)
    data_loader = Dataloader(train_ds,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=args.sample_workers,
                             collate_fn=collate_fn)

    for epoch in tqdm.tqdm(range(args.epoch)):
        train_loss = train(model, data_loader, optim)
        log.info("Running epoch:%s\t train_loss:%.6f", epoch, train_loss)
    paddle.save(model.state_dict(), "model.pdparams")
def convert_pytorch_checkpoint_to_paddle(pytorch_checkpoint_path,
                                         huggingface_to_paddle):
    import torch
    import paddle

    pytorch_state_dict = torch.load(pytorch_checkpoint_path,
                                    map_location="cpu")
    paddle_state_dict = OrderedDict()
    total_params = 0
    for k, v in pytorch_state_dict.items():
        total_params += np.prod(v.shape)
        is_transpose = False
        if k[-7:] == ".weight":
            if ".embeddings." not in k and ".LayerNorm." not in k:
                if v.ndim == 2:
                    v = v.transpose(0, 1)
                    is_transpose = True
        oldk = k
        for huggingface_name, paddle_name in huggingface_to_paddle.items():
            k = k.replace(huggingface_name, paddle_name)
        if k.startswith('distilbert.pooler.dense'):
            k = k.replace('distilbert.pooler.dense', 'pre_classifier')
        print(f"Converting: {oldk} => {k} | is_transpose {is_transpose}")
        paddle_state_dict[k] = v.data.numpy()

    paddle_dump_path = pytorch_checkpoint_path.replace(
        'pytorch_model.bin', 'model_state.pdparams')
    paddle.save(paddle_state_dict, paddle_dump_path)
    print(f'Total params: {total_params}')
def train(model, train_data_loader, val_data_loader, loss_fn, optimizer,
          n_epochs, model_name):
    best_auprc = -1
    for epoch_i in range(1, n_epochs + 1):
        start = time.time()
        model.train()
        ## Training
        train_loss, train_metrics = run_batch(model, optimizer,
                                              train_data_loader, epoch_i,
                                              "train", loss_fn)
        model.eval()
        with paddle.no_grad():
            ## Validation
            if val_data_loader:
                val_loss, val_metrics = run_batch(model, optimizer,
                                                  val_data_loader, epoch_i,
                                                  "val", loss_fn)
                if best_auprc < val_metrics[1]:
                    current_state = get_model_params_state(
                        model, args, epoch_i, *val_metrics)
                    paddle.save(current_state, f"{model_name}.pdparams")
                    best_auprc = val_metrics[1]
        if train_data_loader:
            print(f"\n#### Epoch {epoch_i} time {time.time() - start:.4f}s")
            print_metrics(train_loss, 0, 0)
        if val_data_loader:
            print(f"#### Validation epoch {epoch_i}")
            print_metrics(val_loss, *val_metrics)
def save_checkpoint(state, filename='checkpoint'):
    paddle.save(state['state_dict'], filename + '.pdparams')
    del state['state_dict']
    paddle.save(state['optimizer'], filename + '.pdopt')
    del state['optimizer']
    with open(filename + '.state.pickle', 'wb') as fout:
        pickle.dump(state, fout)
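# Loading counterpart to save_checkpoint (a sketch; the model and optimizer
# must already be constructed to match the saved state):
import pickle
import paddle

def load_checkpoint(model, optimizer, filename='checkpoint'):
    model.set_state_dict(paddle.load(filename + '.pdparams'))
    optimizer.set_state_dict(paddle.load(filename + '.pdopt'))
    with open(filename + '.state.pickle', 'rb') as fin:
        return pickle.load(fin)  # the remaining metadata (epoch, scores, ...)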
def do_train(args):
    last_step = args.num_train_epochs * len(train_data_loader)
    tic_train = time.time()
    for epoch in range(args.num_train_epochs):
        for step, batch in enumerate(train_data_loader):
            args.global_step += 1
            input_ids, token_type_ids, _, labels = batch
            logits = model(input_ids, token_type_ids)
            loss = loss_fct(logits, labels)
            avg_loss = paddle.mean(loss)
            if args.global_step % args.logging_steps == 0:
                print(
                    "global step %d, epoch: %d, batch: %d, loss: %f, speed: %.2f step/s"
                    % (args.global_step, epoch, step, avg_loss,
                       args.logging_steps / (time.time() - tic_train)))
                tic_train = time.time()
            avg_loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.clear_grad()
            if args.global_step % args.save_steps == 0 \
                    or args.global_step == last_step:
                if paddle.distributed.get_rank() == 0:
                    evaluate(model, loss_fct, valid_data_loader, label_num)
                    paddle.save(
                        model.state_dict(),
                        os.path.join(args.output_dir,
                                     "model_%d.pdparams" % args.global_step))
def main():
    running_reward = 10
    for i_episode in count(1):
        state, ep_reward = env.reset(), 0
        for t in range(1, 10000):  # Don't infinite loop while learning
            action = policy.select_action(state)
            state, reward, done, _ = env.step(action.numpy()[0][0])
            if render:
                env.render()
            policy.rewards.append(reward)
            ep_reward += reward
            if done:
                break

        running_reward = 0.05 * ep_reward + (1 - 0.05) * running_reward
        finish_episode()
        if i_episode % log_interval == 0:
            print('Episode {}\tLast reward: {:.2f}\tAverage reward: {:.2f}'.
                  format(i_episode, ep_reward, running_reward))
            # save model
            paddle.save(policy.state_dict(), 'model/model.pdparams')
        if running_reward > env.spec.reward_threshold:
            print("Solved! Running reward is now {} and "
                  "the last episode is episode {}!".format(
                      running_reward, i_episode))
            break
def test_pp_model(self):
    hcg = fleet.get_hybrid_communicate_group()
    world_size = hcg.get_model_parallel_world_size()
    dp_id = hcg.get_data_parallel_rank()
    pp_id = hcg.get_stage_id()
    rank_id = dist.get_rank()
    topology = hcg.topology()
    set_random_seed(1024, dp_id, rank_id)

    model = ModelPipe(topology)
    scheduler = paddle.optimizer.lr.PiecewiseDecay(
        boundaries=[2], values=[0.001, 0.002], verbose=True)
    optimizer = paddle.optimizer.SGD(learning_rate=scheduler,
                                     parameters=model.parameters())

    model = fleet.distributed_model(model)
    optimizer = fleet.distributed_optimizer(optimizer)
    output_dir = tempfile.mkdtemp()

    # warmup step
    for step_id in range(2):
        x_data = np.random.randint(0, vocab_size,
                                   size=[batch_size, length])
        x = paddle.to_tensor(x_data)
        x.stop_gradient = True
        loss = model.train_batch([x, x], optimizer, scheduler)

    model._layers.save_state_dict(output_dir)
    paddle.save(optimizer.state_dict(),
                os.path.join(output_dir, "model_state.pdopt"))

    # construct data
    test_steps = 5
    np_data = np.random.randint(0, vocab_size,
                                size=[test_steps, batch_size, length])

    origin_loss = []
    for step_id in range(test_steps):
        x_data = np_data[step_id, :]
        x = paddle.to_tensor(x_data)
        x.stop_gradient = True
        loss = model.train_batch([x, x], optimizer, scheduler)
        origin_loss.append(loss.numpy())

    # test step
    model._layers.set_state_dir(output_dir)
    opt_dict = paddle.load(os.path.join(output_dir, "model_state.pdopt"))
    optimizer.set_state_dict(opt_dict)

    for step_id in range(test_steps):
        x_data = np_data[step_id, :]
        x = paddle.to_tensor(x_data)
        x.stop_gradient = True
        loss = model.train_batch([x, x], optimizer, scheduler)
        print("origin loss: ", origin_loss[step_id],
              "current loss: ", loss.numpy())
        np.testing.assert_allclose(loss.numpy(), origin_loss[step_id])

    # finally, remove the model/optimizer path
    shutil.rmtree(output_dir)
def save_checkpoint(epoch, net_state_dict, optimizer_state_dict, best_score,
                    checkpoint_path, model_path):
    paddle.save({
        'epoch': epoch,
        'model': net_state_dict,
        'optimizer': optimizer_state_dict,
        'best_score': best_score,
    }, checkpoint_path)
    paddle.save(net_state_dict, model_path)
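# Resuming counterpart for save_checkpoint above (a sketch; the model and
# optimizer are assumed to be constructed to match the saved shapes):
import paddle

def load_checkpoint(model, optimizer, checkpoint_path):
    ckpt = paddle.load(checkpoint_path)
    model.set_state_dict(ckpt['model'])
    optimizer.set_state_dict(ckpt['optimizer'])
    return ckpt['epoch'], ckpt['best_score']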
def save(model, optimizer, save_path):
    try:
        paddle.save(model.state_dict(), save_path + '.pdparams')
        paddle.save(optimizer.state_dict(), save_path + '.pdopt')
    except Exception:
        logging.error('save model and optimizer failed. save path: %s',
                      save_path)
        logging.error(traceback.format_exc())
def save_model(path, epoch, model, optimizer, id_classifier):
    state_dict = model.state_dict()
    data = {
        'epoch': epoch,
        'state_dict': model_rebuild(state_dict),
        'optimizer': model_rebuild(optimizer.state_dict()),
        'id_classifier': model_rebuild(id_classifier.state_dict()),
    }
    paddle.save(data, path)
def main():
    """Main entry point."""
    logger.info("1. Load data")
    data_file = download()

    logger.info("2. Data preprocess")
    corpus = load_data(data_file)
    corpus = data_preprocess(corpus)

    logger.info("3. Build vocabulary")
    word2id_freq, word2id_dict, id2word_dict = build_vocab(corpus)
    vocab_size = len(word2id_dict)
    logger.info("there are totally {} different words in the corpus".format(
        vocab_size))

    logger.info("4. words to ids")
    corpus = convert_corpus_to_id(corpus, word2id_dict)
    logger.info("{} tokens in the corpus".format(len(corpus)))
    corpus = subsampling(corpus, word2id_freq)[:10000]
    logger.info("{} tokens in the corpus".format(len(corpus)))

    logger.info("5. build dataset")
    dataset = build_data(corpus, vocab_size)

    logger.info("6. build batch dataset")
    batch_size = 512
    epoch_num = 3
    random.shuffle(dataset)
    data_num = len(dataset)
    train_dataset = dataset[:int(data_num * 0.9)]
    valid_dataset = dataset[int(data_num * 0.9):]

    logger.info("7. build model")
    model = SkipGram(vocab_size=vocab_size, embedding_size=200)
    # build the optimizer for training this network
    opt = paddle.optimizer.Adam(learning_rate=0.001,
                                parameters=model.parameters())

    logger.info("8. train model")
    train(model,
          opt,
          dataset=train_dataset,
          word2id_dict=word2id_dict,
          id2word_dict=id2word_dict,
          batch_size=batch_size,
          epoch_num=epoch_num)

    logger.info("9. evaluate model")
    accuracy = evaluate(model, valid_dataset=valid_dataset)
    logger.info("accuracy value: {}".format(accuracy))

    logger.info("10. save model")
    model_path = os.path.join(work_root, "models")
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    paddle.save(model.state_dict(),
                os.path.join(model_path, "{}.pdparams".format("lasted")))
    return word2id_dict, id2word_dict
def __call__(self, global_step, backbone: paddle.nn.Layer,
             partial_fc: PartialFC = None):
    if global_step > 100 and self.rank == 0:
        paddle.save(backbone.state_dict(),
                    os.path.join(self.output, "backbone.pdparams"))
    if global_step > 100 and partial_fc is not None:
        partial_fc.save_params()
def replace_save_vars(self, program, dirname):
    def predicate(var):
        return var.persistable

    vars = filter(predicate, program.list_vars())
    for var in vars:
        paddle.save(var.get_value(),
                    os.path.join(dirname, var.name),
                    use_binary_format=True)
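# Restoring counterpart for replace_save_vars (a sketch): each persistable
# variable was written as a single binary tensor file named after the
# variable, so paddle.load returns a tensor that can be set back on it.
import os
import paddle

def replace_load_vars(program, dirname):
    for var in filter(lambda v: v.persistable, program.list_vars()):
        var.set_value(paddle.load(os.path.join(dirname, var.name)))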