Beispiel #1
0
def save_parameters(checkpoint_dir, iteration, model, optimizer=None):
    """Checkpoint the latest trained model parameters.

    Args:
        checkpoint_dir (str): the directory where checkpoint is saved.
        iteration (int): the latest iteration number.
        model (Layer): model to be checkpointed.
        optimizer (Optimizer, optional): optimizer to be checkpointed.
            Defaults to None.

    Returns:
        None
    """
    checkpoint_path = os.path.join(checkpoint_dir, "step-{}".format(iteration))

    model_dict = model.state_dict()
    params_path = checkpoint_path + ".pdparams"
    paddle.save(model_dict, params_path)
    print("[checkpoint] Saved model to {}".format(params_path))

    if optimizer:
        opt_dict = optimizer.state_dict()
        optimizer_path = checkpoint_path + ".pdopt"
        paddle.save(opt_dict, optimizer_path)
        # Fixed typo in the log message: "optimzier" -> "optimizer".
        print(
            "[checkpoint] Saved optimizer state to {}".format(optimizer_path))

    # Record the latest checkpointed iteration (helper defined elsewhere).
    _save_checkpoint(checkpoint_dir, iteration)
Beispiel #2
0
def convert_weights(
    model_type: str,
    from_model: str,
    from_path: str,
    config_path: str,
    to_model: str,
    dump_path: str,
):
    """Load a checkpoint from one framework and re-save it in another.

    Instantiates the target-framework model from the registries
    ``MODEL_CLASSES`` / ``LOAD_WEIGHTS_MAPS``, loads the source weights
    into it, and dumps the result in the target framework's native format.
    """
    target_class = MODEL_CLASSES[to_model][model_type]
    config = ConfigBase(config_path)
    model = target_class(config)
    weight_loader = LOAD_WEIGHTS_MAPS[to_model][model_type][from_model]
    # A TF model must be built (called once) before weights can be set.
    if to_model == "tf":
        input_ids = tf.ones([3, 4], dtype=tf.int32)
        model(input_ids)
    weight_loader(model, config, from_path)

    if to_model == "pt":
        torch.save(model.state_dict(), dump_path)
    elif to_model == "tf":
        model.save_weights(dump_path)
    elif to_model == "ms":
        mindspore.save_checkpoint(model, dump_path)
    elif to_model == "of":
        flow.save(model.state_dict(), dump_path)
    elif to_model == "pd":
        paddle.save(model.state_dict(), dump_path)
    print("Save {} model to {}".format(to_model, dump_path))
Beispiel #3
0
def _train(model, train_dataloader):
    """Run the full training loop configured by the module-level ``cmd_args``.

    Builds an Adam optimizer with gradient-norm clipping, delegates each
    epoch to ``_train_epoch``, and checkpoints the model and optimizer
    state after every epoch.

    Returns:
        The trained model.
    """
    grad_clipper = nn.ClipGradByNorm(clip_norm=cmd_args.clip_grad)
    optimizer = paddle.optimizer.Adam(
        parameters=model.parameters(),
        learning_rate=cmd_args.learning_rate,
        grad_clip=grad_clipper)

    for epoch in range(cmd_args.num_epochs):
        # KL annealing is disabled; a fixed coefficient is used instead.
        kl_weight = cmd_args.kl_coeff

        print('##########################################################################################', flush=True)
        print('EPOCH:%d' % (epoch), flush=True)

        postfix = _train_epoch(
            model, train_dataloader, epoch, kl_weight, optimizer=optimizer)

        # Checkpoint both model and optimizer state for this epoch.
        paddle.save(model.state_dict(),
                    cmd_args.save_dir + 'train_model_epoch' + str(epoch))
        paddle.save(optimizer.state_dict(),
                    cmd_args.save_dir + 'train_optimizer_epoch' + str(epoch))

        print('epoch:%d loss:%f kl_loss:%f perplexity_loss:%f' % (
            epoch, postfix['loss'], postfix['kl_loss'],
            postfix['perplexity_loss']), flush=True)
        print('##########################################################################################', flush=True)

    return model
def update_vgg16_params(model_path):
    """Adapt pretrained VGG16 weights to a modified architecture.

    Loads the state dict from ``model_path``, zero-pads the first conv's
    input channels from 3 to 4, converts the first fully-connected weight
    into a 3x3 conv kernel named ``_conv_6.weight``, deletes the original
    fc/output layers, and saves the result to ``VGG16_pretrained.pdparams``.

    Args:
        model_path (str): path to the original ``.pdparams`` file.
    """
    param_state_dict = paddle.load(model_path)
    # first conv weight name _conv_block_1._conv_1.weight, shape is [64, 3, ,3, 3]
    # first fc weight name: _fc1.weight, shape is [25088, 4096]
    for k, v in param_state_dict.items():
        print(k, v.shape)

    # Pad the first conv's input channels 3 -> 4 with zeros (presumably the
    # target model takes a 4-channel input — TODO confirm its definition).
    weight = param_state_dict['_conv_block_1._conv_1.weight']  # [64, 3,3,3]
    print('ori shape: ', weight.shape)
    zeros_pad = paddle.zeros((64, 1, 3, 3))
    param_state_dict['_conv_block_1._conv_1.weight'] = paddle.concat(
        [weight, zeros_pad], axis=1)
    print('shape after padding',
          param_state_dict['_conv_block_1._conv_1.weight'].shape)

    # fc1 weight [25088, 4096] -> conv kernel: transpose to [4096, 25088],
    # view as [4096, 512, 7, 7], then crop to [512, 512, 3, 3].
    weight = param_state_dict['_fc1.weight']
    weight = paddle.transpose(weight, [1, 0])
    print('after transpose: ', weight.shape)
    weight = paddle.reshape(weight, (4096, 512, 7, 7))
    print('after reshape: ', weight.shape)
    weight = weight[0:512, :, 2:5, 2:5]
    print('after crop: ', weight.shape)
    param_state_dict['_conv_6.weight'] = weight

    # Drop the fully-connected head; it does not exist in the target model.
    del param_state_dict['_fc1.weight']
    del param_state_dict['_fc1.bias']
    del param_state_dict['_fc2.weight']
    del param_state_dict['_fc2.bias']
    del param_state_dict['_out.weight']
    del param_state_dict['_out.bias']

    paddle.save(param_state_dict, 'VGG16_pretrained.pdparams')
Beispiel #5
0
 def test_dygraph_save_static_load(self):
     """Save a state dict in dygraph mode, load it into an equivalent
     static-graph program, and verify the parameters match exactly.
     """
     # NOTE(review): ``inps`` is created but never used in this test.
     inps = np.random.randn(1, IMAGE_SIZE).astype('float32')
     path = 'test_dygraph_save_static_load/dy-static.pdparams'
     paddle.disable_static()
     with paddle.utils.unique_name.guard():
         layer = LinearNet()
         state_dict_dy = layer.state_dict()
         paddle.save(state_dict_dy, path)
     # Rebuild the same network as a static-graph program.
     paddle.enable_static()
     with new_program_scope():
         layer = LinearNet()
         data = paddle.static.data(name='x_static_save',
                                   shape=(None, IMAGE_SIZE),
                                   dtype='float32')
         y_static = layer(data)
         program = paddle.static.default_main_program()
         # NOTE(review): ``place`` is computed but unused — the executor
         # below is pinned to CPUPlace regardless.
         place = fluid.CPUPlace(
         ) if not paddle.fluid.core.is_compiled_with_cuda(
         ) else fluid.CUDAPlace(0)
         exe = paddle.static.Executor(paddle.CPUPlace())
         exe.run(paddle.static.default_startup_program())
         # keep_name_table retains the dygraph->static name mapping.
         state_dict = paddle.load(path, keep_name_table=True)
         program.set_state_dict(state_dict)
         state_dict_param = program.state_dict("param")
         # Every dygraph tensor must equal its static-graph counterpart.
         for name, tensor in state_dict_dy.items():
             self.assertTrue(
                 np.array_equal(tensor.numpy(),
                                np.array(state_dict_param[tensor.name])))
Beispiel #6
0
 def save_pretrained(self, save_directory):
     """
     Save model configuration and related resources (model state) to files
     under `save_directory`.

     Args:
         save_directory (str): Directory to save files into.
     """
     assert os.path.isdir(
         save_directory
     ), "Saving directory ({}) should be a directory".format(save_directory)
     # save model config
     model_config_file = os.path.join(save_directory,
                                      self.model_config_file)
     # BUG FIX: work on a shallow copy — the original rebound values inside
     # ``self.init_config`` itself, mutating the live config as a side
     # effect of saving.
     model_config = dict(self.init_config)
     # If init_config contains a Layer, use the layer's init_config to save
     for key, value in model_config.items():
         if key == "init_args":
             args = [
                 arg.init_config if isinstance(arg, PretrainedModel) else arg
                 for arg in value
             ]
             model_config[key] = tuple(args)
         elif isinstance(value, PretrainedModel):
             model_config[key] = value.init_config
     with io.open(model_config_file, "w", encoding="utf-8") as f:
         f.write(json.dumps(model_config, ensure_ascii=False))
     # save model parameters under the first registered resource file name
     file_name = os.path.join(save_directory,
                              list(self.resource_files_names.values())[0])
     paddle.save(self.state_dict(), file_name)
Beispiel #7
0
    def train_model(self, use_custom_op=False, dy2stat=False):
        """Train a small ``Net`` for ``self.batch_num`` SGD steps.

        Optionally wraps the net with ``paddle.jit.to_static``; afterwards
        saves either the jit model or the raw state dict, and returns the
        last batch's forward output as a numpy array.
        """
        # Fix RNG seeds so repeated runs are comparable.
        paddle.seed(self.seed)
        np.random.seed(self.seed)

        net = Net(self.in_dim, self.out_dim, use_custom_op)
        if dy2stat:
            net = paddle.jit.to_static(net, input_spec=[self.x_spec])
        loss_fn = paddle.nn.MSELoss()
        optimizer = paddle.optimizer.SGD(learning_rate=0.1,
                                         parameters=net.parameters())

        for batch_id in range(self.batch_num):
            features = paddle.to_tensor(self.datas[batch_id])
            targets = paddle.to_tensor(self.labels[batch_id])

            out = net(features)
            loss = loss_fn(out, targets)

            loss.backward()
            optimizer.minimize(loss)
            net.clear_gradients()

        # Switch to eval mode before exporting / saving parameters.
        net.eval()
        if dy2stat:
            paddle.jit.save(net, self.model_dy2stat_path)
        else:
            paddle.save(net.state_dict(),
                        self.model_path_template.format(use_custom_op))

        return out.numpy()
Beispiel #8
0
def save_model(net, optimizer, model_path, epoch_id, prefix='rec'):
    """Persist network and optimizer state under ``model_path/<epoch_id>``.

    Creates the epoch directory if needed and writes
    ``<prefix>.pdparams`` / ``<prefix>.pdopt`` inside it.
    """
    epoch_dir = os.path.join(model_path, str(epoch_id))
    _mkdir_if_not_exist(epoch_dir)
    model_prefix = os.path.join(epoch_dir, prefix)
    paddle.save(net.state_dict(), model_prefix + ".pdparams")
    paddle.save(optimizer.state_dict(), model_prefix + ".pdopt")
    logger.info("Already save model in {}".format(epoch_dir))
Beispiel #9
0
def convert_pytorch_checkpoint_to_paddle(pytorch_checkpoint_path,
                                         paddle_dump_path):
    """Convert a HuggingFace PyTorch BERT checkpoint into a Paddle one.

    Transposes 2-D linear weights (Paddle stores them transposed relative
    to torch), renames keys via the module-level ``huggingface_to_paddle``
    mapping, prefixes stray keys with ``bert.``, upcasts fp16 tensors to
    fp32 and saves the resulting dict to ``paddle_dump_path``.
    """
    pytorch_state_dict = torch.load(pytorch_checkpoint_path,
                                    map_location="cpu")
    paddle_state_dict = OrderedDict()
    for k, v in pytorch_state_dict.items():
        # Embedding and LayerNorm weights keep their layout.
        is_transpose = (k.endswith(".weight") and ".embeddings." not in k
                        and ".LayerNorm." not in k and v.ndim == 2)
        if is_transpose:
            v = v.transpose(0, 1)
        oldk = k
        for huggingface_name, paddle_name in huggingface_to_paddle.items():
            k = k.replace(huggingface_name, paddle_name)

        # Keys outside the known namespaces get the ``bert.`` prefix.
        if "bert." not in k and "cls." not in k and "classifier" not in k:
            k = "bert." + k

        print(f"Converting: {oldk} => {k} | is_transpose {is_transpose}")
        if v.dtype == torch.float16:
            v = torch.tensor(v.data, dtype=torch.float32)

        paddle_state_dict[k] = v.data.numpy()

    paddle.save(paddle_state_dict, paddle_dump_path)
Beispiel #10
0
def save_model(output_path, model, steps, opt, lr_scheduler, max_ckpt=2):
    """Rank-0-only model checkpointing with rotation of old checkpoints.

    Writes the model parameters under ``output_path/model_<steps>`` and
    keeps at most ``max_ckpt`` of the newest ``model_*`` directories.
    ``opt`` and ``lr_scheduler`` are accepted for interface compatibility
    but are not persisted here (their save lines were commented out).
    """
    if paddle.distributed.get_rank() != 0:
        return

    output_dir = os.path.join(output_path, "model_%d" % steps)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    paddle.save(model.state_dict(),
                os.path.join(output_dir, "ckpt.pdparams"))

    log.info("save model %s" % output_dir)

    # Rotate: remove the oldest checkpoints beyond max_ckpt, ordering
    # directories by the numeric suffix of "model_<steps>".
    ckpt_paths = glob.glob(os.path.join(output_path, "model_*"))
    if len(ckpt_paths) > max_ckpt:
        stale_dirs = sorted(ckpt_paths,
                            key=lambda p: int(p.split("_")[-1]),
                            reverse=True)[max_ckpt:]
        for ckpt_dir in stale_dirs:
            if os.path.exists(ckpt_dir):
                shutil.rmtree(ckpt_dir)
Beispiel #11
0
    def save(self, filename):
        """Persist this model's parameters to disk.

        Args:
            filename (`str`): The filename to save to.
        """
        state = self.state_dict()
        paddle.save(state, filename)
    def test_inference(self):
        """End-to-end check: the exported static-graph faster tokenizer must
        produce (near-)identical token ids to the plain Python tokenizer.
        """
        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path, exist_ok=True)
        # Round-trip the tokenizer weights through disk first.
        paddle.save(self.faster_tokenizer.state_dict(), self.param_path)
        state_dict = paddle.load(self.param_path)
        self.faster_tokenizer.set_dict(state_dict)

        # Export to static graph; the input is a 1-D batch of raw strings.
        static_model = paddle.jit.to_static(
            self.faster_tokenizer,
            input_spec=[
                paddle.static.InputSpec(
                    shape=[None], dtype=core.VarDesc.VarType.STRINGS),  # texts
            ])
        # Save in static graph model.
        paddle.jit.save(static_model, self.inference_path)
        predictor = Predictor(self.save_path)
        input_ids, token_type_ids = predictor.predict(self.text)

        # Reference encoding from the Python-side BERT tokenizer.
        encoded_inputs = self.bert_tokenizer(self.text)
        py_input_ids = np.array(encoded_inputs[0]["input_ids"]).reshape(
            [1, -1])
        py_token_type_ids = np.array(
            encoded_inputs[0]["token_type_ids"]).reshape([1, -1])
        self.assertTrue(np.allclose(input_ids, py_input_ids, rtol=0,
                                    atol=0.01))
        self.assertTrue(
            np.allclose(token_type_ids, py_token_type_ids, rtol=0, atol=0.01))
Beispiel #13
0
def convert_pytorch_checkpoint_to_paddle(pytorch_checkpoint_path,
                                         paddle_dump_path):
    """Convert a PyTorch checkpoint into a Paddle ``state_dict`` file.

    Skips keys listed in the module-level ``skip_weights``, transposes 2-D
    linear weights unless matched by ``dont_transpose``, and renames keys
    via the module-level ``huggingface_to_paddle`` mapping.
    """
    import torch
    import paddle

    pytorch_state_dict = torch.load(pytorch_checkpoint_path,
                                    map_location="cpu")
    paddle_state_dict = OrderedDict()
    for k, v in pytorch_state_dict.items():
        if k in skip_weights:
            continue
        # Paddle Linear weights are transposed relative to torch.
        transpose = (k.endswith(".weight") and v.ndim == 2
                     and not any(w in k for w in dont_transpose))
        if transpose:
            v = v.transpose(0, 1)
        oldk = k
        for huggingface_name, paddle_name in huggingface_to_paddle.items():
            k = k.replace(huggingface_name, paddle_name)

        print(f"Converting: {oldk} => {k} | is_transpose {transpose}")
        paddle_state_dict[k] = v.data.numpy()

    paddle.save(paddle_state_dict, paddle_dump_path)
Beispiel #14
0
    def test_save_and_load(self):
        """Round-trip an ASP (sparsity) program and its parameters through
        disk, run one batch, and verify supported layers stay 2:4 sparse.
        """
        path = "/tmp/paddle_asp_save_st/"
        param_path = path + "asp.pdparams"
        model_path = path + "asp.pdmodel"

        # Save both the parameters and the program description.
        paddle.save(self.main_program.state_dict(), param_path)
        paddle.save(self.main_program, model_path)

        prog = paddle.load(model_path)

        state_dict = paddle.load(param_path)
        prog.set_state_dict(state_dict)

        feeder = fluid.DataFeeder(feed_list=[self.img, self.label],
                                  place=self.place)

        # One random batch (64 images of 3x32x32 plus integer labels) just
        # to execute the restored program once.
        data = (np.random.randn(64, 3, 32,
                                32), np.random.randint(10, size=(64, 1)))
        self.exe.run(prog, feed=feeder.feed([data]))

        # Every ASP-supported parameter must still satisfy n=2, m=4
        # sparsity (checked on the transposed matrix).
        for param in prog.global_block().all_parameters():
            if ASPHelper._is_supported_layer(prog, param.name):
                mat = np.array(fluid.global_scope().find_var(
                    param.name).get_tensor())
                self.assertTrue(
                    paddle.fluid.contrib.sparsity.check_sparsity(mat.T,
                                                                 n=2,
                                                                 m=4))
Beispiel #15
0
def train(config, model, loader, optim):
    """Train ``model`` for ``config.epochs`` epochs over ``loader``.

    Logs an averaged loss every ``config.log_steps`` steps and saves the
    model parameters to ``config.save_dir/ckpt.pdparams`` after each epoch.
    """
    model.train()
    global_step = 0
    running_loss = 0.0
    tic = time.time()

    for epoch in range(config.epochs):
        for step, feed_dict in enumerate(loader()):
            global_step += 1
            batch = data2tensor(feed_dict)
            loss = model(batch)
            loss.backward()
            optim.step()
            optim.clear_grad()

            running_loss += loss.numpy()[0]
            if global_step % config.log_steps == 0:
                avg_loss = running_loss / config.log_steps
                running_loss = 0.0
                sec_per_batch = (time.time() - tic) / config.log_steps
                tic = time.time()
                log.info(
                    "sec/batch: %.6f | Epoch: %s | step: %s | train_loss: %.6f"
                    % (sec_per_batch, epoch, global_step, avg_loss))

        # Checkpoint once per epoch (same path: latest epoch wins).
        log.info("saving model in epoch %s" % (epoch))
        save_files = os.path.join(config.save_dir, "ckpt.pdparams")
        log.info("Epoch: %s | Saving model in %s" % (epoch, save_files))
        paddle.save(model.state_dict(), save_files)
Beispiel #16
0
    def evalute(self, dataloader: DataLoader, mode: str = 'dev'):
        """Evaluate the model on ``dataloader`` and checkpoint on improvement.

        Computes average loss and accuracy, logs them to the writer, and
        saves ``best.pdparams`` whenever accuracy beats the previous best.
        (Method name ``evalute`` is a historical typo kept for callers.)

        Args:
            dataloader: batches of (input_ids, token_type_ids, labels).
            mode (str): stage label used only for logging.
        """
        logger.success(f'{mode} stage ...')

        self.model.eval()
        self.metric.reset()
        losses = []
        for batch in dataloader:
            input_ids, token_type_ids, labels = batch
            logits = self.model(input_ids, token_type_ids)
            loss = self.criterion(logits, labels)
            losses.append(num(loss))
            correct = self.metric.compute(logits, labels)
            self.metric.update(correct)
        accu = self.metric.accumulate()

        logger.info("eval loss: %.5f, accuracy: %.5f" %
                    (np.mean(losses), accu))
        self.model.train()
        self.metric.reset()

        self.context_data.eval_step += 1
        self.writer.add_scalar(tag='eval-acc',
                               value=accu,
                               step=self.context_data.eval_step)
        self.writer.add_scalar(tag='eval-loss',
                               value=np.sum(losses),
                               step=self.context_data.eval_step)

        # BUG FIX: the original assigned ``eval_acc = accu`` *before* this
        # comparison, so ``accu > eval_acc`` was always False and the best
        # model was never saved. Compare against the previous best first,
        # then record the new best inside the branch.
        if accu > self.context_data.eval_acc:
            self.context_data.eval_acc = accu
            logger.success(f'saving the best model ...')
            best_model_file = os.path.join(self.config.output_dir,
                                           'best.pdparams')
            paddle.save(self.model.state_dict(), best_model_file)
Beispiel #17
0
def torch2paddle():
    """Convert the torchvision MobileNetV3-small checkpoint to Paddle format.

    Reads the hard-coded ``.pth`` file, drops BatchNorm bookkeeping keys,
    transposes fully-connected ("classifier") weights, renames BatchNorm
    statistic keys to Paddle's convention, and writes the result to the
    hard-coded ``.pdparams`` path.
    """
    torch_path = "./data/mobilenet_v3_small-047dcff4.pth"
    paddle_path = "./data/mv3_small_paddle.pdparams"
    torch_state_dict = torch.load(torch_path)
    fc_names = ["classifier"]
    paddle_state_dict = {}
    for k in torch_state_dict:
        # BatchNorm's batch counter has no Paddle counterpart.
        if "num_batches_tracked" in k:
            continue
        v = torch_state_dict[k].detach().cpu().numpy()
        # Fully-connected weights are stored transposed in Paddle (bias is
        # unaffected): swap the first two axes.
        if any(name in k for name in fc_names) and "weight" in k:
            new_shape = [1, 0] + list(range(2, v.ndim))
            print(
                f"name: {k}, ori shape: {v.shape}, new shape: {v.transpose(new_shape).shape}"
            )
            v = v.transpose(new_shape)
        # BatchNorm statistic names differ between the frameworks.
        k = k.replace("running_var", "_variance")
        k = k.replace("running_mean", "_mean")
        # Removed the original's dead ``if False:`` debug branch (it was a
        # disabled "key not in model_state_dict" check).
        paddle_state_dict[k] = v
    paddle.save(paddle_state_dict, paddle_path)
Beispiel #18
0
def main(args):
    """End-to-end skip-gram training entry point.

    Configures the device and (optional) distributed environment, builds
    the model, a polynomially-decayed Adam optimizer and the random-walk
    dataloader, trains for ``args.epoch`` epochs and saves the final
    parameters to ``model.pdparams``.
    """
    if not args.use_cuda:
        paddle.set_device("cpu")
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    graph = load(args.dataset)

    # Sparse gradients are only enabled on CPU.
    skipgram = SkipGramModel(graph.num_nodes,
                             args.embed_size,
                             args.neg_num,
                             sparse=not args.use_cuda)
    model = paddle.DataParallel(skipgram)

    total_steps = int(graph.num_nodes / args.batch_size) * args.epoch
    scheduler = paddle.optimizer.lr.PolynomialDecay(
        learning_rate=args.learning_rate,
        decay_steps=total_steps,
        end_lr=0.0001)
    optim = Adam(learning_rate=scheduler, parameters=model.parameters())

    train_ds = ShardedDataset(graph.nodes)
    collate_fn = BatchRandWalk(graph, args.walk_len, args.win_size,
                               args.neg_num, args.neg_sample_type)
    data_loader = Dataloader(train_ds,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=args.sample_workers,
                             collate_fn=collate_fn)

    for epoch in tqdm.tqdm(range(args.epoch)):
        train_loss = train(model, data_loader, optim)
        log.info("Runing epoch:%s\t train_loss:%.6f", epoch, train_loss)
    paddle.save(model.state_dict(), "model.pdparams")
Beispiel #19
0
def convert_pytorch_checkpoint_to_paddle(pytorch_checkpoint_path,
                                         huggingface_to_paddle):
    """Convert a DistilBERT PyTorch checkpoint to Paddle format.

    Writes ``model_state.pdparams`` next to the input checkpoint, printing
    a per-tensor conversion trace and the total parameter count.
    """
    import torch
    import paddle
    pytorch_state_dict = torch.load(pytorch_checkpoint_path,
                                    map_location="cpu")
    paddle_state_dict = OrderedDict()
    Total_params = 0

    for k, v in pytorch_state_dict.items():
        Total_params += np.prod(v.shape)
        # Linear weights are transposed between torch and Paddle; embedding
        # and LayerNorm weights keep their layout.
        is_transpose = (k.endswith(".weight") and ".embeddings." not in k
                        and ".LayerNorm." not in k and v.ndim == 2)
        if is_transpose:
            v = v.transpose(0, 1)

        oldk = k
        for huggingface_name, paddle_name in huggingface_to_paddle.items():
            k = k.replace(huggingface_name, paddle_name)
        # DistilBERT's pooler maps to the pre_classifier layer.
        if k.startswith('distilbert.pooler.dense'):
            k = k.replace('distilbert.pooler.dense', 'pre_classifier')

        print(f"Converting: {oldk} => {k} | is_transpose {is_transpose}")
        paddle_state_dict[k] = v.data.numpy()
    paddle_dump_path = pytorch_checkpoint_path.replace('pytorch_model.bin',
                                                       'model_state.pdparams')
    paddle.save(paddle_state_dict, paddle_dump_path)
    print(f'Total params: {Total_params}')
Beispiel #20
0
def train(model, train_data_loader, val_data_loader, loss_fn, optimizer,
          n_epochs, model_name):
    """Train for ``n_epochs`` epochs, validating after each epoch and
    checkpointing whenever validation AUPRC (metric index 1) improves.
    """
    best_auprc = -1
    for epoch_i in range(1, n_epochs + 1):
        epoch_start = time.time()

        model.train()
        train_loss, train_metrics = run_batch(model, optimizer,
                                              train_data_loader, epoch_i,
                                              "train", loss_fn)

        model.eval()
        with paddle.no_grad():
            if val_data_loader:
                val_loss, val_metrics = run_batch(model, optimizer,
                                                  val_data_loader, epoch_i,
                                                  "val", loss_fn)
                # Save a full snapshot whenever AUPRC improves.
                if best_auprc < val_metrics[1]:
                    snapshot = get_model_params_state(
                        model, args, epoch_i, *val_metrics)
                    paddle.save(snapshot, f"{model_name}.pdparams")
                    best_auprc = val_metrics[1]

        if train_data_loader:
            print(f"\n#### Epoch {epoch_i} time {time.time() - epoch_start:.4f}s")
            print_metrics(train_loss, 0, 0)

        if val_data_loader:
            print(f"#### Validation epoch {epoch_i}")
            print_metrics(val_loss, *val_metrics)
Beispiel #21
0
def save_checkpoint(state, filename='checkpoint'):
    """Split a checkpoint dict into parameter, optimizer and metadata files.

    ``state['state_dict']`` goes to ``<filename>.pdparams``,
    ``state['optimizer']`` goes to ``<filename>.pdopt``, and every other
    entry is pickled to ``<filename>.state.pickle``.

    Args:
        state (dict): must contain 'state_dict' and 'optimizer' keys plus
            arbitrary metadata.
        filename (str): path prefix for the three output files.
    """
    # BUG FIX: operate on a shallow copy so the caller's ``state`` dict is
    # not mutated (the original deleted its keys in place).
    remainder = dict(state)
    paddle.save(remainder.pop('state_dict'), filename + '.pdparams')
    paddle.save(remainder.pop('optimizer'), filename + '.pdopt')
    with open(filename + '.state.pickle', 'wb') as fout:
        pickle.dump(remainder, fout)
Beispiel #22
0
def do_train(args):
    """Run the fine-tuning loop for ``args.num_train_epochs`` epochs.

    NOTE(review): relies on module-level globals (``model``, ``loss_fct``,
    ``optimizer``, ``lr_scheduler``, ``train_data_loader``,
    ``valid_data_loader``, ``label_num``, ``evaluate``) and mutates
    ``args.global_step`` in place.

    Args:
        args: namespace providing num_train_epochs, logging_steps,
            save_steps, output_dir and a mutable global_step counter.
    """
    # Total optimizer steps; used to force a save on the final step.
    last_step = args.num_train_epochs * len(train_data_loader)
    tic_train = time.time()

    for epoch in range(args.num_train_epochs):
        for step, batch in enumerate(train_data_loader):
            args.global_step += 1
            input_ids, token_type_ids, _, labels = batch
            logits = model(input_ids, token_type_ids)
            loss = loss_fct(logits, labels)
            avg_loss = paddle.mean(loss)

            if args.global_step % args.logging_steps == 0:
                print(
                    "global step %d, epoch: %d, batch: %d, loss: %f, speed: %.2f step/s"
                    % (args.global_step, epoch, step, avg_loss,
                       args.logging_steps / (time.time() - tic_train)))
                tic_train = time.time()

            avg_loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.clear_grad()

            # Periodically (and at the very last step) evaluate and save,
            # only on the rank-0 worker.
            if args.global_step % args.save_steps == 0 or args.global_step == last_step:
                if paddle.distributed.get_rank() == 0:
                    evaluate(model, loss_fct, valid_data_loader, label_num)
                    paddle.save(
                        model.state_dict(),
                        os.path.join(args.output_dir,
                                     "model_%d.pdparams" % args.global_step))
def main():
    """Policy-gradient training loop: run episodes until the environment's
    reward threshold is reached, periodically logging and checkpointing.
    """
    running_reward = 10
    for i_episode in count(1):
        obs, ep_reward = env.reset(), 0
        # Cap episode length so a non-terminating env cannot loop forever.
        for _step in range(1, 10000):
            action = policy.select_action(obs)
            obs, reward, done, _ = env.step(action.numpy()[0][0])
            if render:
                env.render()
            policy.rewards.append(reward)
            ep_reward += reward
            if done:
                break

        # Exponential moving average of episode reward.
        running_reward = 0.05 * ep_reward + (1 - 0.05) * running_reward
        finish_episode()
        if i_episode % log_interval == 0:
            print('Episode {}\tLast reward: {:.2f}\tAverage reward: {:.2f}'.format(
                i_episode, ep_reward, running_reward))
            # Checkpoint the policy network.
            paddle.save(policy.state_dict(), 'model/model.pdparams')
        if running_reward > env.spec.reward_threshold:
            print("Solved! Running reward is now {} and "
                  "the last episode runs is {}!".format(running_reward, i_episode))
            break
    def test_pp_model(self):
        """Pipeline-parallel checkpoint round trip: train warmup steps, save
        model/optimizer state, keep training while recording losses, then
        restore the snapshot and verify the losses replay exactly.
        """
        hcg = fleet.get_hybrid_communicate_group()
        word_size = hcg.get_model_parallel_world_size()
        dp_id = hcg.get_data_parallel_rank()
        pp_id = hcg.get_stage_id()
        rank_id = dist.get_rank()
        topology = hcg.topology()
        # Seed every rank deterministically so losses are reproducible.
        set_random_seed(1024, dp_id, rank_id)

        model = ModelPipe(topology)
        scheduler = paddle.optimizer.lr.PiecewiseDecay(
            boundaries=[2], values=[0.001, 0.002], verbose=True)
        optimizer = paddle.optimizer.SGD(learning_rate=scheduler,
                                         parameters=model.parameters())

        # Wrap for hybrid-parallel execution.
        model = fleet.distributed_model(model)
        optimizer = fleet.distributed_optimizer(optimizer)
        output_dir = tempfile.mkdtemp()

        # warmup step
        for step_id in range(2):
            x_data = np.random.randint(0, vocab_size, size=[batch_size, length])
            x = paddle.to_tensor(x_data)
            x.stop_gradient = True
            loss = model.train_batch([x, x], optimizer, scheduler)

        # Snapshot layer weights and optimizer state after warmup.
        model._layers.save_state_dict(output_dir)
        paddle.save(optimizer.state_dict(),
                    os.path.join(output_dir, "model_state.pdopt"))

        # construct data
        test_steps = 5
        np_data = np.random.randint(
            0, vocab_size, size=[test_steps, batch_size, length])

        # Continue training, recording the per-step losses.
        origin_loss = []
        for step_id in range(5):
            x_data = np_data[step_id, :]
            x = paddle.to_tensor(x_data)
            x.stop_gradient = True
            loss = model.train_batch([x, x], optimizer, scheduler)
            origin_loss.append(loss.numpy())

        # Restore the snapshot and replay the exact same batches.
        model._layers.set_state_dir(output_dir)
        opt_dict = paddle.load(os.path.join(output_dir, "model_state.pdopt"))
        optimizer.set_state_dict(opt_dict)

        for step_id in range(5):
            x_data = np_data[step_id, :]
            x = paddle.to_tensor(x_data)
            x.stop_gradient = True
            loss = model.train_batch([x, x], optimizer, scheduler)
            print("origin loss: ", origin_loss[step_id], "current loss: ",
                  loss.numpy())
            # Restored state must reproduce the recorded losses exactly.
            np.testing.assert_allclose(loss.numpy(), origin_loss[step_id])

        # finally, remove the model/optimizer path
        shutil.rmtree(output_dir)
def save_checkpoint(epoch, net_state_dict, optimizer_state_dict, best_score, checkpoint_path, model_path):
    """Write a full resume checkpoint plus a weights-only copy.

    ``checkpoint_path`` receives epoch/model/optimizer/best_score bundled
    together; ``model_path`` receives just the network state dict.
    """
    checkpoint = {
        'epoch': epoch,
        'model': net_state_dict,
        'optimizer': optimizer_state_dict,
        'best_score': best_score,
    }
    paddle.save(checkpoint, checkpoint_path)
    paddle.save(net_state_dict, model_path)
Beispiel #26
0
def save(model, optimzer, save_path):
    """Best-effort save of model and optimizer state to ``save_path``.

    Writes ``<save_path>.pdparams`` and ``<save_path>.pdopt``; failures are
    logged (with traceback) rather than raised.

    Note: the misspelled ``optimzer`` parameter name is kept so keyword
    callers keep working.
    """
    try:
        paddle.save(model.state_dict(), save_path + '.pdparams')
        paddle.save(optimzer.state_dict(), save_path + '.pdopt')
    except Exception:
        # Fixed typo in the log message ("optimzer" -> "optimizer") and
        # dropped the unused ``as e`` binding.
        logging.error('save model and optimizer failed. save path: %s',
                      save_path)
        logging.error(traceback.format_exc())
Beispiel #27
0
def save_model(path, epoch, model, optimizer, id_classifier):
    """Bundle epoch number plus rebuilt model/optimizer/classifier state
    dicts into a single checkpoint file at ``path``.
    """
    checkpoint = {
        'epoch': epoch,
        'state_dict': model_rebuild(model.state_dict()),
        'optimizer': model_rebuild(optimizer.state_dict()),
        'id_classifier': model_rebuild(id_classifier.state_dict()),
    }
    paddle.save(checkpoint, path)
def main():
    """Word2vec (skip-gram) end-to-end pipeline: download, preprocess,
    build the vocabulary, train, evaluate and save the model.

    Returns:
        tuple: (word2id_dict, id2word_dict) vocabulary mappings.
    """
    logger.info("1. Load data")
    data_file = download()
    logger.info("2. Data preprocess")
    corpus = data_preprocess(load_data(data_file))
    logger.info("3. Build vocabulary")
    word2id_freq, word2id_dict, id2word_dict = build_vocab(corpus)
    vocab_size = len(word2id_dict)
    logger.info("there are totoally {} different words in the corpus".format(
        vocab_size))

    logger.info("4. words to ids")
    corpus = convert_corpus_to_id(corpus, word2id_dict)
    logger.info("{} tokens in the corpus".format(len(corpus)))
    # Down-sample frequent words and cap the corpus size for speed.
    corpus = subsampling(corpus, word2id_freq)[:10000]
    logger.info("{} tokens in the corpus".format(len(corpus)))

    logger.info("5. build dataset")
    dataset = build_data(corpus, vocab_size)
    logger.info("6. build batch dataset")
    batch_size = 512
    epoch_num = 3
    # Shuffle several times before the 90/10 train/valid split.
    for _ in range(3):
        random.shuffle(dataset)
    split_at = int(len(dataset) * 0.9)
    train_dataset = dataset[:split_at]
    valid_dataset = dataset[split_at:]

    logger.info("7. build model")
    model = SkipGram(vocab_size=vocab_size, embedding_size=200)
    # Optimizer for training the network.
    opt = paddle.optimizer.Adam(learning_rate=0.001,
                                parameters=model.parameters())

    logger.info("8. train model")
    train(model,
          opt,
          dataset=train_dataset,
          word2id_dict=word2id_dict,
          id2word_dict=id2word_dict,
          batch_size=batch_size,
          epoch_num=epoch_num)

    logger.info("9. evaluate model")
    accuracy = evaluate(model, valid_dataset=valid_dataset)
    logger.info("accuracy value: {}".format(accuracy))

    logger.info("10. save model")
    model_path = os.path.join(work_root, "models")
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    paddle.save(model.state_dict(),
                os.path.join(model_path, "{}.pdparams".format("lasted")))

    return word2id_dict, id2word_dict
Beispiel #29
0
 def __call__(self,
              global_step,
              backbone: paddle.nn.Layer,
              partial_fc: PartialFC = None):
     """Checkpoint callback: save backbone weights and partial-fc params.

     Saving only starts after step 100; the backbone is written by the
     rank-0 worker only, while ``partial_fc`` saves its own shards.

     Args:
         global_step (int): current training step.
         backbone (paddle.nn.Layer): network whose state dict is saved.
         partial_fc (PartialFC, optional): sharded classifier head.
     """
     # BUG FIX: the original used ``self.rank is 0`` — identity comparison
     # with an int literal is unreliable; use equality.
     if global_step > 100 and self.rank == 0:
         paddle.save(backbone.state_dict(),
                     os.path.join(self.output, "backbone.pdparams"))
     if global_step > 100 and partial_fc is not None:
         partial_fc.save_params()
    def replace_save_vars(self, program, dirname):
        """Save every persistable variable of ``program`` to ``dirname``,
        one binary-format file per variable, named after the variable.
        """
        persistables = (var for var in program.list_vars() if var.persistable)
        for var in persistables:
            paddle.save(var.get_value(),
                        os.path.join(dirname, var.name),
                        use_binary_format=True)