Example #1
    def load_pretrain_params(self, exe, pretrain, prog, place):
        """
        The pretrained params are ResNet50 pretrained on ImageNet.
        However, conv1_weights' shape of StNet is not the same as that in ResNet50 because the input are super-image
        concatanated by a series of images. So it is recommendated to treat conv1_weights specifically.
        The process is as following:
          1, load params from pretrain
          2, get the value of conv1_weights in the state_dict and transform it
          3, set the transformed value to conv1_weights in prog
        """

        logger.info(
            "Load pretrain weights from {}, exclude fc, batch_norm, xception, conv3d layers."
            .format(pretrain))

        state_dict = fluid.load_program_state(pretrain)
        dict_keys = list(state_dict.keys())
        for name in dict_keys:
            if ("batch_norm" in name) or ("fc_0" in name) or ("batch_norm" in name) \
                     or ("xception" in name) or ("conv3d" in name):
                del state_dict[name]
                logger.info(
                    'Deleted {} from pretrained parameters; it will not be loaded.'
                    .format(name))
        conv1_weights = state_dict["conv1_weights"]
        conv1_weights = np.mean(conv1_weights, axis=1,
                                keepdims=True) / self.seglen
        conv1_weights = np.repeat(conv1_weights, 3 * self.seglen, axis=1)
        logger.info(
            'conv1_weights is transformed from [Cout, 3, Kh, Kw] into [Cout, 3*seglen, Kh, Kw]'
        )
        state_dict["conv1_weights"] = conv1_weights
        fluid.set_program_state(prog, state_dict)
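As a quick sanity check on the shape arithmetic above, here is a minimal numpy sketch; the Cout, kernel size, and seglen values are illustrative assumptions, not taken from the original code:

import numpy as np

seglen = 7                                  # illustrative segment length
conv1 = np.random.rand(64, 3, 7, 7)         # [Cout, 3, Kh, Kw], illustrative conv1 shape

# average over the RGB channels and divide by seglen, then repeat across
# 3*seglen channels; the summed channel response stays equal to the original
mean = np.mean(conv1, axis=1, keepdims=True) / seglen   # [64, 1, 7, 7]
inflated = np.repeat(mean, 3 * seglen, axis=1)          # [64, 21, 7, 7]
assert inflated.shape == (64, 3 * seglen, 7, 7)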
Example #2
def load_weights_params_from_file(exe, prog, weights, place):
    """
    The params of the training process is stored in the file named weights.
    However, the network of the training and test process is slightly different due to the layer 
    named "pred" was fc in trainng but convolution in test. When loading weights of pred (pred_w), 
    from the pretrained file, shape mismatch error will be raised due to the check in fluid.io. 
    This check on params' shape is newly added in fluid.version==1.6.0. So it is recommendated to 
    treat pred_w specifically.
    The process is as following:
      1, load the parmas from weights file into a state_dict
      2, specifically treat the paramter named "pred_w" from the foramt of fc into convolution
      3, set the state_dict to prog
    """

    logger.info('Load test weights from {}'.format(weights))

    # get the param_list in prog
    prog_vars = list(filter(is_parameter, prog.list_vars()))

    if weights[-9:] == '.pdparams':
        weights = weights[:-9]

    state_dict = fluid.load_program_state(weights, var_list=prog_vars)
    pred_array = state_dict["pred_w"]
    pred_array = np.transpose(pred_array, (1, 0))
    pred_array = np.reshape(
        pred_array, [pred_array.shape[0], pred_array.shape[1], 1, 1, 1])
    state_dict["pred_w"] = pred_array
    fluid.set_program_state(prog, state_dict)
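The fc weight is stored as [Cin, Cout], while a 1x1x1 3D convolution kernel is [Cout, Cin, Kt, Kh, Kw]; that is what the transpose and reshape above produce. Below is a minimal numpy sketch of the same transform, together with an is_parameter predicate mirroring the one defined in Example #8 further down; the 2048-to-400 fc shape is an illustrative assumption:

import numpy as np
import paddle.fluid as fluid

def is_parameter(var):
    # assumed predicate, mirroring Example #8 below
    return isinstance(var, fluid.framework.Parameter)

pred_w = np.random.rand(2048, 400)            # fc weight, [Cin, Cout]; shape is illustrative
pred_w = np.transpose(pred_w, (1, 0))         # [Cout, Cin]
pred_w = np.reshape(
    pred_w, [pred_w.shape[0], pred_w.shape[1], 1, 1, 1])
assert pred_w.shape == (400, 2048, 1, 1, 1)   # [Cout, Cin, Kt, Kh, Kw] with a 1x1x1 kernel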
Example #3
def load_pretrained_model(model, pretrained_model):
    if pretrained_model is not None:
        logger.info('Load pretrained model from {}'.format(pretrained_model))
        if os.path.exists(pretrained_model):
            ckpt_path = os.path.join(pretrained_model, 'model')
            try:
                para_state_dict, _ = fluid.load_dygraph(ckpt_path)
            except Exception:
                para_state_dict = fluid.load_program_state(pretrained_model)

            model_state_dict = model.state_dict()
            keys = model_state_dict.keys()
            num_params_loaded = 0
            for k in keys:
                if k not in para_state_dict:
                    logger.warning("{} is not in pretrained model".format(k))
                elif list(para_state_dict[k].shape) != list(
                        model_state_dict[k].shape):
                    logger.warning(
                        "[SKIP] Shape of pretrained params {} doesn't match.(Pretrained: {}, Actual: {})"
                        .format(k, para_state_dict[k].shape,
                                model_state_dict[k].shape))
                else:
                    model_state_dict[k] = para_state_dict[k]
                    num_params_loaded += 1
            model.set_dict(model_state_dict)
            logger.info("There are {}/{} varaibles are loaded.".format(
                num_params_loaded, len(model_state_dict)))

        else:
            raise ValueError(
                'The pretrained model directory was not found: {}'.format(
                    pretrained_model))
    else:
        logger.info('No pretrained model to load, train from scratch')
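A brief usage sketch for the helper above; the TinyNet class and the checkpoint path are illustrative assumptions, not part of the original code:

import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear

class TinyNet(fluid.dygraph.Layer):
    # stand-in network; any fluid.dygraph.Layer subclass works the same way
    def __init__(self):
        super(TinyNet, self).__init__()
        self.fc = Linear(10, 10)

with fluid.dygraph.guard():
    model = TinyNet()
    load_pretrained_model(model, "./pretrained_model_dir")  # path is illustrative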
Example #4
    def load_pretrain_params(self, exe, pretrain, prog, place):
        logger.info("Load pretrain weights from {}, exclude fc layer.".format(pretrain))

        state_dict = fluid.load_program_state(pretrain)
        dict_keys = list(state_dict.keys())
        for name in dict_keys:
            if "fc_0" in name:
                del state_dict[name]
                logger.info('Deleted {} from pretrained parameters; it will not be loaded.'.format(name))
        fluid.set_program_state(prog, state_dict)
Example #5
def load_pretrain_params_from_file(exe, prog, pretrained_file, place):
    """
    The pretrined_file stores ResNet50/101 parameters pretrained on ImageNet.
    However, the conv_weights of Nonlocal model is not the same as that in ResNet50/101 because the 
    input shape is [N, C, T, H, W] and the convolution kernels' shape is [Cout, Cin, Kt, Kh, Kw]. It is
    different from the convolution kernels of ResNet whose shape is typically [Cout, Cin, Kh, Kw].
    So it is recommendated to treat conv_weights specifically.
    The process is as following:
      1, check the params that will be loaded, those with the same name in the target program and pretrained_file. 
         These params will be called common params in this function.
      2, load params from the pretrained_file into a state dict, named file_state_dict in this method.
      3, get the value of common params in the file_state_dict, treat the convolution parameters specifically.
      4, set the value to params in the target program
    """

    logger.info('load pretrained params from {}'.format(pretrained_file))
    if os.path.isdir(pretrained_file):
        # get params' list in prog
        param_list = filter(is_parameter, prog.list_vars())
        param_name_list = []
        param_shape_dict = {}
        for p in param_list:
            param_name_list.append(p.name)
            param_shape_dict[p.name] = p.shape

        # get all params' names in pretrained_file
        param_name_from_file = os.listdir(pretrained_file)
        # get common params of prog and pretrained_file
        # only those common params will be loaded from pretrained_file into prog
        common_names = get_common_names(param_name_list, param_name_from_file)

        file_state_dict = fluid.load_program_state(pretrained_file)
        pretrain_state_dict = {}
        for name in common_names:
            common_array = file_state_dict[name]
            param_shape = param_shape_dict[name]
            if len(param_shape) == 5:
                # transform the loaded conv weights into the format of [Cout, Cin, Kt, Kh, Kw]
                num_inflate = param_shape[2]
                pretrain_state_dict[name] = np.stack(
                    [common_array] * num_inflate, axis=2) / float(num_inflate)
                logger.info("load inflated({}) param {}".format(num_inflate,
                                                                name))
            else:
                pretrain_state_dict[name] = common_array
                logger.info("load param {}".format(name))

        fluid.set_program_state(prog, pretrain_state_dict)
    else:
        raise TypeError(
            "pretrained file {} is not a directory, so params cannot be loaded from it".
            format(pretrained_file))
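get_common_names is used above but not shown. Here is a minimal sketch, assuming it simply intersects the two name lists (the real helper may also order or log the names), followed by the inflation arithmetic on an illustrative 2D kernel:

import numpy as np

def get_common_names(param_name_list, param_name_from_file):
    # assumed behavior: keep only the names present both in the target
    # program and in the pretrained directory
    file_names = set(param_name_from_file)
    return [name for name in param_name_list if name in file_names]

w2d = np.random.rand(64, 3, 7, 7)                # [Cout, Cin, Kh, Kw], illustrative
num_inflate = 3                                  # Kt of the target 3D kernel
w3d = np.stack([w2d] * num_inflate, axis=2) / float(num_inflate)
assert w3d.shape == (64, 3, num_inflate, 7, 7)   # [Cout, Cin, Kt, Kh, Kw]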
Example #6
 def _load_program(self, dir, predicate_fn=None):
     save_path = os.path.join(dir, 'ckpt')
     if not os.path.exists(save_path + '.pdparams'):
         try:
             log.warn('failed to load model, try old-styled saver')
             super(SaverV2, self)._load_program(dir,
                                                predicate_fn=predicate_fn)
         except F.core.EnforceNotMet as e:
             log.exception(e)
             raise RuntimeError(
                 'cannot load model from %s, is this a textone checkpoint?'
                 % dir)
     else:
         sd = F.load_program_state(save_path)
         F.set_program_state(self._program.train_program, sd)
Example #7
    def load_pretrain_params(self, exe, pretrain, prog, place):
        def is_parameter(var):
            return isinstance(
                var, fluid.framework.Parameter) and (not ("fc_0" in var.name))

        logger.info("Load pretrain weights from {}, exclude fc layer.".format(
            pretrain))

        state_dict = fluid.load_program_state(pretrain)
        dict_keys = list(state_dict.keys())
        for name in dict_keys:
            if "fc_0" in name:
                del state_dict[name]
                print('Deleted {} from pretrained parameters; it will not be loaded.'
                      .format(name))
        fluid.set_program_state(prog, state_dict)
Example #8
    def load_pretrain_params(self, exe, pretrain, prog):
        def is_parameter(var):
            return isinstance(var, fluid.framework.Parameter)

        logger.info("Load pretrain weights from {}, exclude fc layer.".format(
            pretrain))

        print("===pretrain===", pretrain)
        state_dict = fluid.load_program_state(pretrain)
        dict_keys = list(state_dict.keys())
        # remove fc layer when pretrain, because the number of classes in final fc may not match
        for name in dict_keys:
            if "fc_0" in name:
                del state_dict[name]
                print('Deleted {} from pretrained parameters; it will not be loaded.'
                      .format(name))
        fluid.set_program_state(prog, state_dict)
Example #9
    def testLoadStaticModel(self):
        # static mode
        a = fluid.data(name="a", shape=[10, 10])
        conv_in = fluid.data(name="conv_in", shape=[None, 10, 10, 10])

        fc_out1 = fluid.layers.fc(a, 10)
        fc_out2 = fluid.layers.fc(a, 20)

        conv_out_1 = fluid.layers.conv2d(conv_in,
                                         num_filters=10,
                                         filter_size=5,
                                         act="relu")
        conv_out_2 = fluid.layers.conv2d(conv_in,
                                         num_filters=10,
                                         filter_size=5,
                                         act="relu")

        conv3d_in = fluid.data(name='conv3d_in',
                               shape=[None, 3, 12, 32, 32],
                               dtype='float32')
        conv3d_out_1 = fluid.layers.conv3d(input=conv3d_in,
                                           num_filters=2,
                                           filter_size=3,
                                           act="relu")
        conv3d_out_2 = fluid.layers.conv3d(input=conv3d_in,
                                           num_filters=2,
                                           filter_size=3,
                                           act="relu")

        batchnorm_in = fluid.data(name="batchnorm_in",
                                  shape=[None, 10],
                                  dtype='float32')
        batchnorm_out_1 = fluid.layers.batch_norm(batchnorm_in)
        batchnorm_out_2 = fluid.layers.batch_norm(batchnorm_in)

        emb_in = fluid.data(name='emb_in', shape=[None, 10], dtype='int64')
        emb_out_1 = fluid.embedding(emb_in, [1000, 100])
        emb_out_2 = fluid.embedding(emb_in, [2000, 200])

        layernorm = fluid.data(name="ln", shape=[None, 10], dtype='float32')
        layernorm_1 = fluid.layers.layer_norm(layernorm)
        layernorm_2 = fluid.layers.layer_norm(layernorm)

        nce_in = fluid.data(name="nce_in", shape=[None, 100], dtype='float32')
        nce_label = fluid.data(name="nce_label",
                               shape=[None, 10],
                               dtype='int64')
        nce_out_1 = fluid.layers.nce(nce_in, nce_label, 10000)
        nce_out_2 = fluid.layers.nce(nce_in, nce_label, 10000)

        prelu_in = fluid.data(name="prelu_in",
                              shape=[None, 5, 10, 10],
                              dtype='float32')
        prelu_out_1 = fluid.layers.prelu(prelu_in, "channel")
        prelu_out_2 = fluid.layers.prelu(prelu_in, "channel")

        bilinear_tensor_pro_x = fluid.data("t1",
                                           shape=[None, 5],
                                           dtype="float32")
        bilinear_tensor_pro_y = fluid.data("t2",
                                           shape=[None, 4],
                                           dtype="float32")

        bilinear_tensor_pro_out_1 = fluid.layers.bilinear_tensor_product(
            x=bilinear_tensor_pro_x, y=bilinear_tensor_pro_y, size=1000)
        bilinear_tensor_pro_out_2 = fluid.layers.bilinear_tensor_product(
            x=bilinear_tensor_pro_x, y=bilinear_tensor_pro_y, size=1000)

        conv2d_trans_in = fluid.data(name="conv2d_trans_in",
                                     shape=[None, 10, 10, 10])

        conv2d_trans_out_1 = fluid.layers.conv2d_transpose(conv2d_trans_in,
                                                           num_filters=10,
                                                           filter_size=5,
                                                           act="relu")
        conv2d_trans_out_2 = fluid.layers.conv2d_transpose(conv2d_trans_in,
                                                           num_filters=10,
                                                           filter_size=5,
                                                           act="relu")

        conv3d_trans_in = fluid.data(name='conv3d_trans_in',
                                     shape=[None, 3, 12, 32, 32],
                                     dtype='float32')
        conv3d_trans_out_1 = fluid.layers.conv3d_transpose(
            input=conv3d_trans_in, num_filters=2, filter_size=3, act="relu")
        conv3d_trans_out_2 = fluid.layers.conv3d_transpose(
            input=conv3d_trans_in, num_filters=2, filter_size=3, act="relu")

        groupnorm_in = fluid.data(name='groupnorm_in',
                                  shape=[None, 8, 32, 32],
                                  dtype='float32')
        groupnorm_out1 = fluid.layers.group_norm(input=groupnorm_in, groups=4)
        groupnorm_out2 = fluid.layers.group_norm(input=groupnorm_in, groups=4)
        '''
        spec_norm = fluid.data(name='spec_norm', shape=[2, 8, 32, 32], dtype='float32')
        spe_norm_out_1 = fluid.layers.spectral_norm(weight=spec_norm, dim=1, power_iters=2)
        spe_norm_out_2 = fluid.layers.spectral_norm(weight=spec_norm, dim=1, power_iters=2)
        '''

        nodes_vector = fluid.data(name='vectors',
                                  shape=[None, 10, 5],
                                  dtype='float32')
        edge_set = fluid.data(name='edge_set',
                              shape=[None, 10, 2],
                              dtype='float32')
        tree_conv_out1 = fluid.contrib.layers.tree_conv(
            nodes_vector, edge_set, 6, 1, 2)
        tree_conv_out2 = fluid.contrib.layers.tree_conv(
            nodes_vector, edge_set, 6, 1, 2)

        para1 = fluid.layers.create_parameter([100, 100],
                                              'float32',
                                              name="weight_test_1")
        para2 = fluid.layers.create_parameter([20, 200],
                                              'float32',
                                              name="weight_test_2")

        para_list = fluid.default_main_program().list_vars()

        exe = fluid.Executor(fluid.CPUPlace(
        ) if not fluid.is_compiled_with_cuda() else fluid.CUDAPlace(0))
        out = exe.run(framework.default_startup_program())

        fluid.save(framework.default_main_program(), "./test_1")

        para_dict = fluid.load_program_state("./test_1")

        new_dict = {}
        for k, v in para_dict.items():
            #print( k, v.shape )
            if k.startswith("fc"):
                name = k.replace("fc", "linear", 1)
                new_dict[name] = v
            else:
                new_dict[k] = v

        with fluid.dygraph.guard():

            class MyTest(fluid.dygraph.Layer):
                def __init__(self):
                    super(MyTest, self).__init__()

                    self.linear1 = Linear(10, 10)
                    self.linear2 = Linear(10, 20)

                    self.conv2d_1 = Conv2D(num_channels=10,
                                           num_filters=10,
                                           filter_size=5,
                                           act="relu")
                    self.conv2d_2 = Conv2D(num_channels=10,
                                           num_filters=10,
                                           filter_size=5,
                                           act="relu")

                    self.conv3d_1 = Conv3D(num_channels=3,
                                           num_filters=2,
                                           filter_size=3,
                                           act="relu")
                    self.conv3d_2 = Conv3D(num_channels=3,
                                           num_filters=2,
                                           filter_size=3,
                                           act="relu")

                    self.batch_norm_1 = BatchNorm(10)
                    self.batch_norm_2 = BatchNorm(10)

                    self.emb1 = Embedding([1000, 100])
                    self.emb2 = Embedding([2000, 200])

                    self.layer_norm_1 = LayerNorm([10])
                    self.layer_norm_2 = LayerNorm(10)

                    self.nce1 = NCE(10000, 100)
                    self.nce2 = NCE(10000, 100)

                    self.prelu1 = PRelu("channel", channel=5)
                    self.prelu2 = PRelu("channel", channel=5)

                    self.group_norm1 = GroupNorm(8, 4)
                    self.group_norm2 = GroupNorm(8, 4)

                    self.w_1 = self.create_parameter([100, 100],
                                                     dtype='float32',
                                                     attr="weight_test_1")
                    self.w_2 = self.create_parameter([20, 200],
                                                     dtype='float32',
                                                     attr="weight_test_2")

            my_test = MyTest()
            my_test.set_dict(new_dict, use_structured_name=False)
            for k, v in my_test.state_dict().items():
                self.assertTrue(np.array_equal(v.numpy(), new_dict[v.name]))
Example #10
 def load_pretrain_params(self, exe, pretrain, prog):
     logger.info("Load pretrain weights from {}".format(pretrain))
     state_dict = fluid.load_program_state(pretrain)
     fluid.set_program_state(prog, state_dict)
Example #11
def train(args):
    config = parse_config(args.config)
    train_config = merge_configs(config, 'train', vars(args))
    valid_config = merge_configs(config, 'valid', vars(args))
    print_configs(train_config, 'Train')

    local_rank = fluid.dygraph.parallel.Env().local_rank

    use_data_parallel = args.use_data_parallel
    trainer_count = fluid.dygraph.parallel.Env().nranks
    if not args.use_gpu:
        place = fluid.CPUPlace()
    elif not args.use_data_parallel:
        place = fluid.CUDAPlace(0)
    else:
        #(data_parallel step1/6)
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)

    #load pretrain
    assert os.path.exists(args.weights), \
        "Given dir {} does not exist.".format(args.weights)
    pre_state_dict = fluid.load_program_state(args.weights)
    #for key in pre_state_dict.keys():
    #    print('pre_state_dict.key: {}'.format(key))

    with fluid.dygraph.guard(place):
        #1. init model
        video_model = TSM_ResNet("TSM", train_config)

        #2. set weights
        param_state_dict = {}
        model_dict = video_model.state_dict()
        for key in model_dict.keys():
            weight_name = model_dict[key].name
            if weight_name in pre_state_dict.keys(
            ) and weight_name != "fc_0.w_0" and weight_name != "fc_0.b_0":
                print('Successfully loaded weight: {}, shape: {}'.format(
                    weight_name, pre_state_dict[weight_name].shape))
                param_state_dict[key] = pre_state_dict[weight_name]
            else:
                print('Failed to load weight: {}'.format(weight_name))
                param_state_dict[key] = model_dict[key]
        video_model.set_dict(param_state_dict)

        #3. init optim
        optimizer = create_optimizer(train_config.TRAIN,
                                     video_model.parameters())
        if use_data_parallel:
            #(data_parallel step2,3/6)
            strategy = fluid.dygraph.parallel.prepare_context()
            video_model = fluid.dygraph.parallel.DataParallel(
                video_model, strategy)

        # 4. load checkpoint
        if args.checkpoint:
            assert os.path.exists(args.checkpoint + ".pdparams"), \
                "Given file {}.pdparams does not exist.".format(args.checkpoint)
            assert os.path.exists(args.checkpoint + ".pdopt"), \
                "Given file {}.pdopt does not exist.".format(args.checkpoint)
            para_dict, opti_dict = fluid.dygraph.load_dygraph(args.checkpoint)
            video_model.set_dict(para_dict)
            optimizer.set_dict(opti_dict)

        # 5. reader
        bs_denominator = 1
        if args.use_gpu:
            gpus = os.getenv("CUDA_VISIBLE_DEVICES", "")
            if gpus == "":
                pass
            else:
                gpus = gpus.split(",")
                num_gpus = len(gpus)
                assert num_gpus == train_config.TRAIN.num_gpus, \
                       "num_gpus({}) set by CUDA_VISIBLE_DEVICES " \
                       "should be the same as that " \
                       "set in {}({})".format(
                       num_gpus, args.config, train_config.TRAIN.num_gpus)
            bs_denominator = train_config.TRAIN.num_gpus

        train_config.TRAIN.batch_size = int(train_config.TRAIN.batch_size /
                                            bs_denominator)

        train_reader = UCF101Reader(name="TSM", mode="train", cfg=train_config)

        train_reader = train_reader.create_reader()
        if use_data_parallel:
            #(data_parallel step4/6)
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        # 6. train loop
        reader_cost_averager = TimeAverager()
        batch_cost_averager = TimeAverager()
        for epoch in range(train_config.TRAIN.epoch):
            epoch_start = time.time()

            video_model.train()
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0

            # 6.1 for each batch, call model() , backward(), and minimize()
            batch_start = time.time()
            for batch_id, data in enumerate(train_reader()):
                t1 = time.time()
                reader_cost_averager.record(t1 - batch_start)

                x_data = np.array([item[0] for item in data])
                y_data = np.array([item[1] for item in data]).reshape([-1, 1])

                imgs = to_variable(x_data)
                labels = to_variable(y_data)
                labels.stop_gradient = True

                t2 = time.time()
                outputs = video_model(imgs)
                t3 = time.time()

                loss = fluid.layers.cross_entropy(input=outputs,
                                                  label=labels,
                                                  ignore_index=-1)
                avg_loss = fluid.layers.mean(loss)

                acc_top1 = fluid.layers.accuracy(input=outputs,
                                                 label=labels,
                                                 k=1)
                acc_top5 = fluid.layers.accuracy(input=outputs,
                                                 label=labels,
                                                 k=5)

                current_step_lr = optimizer.current_step_lr()
                if use_data_parallel:
                    #(data_parallel step5/6)
                    avg_loss = video_model.scale_loss(avg_loss)
                    avg_loss.backward()
                    video_model.apply_collective_grads()
                else:
                    avg_loss.backward()

                t4 = time.time()
                optimizer.minimize(avg_loss)
                video_model.clear_gradients()

                avg_loss_value = avg_loss.numpy()[0]
                acc_top1_value = acc_top1.numpy()[0]
                acc_top5_value = acc_top5.numpy()[0]

                total_loss += avg_loss_value
                total_acc1 += acc_top1_value
                total_acc5 += acc_top5_value
                total_sample += 1

                t5 = time.time()
                batch_cost_averager.record(
                    t5 - batch_start,
                    num_samples=train_config.TRAIN.batch_size)
                if batch_id % args.log_interval == 0:
                    print(
                        'TRAIN Epoch: %d, iter: %d, loss: %.5f, acc1: %.5f, acc5: %.5f, lr: %.5f, forward_cost:%.5f s, backward_cost:%.5f s, minimize_cost:%.5f s, to_variable_cost: %.5f s, batch_cost: %.5f sec, reader_cost: %.5f sec, ips: %.5f samples/sec'
                        % (epoch, batch_id, avg_loss_value, acc_top1_value,
                           acc_top5_value, current_step_lr, t3 - t2, t4 - t3,
                           t5 - t4, t2 - t1, batch_cost_averager.get_average(),
                           reader_cost_averager.get_average(),
                           batch_cost_averager.get_ips_average()))
                    batch_cost_averager.reset()
                    reader_cost_averager.reset()

                batch_start = time.time()

            train_epoch_cost = time.time() - epoch_start
            print(
                'TRAIN End, Epoch {}, avg_loss= {:.5f}, avg_acc1= {:.5f}, avg_acc5= {:.5f}, lr={:.5f}, epoch_cost: {:.5f} sec'
                .format(epoch, total_loss / total_sample,
                        total_acc1 / total_sample, total_acc5 / total_sample,
                        current_step_lr, train_epoch_cost))

            # 6.2 save checkpoint
            if local_rank == 0:
                if not os.path.isdir(args.model_save_dir):
                    os.makedirs(args.model_save_dir)
                model_path = os.path.join(
                    args.model_save_dir,
                    args.model_path_pre + "_epoch{}".format(epoch))
                fluid.dygraph.save_dygraph(video_model.state_dict(),
                                           model_path)
                fluid.dygraph.save_dygraph(optimizer.state_dict(), model_path)
                print('save_dygraph End, Epoch {}/{} '.format(
                    epoch, train_config.TRAIN.epoch))

            # 6.3 validation
            video_model.eval()
            val(epoch, video_model, valid_config, args)

        # 7. save final model
        if local_rank == 0:
            model_path = os.path.join(args.model_save_dir,
                                      args.model_path_pre + "_final")
            fluid.dygraph.save_dygraph(video_model.state_dict(), model_path)
            fluid.dygraph.save_dygraph(optimizer.state_dict(), model_path)

        logger.info('[TRAIN] training finished')
Example #12
import paddle
import paddle.fluid as fluid

paddle.enable_static()

x = fluid.data(name="x", shape=[10, 10], dtype='float32')
y = fluid.layers.fc(x, 10)
z = fluid.layers.fc(y, 10)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
prog = fluid.default_main_program()

fluid.save(prog, "./temp")
program_state = fluid.load_program_state("./temp")

fluid.set_program_state(prog, program_state)
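The minimal example above round-trips the state unchanged. Most of the earlier examples edit the state dict between load_program_state and set_program_state; below is a sketch of that pattern on the same prog, reusing the fc_0 name filter from Examples #7 and #8:

program_state = fluid.load_program_state("./temp")

# drop the final fc parameters, as done when the number of classes changes;
# set_program_state only sets the entries present in the dict
for name in list(program_state.keys()):
    if "fc_0" in name:
        del program_state[name]

fluid.set_program_state(prog, program_state)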
Example #13
    def test_ptb_rnn_cpu_float32(self):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            ptb_model = PtbModel("ptb_model",
                                 hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale)

            place = fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)
            exe = fluid.Executor(place)
            sgd = Adam(learning_rate=1e-3)
            x = fluid.layers.data(name="x",
                                  shape=[-1, num_steps, 1],
                                  dtype='int64')
            y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
            init_hidden = fluid.layers.data(name="init_hidden",
                                            shape=[1],
                                            dtype='float32')
            init_cell = fluid.layers.data(name="init_cell",
                                          shape=[1],
                                          dtype='float32')

            static_loss, static_last_hidden, static_last_cell = ptb_model(
                x, y, init_hidden, init_cell)

            test_program = fluid.default_main_program().clone(for_test=True)

            add_1 = fluid.layers.fc(static_last_hidden,
                                    size=hidden_size,
                                    num_flatten_dims=2,
                                    bias_attr=False)

            sgd.minimize(static_loss)
            static_param_updated = dict()
            static_param_init = dict()

            out = exe.run(framework.default_startup_program())

            static_loss_value = None
            static_last_cell_value = None
            static_last_hidden_value = None
            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                fetch_list = [
                    static_loss, static_last_hidden, static_last_cell
                ]
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "x": x_data,
                                  "y": y_data,
                                  "init_hidden": init_hidden_data,
                                  "init_cell": init_cell_data
                              },
                              fetch_list=fetch_list)
                static_loss_value = out[0]
                static_last_hidden_value = out[1]
                static_last_cell_value = out[2]

            # get value before save
            main_program = framework.default_main_program()
            base_map = {}
            for var in main_program.list_vars():
                if isinstance(var, framework.Parameter) or var.persistable:
                    t = np.array(fluid.global_scope().find_var(
                        var.name).get_tensor())
                    # make sure all the parameter and optimizer vars have been updated
                    self.assertTrue(np.sum(np.abs(t)) != 0)
                    base_map[var.name] = t

            fluid.save(main_program, "./test_1")

            # set var to zero
            for var in main_program.list_vars():
                if isinstance(var, framework.Parameter) or var.persistable:
                    ten = fluid.global_scope().find_var(var.name).get_tensor()
                    ten.set(np.zeros_like(np.array(ten)), place)

                    new_t = np.array(fluid.global_scope().find_var(
                        var.name).get_tensor())
                    # make sure all the parameter and optimizer vars have been set to zero
                    self.assertTrue(np.sum(np.abs(new_t)) == 0)

            #fluid.load(test_program, "./test_1", None )
            program_state = fluid.load_program_state("./test_1")
            fluid.set_program_state(test_program, program_state)

            for var in test_program.list_vars():
                if isinstance(var, framework.Parameter) or var.persistable:
                    print(var.name)
                    new_t = np.array(fluid.global_scope().find_var(
                        var.name).get_tensor())
                    base_t = base_map[var.name]
                    self.assertTrue(np.array_equal(new_t, base_t))
Example #14
    def test_large_parameters_static_save(self):
        # enable static mode
        paddle.enable_static()
        with new_program_scope():
            # create network
            x = paddle.static.data(name="static_save_load_large_x",
                                   shape=[None, 10],
                                   dtype='float32')
            z = paddle.static.nn.fc(x, LARGE_PARAM, bias_attr=False)
            place = paddle.CPUPlace()
            exe = paddle.static.Executor(place)
            exe.run(paddle.static.default_startup_program())
            prog = paddle.static.default_main_program()

            base_map = {}
            for var in prog.list_vars():
                if isinstance(var, framework.Parameter) or var.persistable:
                    t = np.array(fluid.global_scope().find_var(
                        var.name).get_tensor())
                    # make sure all the parameter and optimizer vars have been updated
                    self.assertTrue(np.sum(np.abs(t)) != 0)
                    base_map[var.name] = t

            path = os.path.join("test_static_save_load_large_param",
                                "static_save")
            protocol = 4
            paddle.fluid.save(prog, path, pickle_protocol=protocol)
            # set var to zero
            for var in prog.list_vars():
                if isinstance(var, framework.Parameter) or var.persistable:
                    ten = fluid.global_scope().find_var(var.name).get_tensor()
                    ten.set(np.zeros_like(np.array(ten)), place)

                    new_t = np.array(fluid.global_scope().find_var(
                        var.name).get_tensor())
                    self.assertTrue(np.sum(np.abs(new_t)) == 0)

            paddle.fluid.load(prog, path)

            for var in prog.list_vars():
                if isinstance(var, framework.Parameter) or var.persistable:
                    new_t = np.array(fluid.global_scope().find_var(
                        var.name).get_tensor())
                    base_t = base_map[var.name]
                    self.assertTrue(np.array_equal(new_t, base_t))

            # set var to zero
            for var in prog.list_vars():
                if isinstance(var, framework.Parameter) or var.persistable:
                    ten = fluid.global_scope().find_var(var.name).get_tensor()
                    ten.set(np.zeros_like(np.array(ten)), place)

                    new_t = np.array(fluid.global_scope().find_var(
                        var.name).get_tensor())
                    self.assertTrue(np.sum(np.abs(new_t)) == 0)

            program_state = fluid.load_program_state(path)
            fluid.set_program_state(prog, program_state)
            for var in prog.list_vars():
                if isinstance(var, framework.Parameter) or var.persistable:
                    new_t = np.array(fluid.global_scope().find_var(
                        var.name).get_tensor())
                    base_t = base_map[var.name]
                    self.assertTrue(np.array_equal(new_t, base_t))