Example #1
    def check_l2decay(self, place, model):
        paddle.manual_seed(1)
        paddle.framework.random._manual_program_seed(1)
        main_prog = fluid.framework.Program()
        startup_prog = fluid.framework.Program()

        with self.scope_prog_guard(main_prog=main_prog,
                                   startup_prog=startup_prog):
            data = fluid.layers.data(name="words",
                                     shape=[1],
                                     dtype="int64",
                                     lod_level=1)
            label = fluid.layers.data(name="label", shape=[1], dtype="int64")

            avg_cost_l2 = model(data, label, len(self.word_dict))

            param_list = fluid.default_main_program().block(0).all_parameters()
            para_sum = []
            for para in param_list:
                para_mul = fluid.layers.square(x=para)
                para_sum.append(fluid.layers.reduce_sum(input=para_mul))
            # manually add the L2 penalty 0.5 * sum(||w||^2) to the loss
            avg_cost_l2 += fluid.layers.sums(para_sum) * 0.5

            optimizer = fluid.optimizer.Adagrad(learning_rate=0.1)
            optimizer.minimize(avg_cost_l2)
            param_sum = self.run_program(place, [data, label])
        return param_sum
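The scope_prog_guard helper is not shown above; a minimal sketch, assuming it pairs a fresh scope with a program guard (the prog_scope_guard used in a later example is presumably analogous):

    # a minimal sketch (assumed): requires `import contextlib` at module level
    @contextlib.contextmanager
    def scope_prog_guard(self, main_prog, startup_prog):
        # run the enclosed block against a fresh scope and the given programs
        scope = fluid.core.Scope()
        with fluid.scope_guard(scope):
            with fluid.program_guard(main_prog, startup_prog):
                yield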
Example #2
def simple_fc_net(places, use_legacy_py_reader, use_double_buffer):
    paddle.manual_seed(1)
    paddle.framework.random._manual_program_seed(1)
    startup_prog = fluid.Program()
    main_prog = fluid.Program()

    with fluid.unique_name.guard():
        with fluid.program_guard(main_prog, startup_prog):
            image = fluid.layers.data(
                name='image', shape=[784], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            py_reader = fluid.io.PyReader(
                feed_list=[image, label],
                capacity=4,
                iterable=not use_legacy_py_reader,
                use_double_buffer=use_double_buffer)
            hidden = image
            for hidden_size in [10, 20, 30]:
                hidden = fluid.layers.fc(
                    hidden,
                    size=hidden_size,
                    act='tanh',
                    bias_attr=fluid.ParamAttr(
                        initializer=fluid.initializer.Constant(value=1.0)))

            predict_label = fluid.layers.fc(hidden,
                                            size=CLASS_NUM,
                                            act='softmax')
            loss = fluid.layers.mean(
                fluid.layers.cross_entropy(
                    input=predict_label, label=label))

            optimizer = fluid.optimizer.Adam()
            optimizer.minimize(loss)
    return startup_prog, main_prog, py_reader, loss
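A hedged driving sketch for the returned handles in the legacy (non-iterable) PyReader mode; fake_sample_reader and the batch size are illustrative assumptions, and CLASS_NUM is taken from the surrounding module:

import numpy as np

def fake_sample_reader():
    for _ in range(32):
        yield (np.random.random([784]).astype('float32'),
               np.random.randint(CLASS_NUM, size=[1]).astype('int64'))

startup_prog, main_prog, py_reader, loss = simple_fc_net(
    places=[fluid.CPUPlace()], use_legacy_py_reader=True,
    use_double_buffer=True)
exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_prog)
py_reader.decorate_sample_list_generator(
    paddle.batch(fake_sample_reader, batch_size=8))
py_reader.start()  # legacy protocol: start, run to EOF, then reset
try:
    while True:
        loss_val, = exe.run(main_prog, fetch_list=[loss])
except fluid.core.EOFException:
    py_reader.reset()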
Example #3
        def run_simple_conv(inp_np, use_scaler=True):
            paddle.manual_seed(10)
            paddle.framework.random._manual_program_seed(10)
            with fluid.dygraph.guard():
                model = SimpleConv(
                    num_channels=3,
                    num_filters=64,
                    filter_size=7,
                    stride=2,
                    act='relu')
                optimizer = fluid.optimizer.SGDOptimizer(
                    learning_rate=0.01, parameter_list=model.parameters())
                scaler = fluid.dygraph.AmpScaler(init_loss_scaling=1024)
                data = fluid.dygraph.to_variable(inp_np)

                out = model(data)
                loss = fluid.layers.mean(out)
                if use_scaler:
                    print('use scaler')
                    scaled_loss = scaler.scale(loss)
                    scaled_loss.backward()
                    optimize_ops, params_grads = scaler.minimize(optimizer,
                                                                 scaled_loss)
                else:
                    print('use no scaler')
                    loss.backward()
                    optimize_ops, params_grads = optimizer.minimize(loss)
            return optimize_ops, params_grads
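A usage sketch for the helper above; the input shape is an assumption matching the 3-channel SimpleConv:

        inp_np = np.random.random((2, 3, 224, 224)).astype('float32')
        ops_scaled, grads_scaled = run_simple_conv(inp_np, use_scaler=True)
        ops_plain, grads_plain = run_simple_conv(inp_np, use_scaler=False)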
Example #4
def train(conf_dict, to_static):
    """
    train process
    """
    program_translator = ProgramTranslator()
    program_translator.enable(to_static)

    # Get device
    if fluid.is_compiled_with_cuda():
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    with fluid.dygraph.guard(place):
        paddle.manual_seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)

        conf_dict['dict_size'] = len(vocab)
        conf_dict['seq_len'] = args.seq_len

        net = BOW(conf_dict)
        loss = HingeLoss(conf_dict)
        optimizer = fluid.optimizer.AdamOptimizer(
            learning_rate=0.001,
            beta1=0.9,
            beta2=0.999,
            epsilon=1e-08,
            parameter_list=net.parameters())

        metric = fluid.metrics.Auc(name="auc")

        global_step = 0
        losses = []

        train_loader = fluid.io.DataLoader.from_generator(
            capacity=16,
            return_list=True,
            iterable=True,
            use_double_buffer=True)
        get_train_examples = simnet_process.get_reader("train",
                                                       epoch=args.epoch)
        train_loader.set_sample_list_generator(
            paddle.batch(get_train_examples, batch_size=args.batch_size),
            place)

        for left, pos_right, neg_right in train_loader():
            left = fluid.layers.reshape(left, shape=[-1, 1])
            pos_right = fluid.layers.reshape(pos_right, shape=[-1, 1])
            neg_right = fluid.layers.reshape(neg_right, shape=[-1, 1])
            net.train()
            global_step += 1
            left_feat, pos_score = net(left, pos_right)
            pred = pos_score
            _, neg_score = net(left, neg_right)
            avg_cost = loss.compute(pos_score, neg_score)
            losses.append(np.mean(avg_cost.numpy()))
            avg_cost.backward()
            optimizer.minimize(avg_cost)
            net.clear_gradients()
    return losses
Example #5
    def setUp(self):
        self.model_path = "model.test_jit_save_load"
        # enable dygraph mode
        fluid.enable_dygraph()
        # config seed
        paddle.manual_seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)
Example #6
    def test_generator_randperm_static(self):

        fluid.disable_dygraph()

        paddle.manual_seed(123123143)

        startup_program = fluid.Program()
        train_program = fluid.Program()
        with fluid.program_guard(train_program, startup_program):
            # example 1:
            # attr shape is a list which doesn't contain tensor Variable.
            result_1 = paddle.randperm(10)
            result_2 = paddle.randperm(10)

            exe = fluid.Executor(fluid.CPUPlace())
            exe.run(startup_program)
            out1 = exe.run(train_program,
                           feed={},
                           fetch_list=[result_1, result_2])

            paddle.manual_seed(123123143)
            out2 = exe.run(train_program,
                           feed={},
                           fetch_list=[result_1, result_2])

            out1_res1 = np.array(out1[0])
            out1_res2 = np.array(out1[1])
            out2_res1 = np.array(out2[0])
            out2_res2 = np.array(out2[1])

            if not core.is_compiled_with_cuda():
                print(">>>>>>> randperm static >>>>>>>")
                self.assertTrue(np.allclose(out1_res1, out2_res1))
                self.assertTrue(np.allclose(out1_res2, out2_res2))
                self.assertTrue(not np.allclose(out1_res2, out1_res1))
Example #7
    def training_test(self, is_sparse):
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            paddle.manual_seed(1)
            start_up = fluid.default_startup_program()
            x = np.arange(6).reshape(6)
            path_table = np.array([(1, 2, -1), (1, 2, -1)]).astype('int64')
            path_code = np.array([(1, 0, -1), (0, 0, -1)]).astype('int64')
            label = np.array([1, 4]).astype('int64')

            loss, data_list = self.hs_net_conf(is_sparse)
            optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
            optimizer.minimize(loss)

            main_program = fluid.default_main_program()
            place = fluid.CPUPlace()
            feeder = fluid.DataFeeder(feed_list=data_list, place=place)
            exe = fluid.Executor(place)

            exe.run(start_up)
            result = list()
            for i in range(10):
                data = [([[x[i % 2]]], [list(path_table[i % 2])],
                         [list(path_code[i % 2])], [label[i % 2]])]

                loss_val = exe.run(main_program,
                                   feed=feeder.feed(data),
                                   fetch_list=[loss])
                result.append(loss_val)
        return result
Example #8
    def check_weight_decay2(self, place, model):
        paddle.manual_seed(1)
        paddle.framework.random._manual_program_seed(1)
        main_prog = fluid.framework.Program()
        startup_prog = fluid.framework.Program()

        with prog_scope_guard(main_prog=main_prog, startup_prog=startup_prog):
            data = fluid.layers.data(
                name="words", shape=[1], dtype="int64", lod_level=1)
            label = fluid.layers.data(name="label", shape=[1], dtype="int64")

            avg_cost = model(data, label, len(self.word_dict))

            param_list = [(var, var * self.learning_rate)
                          for var in main_prog.block(0).all_parameters()]

            optimizer = fluid.optimizer.Adam(learning_rate=self.learning_rate)

            optimizer.minimize(avg_cost)
            for params in param_list:
                updated_p = fluid.layers.elementwise_sub(
                    x=params[0], y=params[1])
                fluid.layers.assign(input=updated_p, output=params[0])

            param_sum = self.run_program(place, [data, label])
        return param_sum
Example #9
    def setUp(self):
        # enable dygraph mode
        place = paddle.CPUPlace()
        paddle.disable_static(place)

        # config seed
        paddle.manual_seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)

        # create network
        self.layer = LinearNet()
        self.loss_fn = nn.CrossEntropyLoss()
        self.sgd = opt.SGD(learning_rate=0.001,
                           parameters=self.layer.parameters())

        # create data loader
        dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
        self.loader = paddle.io.DataLoader(
            dataset,
            places=place,
            batch_size=BATCH_SIZE,
            shuffle=True,
            drop_last=True,
            num_workers=2)

        # train
        train(self.layer, self.loader, self.loss_fn, self.sgd)

        # save
        self.model_path = "linear.example.model"
        paddle.jit.save(self.layer, self.model_path)
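The train helper called above is defined elsewhere; a minimal sketch, assuming a plain epoch loop over the loader with the paddle 2.x optimizer API (EPOCH_NUM is a placeholder constant):

def train(layer, loader, loss_fn, opt):
    for epoch_id in range(EPOCH_NUM):
        for batch_id, (image, label) in enumerate(loader()):
            out = layer(image)
            loss = loss_fn(out, label)
            loss.backward()
            opt.step()
            opt.clear_grad()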
Example #10
    def setUp(self):
        self.linear_size = 4
        self.model_path = "model.jit_prune_model_and_load"
        # enable dygraph mode
        fluid.enable_dygraph()
        # config seed
        paddle.manual_seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)
Example #11
        def multihead_attention_test_helper(self_attention, cache):
            paddle.manual_seed(2020)
            paddle.framework.random._manual_program_seed(2020)
            # self_attention|cross_attention, cache|No cache
            with fluid.dygraph.guard(fluid.CPUPlace()):

                # generate params for multi_head_attention
                (batch_size, query_length, key_length, value_length,
                 embed_dim, kdim, vdim, num_heads,
                 attn_dropout) = generate_basic_params("attn", self_attention)
                query, key, value, attn_mask, cache_dict = generate_query_key_value_cache(
                    self_attention, batch_size, num_heads, query_length,
                    embed_dim, key_length, value_length, kdim, vdim, cache)
                if cache and self_attention:
                    attn_mask = np.concatenate((attn_mask, attn_mask), axis=3)
                need_weight, param_attr, bias_attr = False, None, None
                # call paddle's function
                multi_head_attn = MultiHeadAttention(embed_dim, num_heads,
                                                     attn_dropout, kdim, vdim,
                                                     need_weight, param_attr,
                                                     bias_attr)
                # construct cache object
                cache_obj = None
                if cache_dict:
                    # note: `if 'k' and 'v' in cache_dict` only tested 'v';
                    # check both keys explicitly
                    if 'k' in cache_dict and 'v' in cache_dict:
                        cache_obj = multi_head_attn.Cache(
                            paddle.to_variable(cache_dict['k']),
                            paddle.to_variable(cache_dict['v']))
                    elif 'static_k' in cache_dict and 'static_v' in cache_dict:
                        cache_obj = multi_head_attn.StaticCache(
                            paddle.to_variable(cache_dict['static_k']),
                            paddle.to_variable(cache_dict['static_v']))
                if attn_mask is not None:
                    attn_output = multi_head_attn(
                        paddle.to_variable(query), paddle.to_variable(key),
                        paddle.to_variable(value),
                        paddle.to_variable(attn_mask), cache_obj)
                else:
                    attn_output = multi_head_attn(paddle.to_variable(query),
                                                  paddle.to_variable(key),
                                                  paddle.to_variable(value),
                                                  attn_mask, cache_obj)
                attn_output = attn_output[0] if cache_dict else attn_output

                # implementation by numpy
                # compute q, k, v
                q, k, v, _ = prepare_qkv(query, key, value, num_heads,
                                         embed_dim, self_attention,
                                         multi_head_attn, cache_dict)
                # scale dot product attention
                attn_heads = scaled_dot_product_attention(
                    q, k, v, embed_dim // num_heads, attn_mask,
                    multi_head_attn)
                out_proj_weight = multi_head_attn.out_proj.weight.numpy()
                reference = fc(attn_heads, out_proj_weight)

                np.testing.assert_allclose(attn_output.numpy(),
                                           reference,
                                           atol=1e-6)
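The numpy fc reference above is assumed to be a bias-free projection along the last axis:

def fc(x, weight):
    # project the concatenated attention heads back to embed_dim
    return np.matmul(x, weight)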
Example #12
    def setUp(self):
        self.linear_size = 4
        self.model_path = "model.jit_multi_load"
        # enable dygraph mode
        fluid.enable_dygraph()
        # config seed
        paddle.manual_seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)
        # train and save base model
        self.train_and_save_orig_model()
Example #13
def train(args, to_static):
    program_translator.enable(to_static)
    place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \
        else fluid.CPUPlace()

    with fluid.dygraph.guard(place):
        np.random.seed(SEED)
        paddle.manual_seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)

        train_reader = fake_data_reader(args.class_num, args.vocab_size,
                                        args.batch_size, args.padding_size)
        train_loader = fluid.io.DataLoader.from_generator(capacity=24)
        train_loader.set_sample_list_generator(train_reader)

        if args.model_type == 'cnn_net':
            model = CNN(args.vocab_size, args.batch_size, args.padding_size)
        elif args.model_type == 'bow_net':
            model = BOW(args.vocab_size, args.batch_size, args.padding_size)
        elif args.model_type == 'gru_net':
            model = GRU(args.vocab_size, args.batch_size, args.padding_size)
        elif args.model_type == 'bigru_net':
            model = BiGRU(args.vocab_size, args.batch_size, args.padding_size)
        sgd_optimizer = fluid.optimizer.Adagrad(
            learning_rate=args.lr, parameter_list=model.parameters())

        loss_data = []
        for eop in range(args.epoch):
            time_begin = time.time()
            for batch_id, data in enumerate(train_loader()):
                word_ids, labels, seq_lens = data
                doc = to_variable(word_ids.numpy().reshape(-1)).astype('int64')
                label = labels.astype('int64')

                model.train()
                avg_cost, prediction, acc = model(doc, label)
                loss_data.append(avg_cost.numpy()[0])

                avg_cost.backward()
                sgd_optimizer.minimize(avg_cost)
                model.clear_gradients()

                if batch_id % args.log_step == 0:
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("step: %d, ave loss: %f, speed: %f steps/s" %
                          (batch_id, avg_cost.numpy()[0],
                           args.log_step / used_time))
                    time_begin = time.time()

                if batch_id == args.train_step:
                    break
                # note: a manual `batch_id += 1` here would be dead code,
                # since enumerate rebinds batch_id each iteration
    return loss_data
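fake_data_reader is defined outside this snippet; a plausible sketch satisfying the sample-list protocol of set_sample_list_generator (the batch count, length distribution, and dtypes are assumptions):

def fake_data_reader(class_num, vocab_size, batch_size, padding_size):
    def reader():
        for _ in range(100):  # a fixed number of fake batches
            batch = []
            for _ in range(batch_size):
                seq_len = np.random.randint(1, padding_size + 1)
                word_ids = np.random.randint(
                    0, vocab_size, [padding_size]).astype('int64')
                label = np.random.randint(0, class_num, [1]).astype('int64')
                batch.append((word_ids, label, np.array([seq_len])))
            yield batch
    return reader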
Example #14
    def check(self, place, use_cuda):
        paddle.manual_seed(1)
        paddle.framework.random._manual_program_seed(1)
        main_program = fluid.Program()
        startup_program = fluid.Program()
        x, y, loss = self.build_program(main_program, startup_program,
                                        use_cuda)
        exe = fluid.Executor(place)
        iters = 10
        batch_size = 16
        feeder = fluid.DataFeeder(feed_list=[x, y], place=place)

        # close fused_bn_act_ops
        build_strategy = fluid.BuildStrategy()
        build_strategy.fuse_bn_act_ops = False
        binary = fluid.CompiledProgram(main_program).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)
        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=batch_size)
        loss_vals = []
        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup_program)
            for _ in range(iters):
                data = next(train_reader())
                loss_v = exe.run(binary,
                                 feed=feeder.feed(data),
                                 fetch_list=[loss])
                loss_vals.append(loss_v[0][0])

        # open fused_bn_act_ops
        build_strategy_fused = fluid.BuildStrategy()
        build_strategy_fused.fuse_bn_act_ops = True
        binary_fused = fluid.CompiledProgram(main_program).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy_fused)
        train_reader_fused = paddle.batch(paddle.dataset.mnist.train(),
                                          batch_size=batch_size)
        loss_vals_fused = []
        scope_fused = fluid.Scope()
        with fluid.scope_guard(scope_fused):
            exe.run(startup_program)
            for _ in range(iters):
                data = next(train_reader_fused())
                loss_v = exe.run(binary_fused,
                                 feed=feeder.feed(data),
                                 fetch_list=[loss])
                loss_vals_fused.append(loss_v[0][0])

        # check loss
        for i in range(iters):
            self.assertAlmostEqual(loss_vals[i],
                                   loss_vals_fused[i],
                                   delta=1e-5)
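build_program is assumed to assemble a small conv + batch_norm(act='relu') classifier, since an adjacent batch_norm/activation pair is what fuse_bn_act_ops rewrites; a minimal sketch (use_cuda is accepted but unused here):

    def build_program(self, main_program, startup_program, use_cuda):
        with fluid.program_guard(main_program, startup_program):
            x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32')
            y = fluid.layers.data(name='y', shape=[1], dtype='int64')
            conv = fluid.layers.conv2d(input=x, num_filters=16,
                                       filter_size=3, bias_attr=False)
            bn = fluid.layers.batch_norm(input=conv, act='relu')
            prediction = fluid.layers.fc(input=bn, size=10, act='softmax')
            loss = fluid.layers.mean(
                fluid.layers.cross_entropy(input=prediction, label=y))
            sgd = fluid.optimizer.SGD(learning_rate=0.001)
            sgd.minimize(loss)
        return x, y, loss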
Example #15
    def __init__(self, cfg):
        paddle.manual_seed(1)
        paddle.framework.random._manual_program_seed(1)

        self.generator = Generator(cfg)
        self.discriminator = Discriminator(cfg)

        self.g_optimizer = build_optimizer(self.generator, cfg)
        self.d_optimizer = build_optimizer(self.discriminator, cfg)

        self.cfg = cfg

        fluid.set_flags({'FLAGS_sort_sum_gradient': cfg.sort_sum_gradient})
Example #16
    def __init__(self, cfg):
        self.cfg = cfg

        def create_data_layer():
            image_real = fluid.data(
                shape=[None, 3, cfg.image_size, cfg.image_size],
                dtype='float32',
                name='image_real')
            label_org = fluid.data(
                shape=[None, cfg.c_dim], dtype='float32', name='label_org')
            label_trg = fluid.data(
                shape=[None, cfg.c_dim], dtype='float32', name='label_trg')
            return image_real, label_org, label_trg

        paddle.manual_seed(cfg.seed)
        paddle.framework.random._manual_program_seed(cfg.seed)
        self.gen_program = fluid.Program()
        gen_startup_program = fluid.Program()

        with fluid.program_guard(self.gen_program, gen_startup_program):
            with fluid.unique_name.guard():
                image_real, label_org, label_trg = create_data_layer()
                generator = Generator(cfg)
                discriminator = Discriminator(cfg)
                g_loss = get_generator_loss(image_real, label_org, label_trg,
                                            generator, discriminator, cfg)
                build_optimizer(generator, cfg, loss=g_loss)

        self.dis_program = fluid.Program()
        dis_startup_program = fluid.Program()
        with fluid.program_guard(self.dis_program, dis_startup_program):
            with fluid.unique_name.guard():
                image_real, label_org, label_trg = create_data_layer()
                generator = Generator(cfg)
                discriminator = Discriminator(cfg)
                d_loss = get_discriminator_loss(image_real, label_org,
                                                label_trg, generator,
                                                discriminator, cfg)
                build_optimizer(discriminator, cfg, loss=d_loss)

        self.executor = fluid.Executor(cfg.place)
        self.scope = fluid.Scope()

        with fluid.scope_guard(self.scope):
            self.executor.run(gen_startup_program)
            self.executor.run(dis_startup_program)

        self.g_loss = g_loss
        self.d_loss = d_loss
Example #17
    def check_with_place(self, place):
        scope = core.Scope()
        out = scope.var("X").get_selected_rows()
        paddle.manual_seed(10)
        op = Operator("uniform_random",
                      Out="X",
                      shape=[1000, 784],
                      min=-5.0,
                      max=10.0,
                      seed=10)
        op.run(scope, place)
        self.assertEqual(out.get_tensor().shape(), [1000, 784])
        hist, prob = output_hist(np.array(out.get_tensor()))
        self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01),
                        "hist: " + str(hist))
Example #18
    def setUp(self):
        self.op_type = "gaussian_random"
        self.set_attrs()
        self.inputs = {}
        self.use_mkldnn = False
        self.attrs = {
            "shape": [123, 92],
            "mean": self.mean,
            "std": self.std,
            "seed": 10,
            "use_mkldnn": self.use_mkldnn
        }
        paddle.manual_seed(10)

        self.outputs = {'Out': np.zeros((123, 92), dtype='float32')}
Example #19
    def test_generator_uniform_random_static(self):
        fluid.disable_dygraph()

        gen = paddle.manual_seed(123123143)

        startup_program = fluid.Program()
        train_program = fluid.Program()
        with fluid.program_guard(train_program, startup_program):
            # example 1:
            # attr shape is a list which doesn't contain tensor Variable.
            result_1 = fluid.layers.uniform_random(shape=[3, 4])
            result_2 = fluid.layers.uniform_random(shape=[3, 4])

            exe = fluid.Executor(fluid.CPUPlace())
            exe.run(startup_program)
            out1 = exe.run(train_program,
                           feed={},
                           fetch_list=[result_1, result_2])
            # re-seed the generator to reproduce the first run
            gen.manual_seed(123123143)
            out2 = exe.run(train_program,
                           feed={},
                           fetch_list=[result_1, result_2])

            out1_res1 = np.array(out1[0])
            out1_res2 = np.array(out1[1])
            out2_res1 = np.array(out2[0])
            out2_res2 = np.array(out2[1])

            if not core.is_compiled_with_cuda():
                self.assertTrue(np.allclose(out1_res1, out2_res1))
                self.assertTrue(np.allclose(out1_res2, out2_res2))
                self.assertTrue(not np.allclose(out1_res2, out1_res1))
Example #20
    def _check_exception(self, exception_message, place=None):
        seed = 90
        batch_size = 128
        if place is None:
            place = fluid.CUDAPlace(
                0) if core.is_compiled_with_cuda() else fluid.CPUPlace()

        with fluid.dygraph.guard(place):
            try:
                paddle.manual_seed(seed)
                paddle.framework.random._manual_program_seed(seed)
                mlp = MLP()
                optimizer = self.get_optimizer_dygraph(
                    parameter_list=mlp.parameters())
            except Exception as e:
                assert str(e) == exception_message
Example #21
    def test_attr_tensor_API(self):
        _seed = 10
        gen = paddle.manual_seed(_seed)
        gen._is_init_py = False
        startup_program = fluid.Program()
        train_program = fluid.Program()
        with fluid.program_guard(train_program, startup_program):
            _min = 5
            _max = 10

            ret = fluid.layers.nn.uniform_random([2, 3, 2],
                                                 min=_min,
                                                 max=_max,
                                                 seed=_seed)
            ret_2 = fluid.layers.nn.uniform_random([2, 3, 2],
                                                   min=_min,
                                                   max=_max,
                                                   seed=_seed)
            res = fluid.layers.equal(ret, ret_2)
            place = fluid.CPUPlace()
            if fluid.core.is_compiled_with_cuda():
                place = fluid.CUDAPlace(0)
            exe = fluid.Executor(place)

            exe.run(startup_program)
            ret_value, cmp_value = exe.run(train_program,
                                           fetch_list=[ret, res])
            self.assertTrue(np.array(cmp_value).all())
            for i in ret_value.flatten():
                self.assertGreaterEqual(i, _min)
                self.assertLess(i, _max)
Example #22
    def test_gen_TruncatedNormal_initializer(self):
        fluid.disable_dygraph()

        gen = paddle.manual_seed(123123143)
        cur_state = paddle.get_cuda_rng_state()

        startup_program = fluid.Program()
        train_program = fluid.Program()
        with fluid.program_guard(train_program, startup_program):
            # example 1:
            # attr shape is a list which doesn't contain tensor Variable.
            x = fluid.layers.uniform_random(shape=[2, 10])
            result_1 = fluid.layers.fc(
                input=x,
                size=10,
                param_attr=fluid.initializer.TruncatedNormal(loc=0.0,
                                                             scale=2.0))
            result_2 = fluid.layers.fc(
                input=x,
                size=10,
                param_attr=fluid.initializer.TruncatedNormal(loc=0.0,
                                                             scale=2.0))

            exe = fluid.Executor(fluid.CPUPlace())
            exe.run(startup_program)
            out1 = exe.run(train_program,
                           feed={},
                           fetch_list=[result_1, result_2])

        paddle.manual_seed(123123143)
        with fluid.program_guard(train_program, startup_program):
            exe.run(startup_program)
            out2 = exe.run(train_program,
                           feed={},
                           fetch_list=[result_1, result_2])

        out1_res1 = np.array(out1[0])
        out1_res2 = np.array(out1[1])
        out2_res1 = np.array(out2[0])
        out2_res2 = np.array(out2[1])

        if core.is_compiled_with_cuda():
            print(">>>>>>> truncated normal static >>>>>>>")
            self.assertTrue(np.allclose(out1_res1, out2_res1))
            self.assertTrue(np.allclose(out1_res2, out2_res2))
            self.assertTrue(not np.allclose(out1_res2, out1_res1))
Example #23
    def test_api(self):
        paddle.manual_seed(10)
        x = fluid.layers.data('x', shape=[16], dtype='float32', lod_level=1)
        y = fluid.layers.fc(x,
                            size=16,
                            param_attr=fluid.initializer.Uniform(low=-0.5,
                                                                 high=0.5,
                                                                 seed=10,
                                                                 diag_num=16,
                                                                 diag_step=16,
                                                                 diag_val=1.0))

        place = fluid.CPUPlace()
        x_tensor = fluid.create_lod_tensor(
            np.random.rand(3, 16).astype("float32"), [[1, 2]], place)
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        ret = exe.run(feed={'x': x_tensor}, fetch_list=[y], return_numpy=False)
Example #24
    def test_repeated_regularization(self):
        l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
        l2 = fluid.regularizer.L2Decay(regularization_coeff=0.01)
        fc_param_attr = fluid.ParamAttr(regularizer=l1)
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            x = fluid.layers.uniform_random([2, 2, 3])
            out = fluid.layers.fc(x, 5, param_attr=fc_param_attr)
            loss = fluid.layers.reduce_sum(out)
            sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2)
            sgd.minimize(loss)
        with fluid.dygraph.guard():
            input = fluid.dygraph.to_variable(
                np.random.randn(3, 2).astype('float32'))
            paddle.manual_seed(1)
            paddle.framework.random._manual_program_seed(1)

            linear1 = fluid.dygraph.Linear(2,
                                           2,
                                           param_attr=fc_param_attr,
                                           bias_attr=fc_param_attr)
            linear2 = fluid.dygraph.Linear(2,
                                           2,
                                           param_attr=fc_param_attr,
                                           bias_attr=fc_param_attr)

            loss1 = linear1(input)
            loss1.backward()
            # set l2 regularizer in optimizer, but l1 in fluid.ParamAttr

            fluid.optimizer.SGD(parameter_list=linear1.parameters(),
                                learning_rate=1e-2,
                                regularization=l2).minimize(loss1)
            # only set l1 in fluid.ParamAttr
            loss2 = linear2(input)
            loss2.backward()
            fluid.optimizer.SGD(parameter_list=linear2.parameters(),
                                learning_rate=1e-2).minimize(loss2)
            # they should both be applied by l1, and keep the same
            self.assertTrue(
                np.allclose(linear1.weight.numpy(), linear2.weight.numpy()),
                "weight should use the regularization in fluid.ParamAttr!")
            self.assertTrue(
                np.allclose(linear1.bias.numpy(), linear2.bias.numpy()),
                "bias should use the regularization in fluid.ParamAttr!")
Example #25
    def check_with_place(self, place):
        scope = core.Scope()
        out = scope.var("X").get_selected_rows()
        shape_1 = scope.var("shape1").get_tensor()
        shape_1.set(np.array([1000]).astype("int64"), place)
        shape_2 = scope.var("shape2").get_tensor()
        shape_2.set(np.array([784]).astype("int64"), place)
        paddle.manual_seed(10)
        op = Operator("uniform_random",
                      ShapeTensorList=["shape1", "shape2"],
                      Out="X",
                      min=-5.0,
                      max=10.0,
                      seed=10)
        op.run(scope, place)
        self.assertEqual(out.get_tensor().shape(), [1000, 784])
        hist, prob = output_hist(np.array(out.get_tensor()))
        self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01),
                        "hist: " + str(hist))
Example #26
    def test_compiled_program_base(self):
        with new_program_scope():
            paddle.manual_seed(self.seed)
            paddle.framework.random._manual_program_seed(self.seed)
            place = fluid.CUDAPlace(
                0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
            exe = fluid.Executor(place)

            loss = simple_fc_net()
            exe.run(fluid.default_startup_program())
            compiled_prog = fluid.CompiledProgram(fluid.default_main_program())

            loss_data, = exe.run(compiled_prog,
                                 feed={
                                     "image": self.img,
                                     "label": self.label
                                 },
                                 fetch_list=[loss.name])
            self.assertTrue(np.array_equal(loss_data[0], self.loss))
Example #27
    @classmethod
    def setUpClass(cls):
        if not fluid.is_compiled_with_cuda():
            # self.skipTest is unavailable in a classmethod; raise directly
            raise unittest.SkipTest('module not tested in an ONLY_CPU build')
        cls.device = paddle.set_device('gpu')
        fluid.enable_dygraph(cls.device)

        sp_num = 1280
        cls.train_dataset = MnistDataset(mode='train', sample_num=sp_num)
        cls.val_dataset = MnistDataset(mode='test', sample_num=sp_num)
        cls.test_dataset = MnistDataset(mode='test',
                                        return_label=False,
                                        sample_num=sp_num)

        cls.train_loader = fluid.io.DataLoader(cls.train_dataset,
                                               places=cls.device,
                                               batch_size=64)
        cls.val_loader = fluid.io.DataLoader(cls.val_dataset,
                                             places=cls.device,
                                             batch_size=64)
        cls.test_loader = fluid.io.DataLoader(cls.test_dataset,
                                              places=cls.device,
                                              batch_size=64)

        seed = 333
        paddle.manual_seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        dy_lenet = LeNetDygraph()
        cls.init_param = dy_lenet.state_dict()
        dynamic_train(dy_lenet, cls.train_loader)

        cls.acc1 = dynamic_evaluate(dy_lenet, cls.val_loader)

        cls.inputs = [InputSpec([-1, 1, 28, 28], 'float32', 'image')]
        cls.labels = [InputSpec([None, 1], 'int64', 'label')]

        cls.save_dir = tempfile.mkdtemp()
        cls.weight_path = os.path.join(cls.save_dir, 'lenet')
        fluid.dygraph.save_dygraph(dy_lenet.state_dict(), cls.weight_path)

        fluid.disable_dygraph()
Example #28
    def _prepare_program(self, config, parallel=True):
        paddle.manual_seed(config.random_seed)
        self.main_program = fluid.Program()
        self.startup_program = fluid.Program()
        with fluid.program_guard(self.main_program, self.startup_program):
            with fluid.unique_name.guard():
                res_vars = lm_model(config.hidden_size,
                                    config.vocab_size,
                                    config.batch_size,
                                    num_layers=config.num_layers,
                                    num_steps=config.num_steps,
                                    init_scale=config.init_scale,
                                    dropout=config.dropout,
                                    rnn_model=config.rnn_model)
                self.loss, self.last_hidden, self.last_cell, self.feed_order = res_vars

                fluid.clip.set_gradient_clip(
                    clip=fluid.clip.GradientClipByGlobalNorm(
                        clip_norm=config.max_grad_norm))

                self.learning_rate = fluid.layers.create_global_var(
                    name="learning_rate",
                    shape=[1],
                    value=1.0,
                    dtype='float32',
                    persistable=True)

                optimizer = fluid.optimizer.SGD(
                    learning_rate=self.learning_rate)
                optimizer.minimize(self.loss)

        self.exe.run(self.startup_program)

        if parallel:
            self.train_program = fluid.compiler.CompiledProgram(
                self.main_program).with_data_parallel(
                    loss_name=self.loss.name,
                    build_strategy=self.build_strategy,
                    exec_strategy=self.exec_strategy)
        else:
            self.train_program = self.main_program
Example #29
    def fit(self, dynamic, num_replicas=None, rank=None):
        if dynamic:
            fluid.enable_dygraph(self.device)
        seed = 333
        paddle.manual_seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        net = LeNet(classifier_activation=None)
        optim_new = fluid.optimizer.Adam(learning_rate=0.001,
                                         parameter_list=net.parameters())
        model = Model(net, inputs=self.inputs, labels=self.labels)
        model.prepare(optim_new,
                      loss=CrossEntropyLoss(reduction="sum"),
                      metrics=Accuracy())
        model.fit(self.train_dataset, batch_size=64, shuffle=False)

        result = model.evaluate(self.val_dataset, batch_size=64)
        np.testing.assert_allclose(result['acc'], self.acc1)

        train_sampler = DistributedBatchSampler(self.train_dataset,
                                                batch_size=64,
                                                shuffle=False,
                                                num_replicas=num_replicas,
                                                rank=rank)
        val_sampler = DistributedBatchSampler(self.val_dataset,
                                              batch_size=64,
                                              shuffle=False,
                                              num_replicas=num_replicas,
                                              rank=rank)

        train_loader = fluid.io.DataLoader(self.train_dataset,
                                           batch_sampler=train_sampler,
                                           places=self.device,
                                           return_list=True)

        val_loader = fluid.io.DataLoader(self.val_dataset,
                                         batch_sampler=val_sampler,
                                         places=self.device,
                                         return_list=True)

        model.fit(train_loader, val_loader)
        if dynamic:
            fluid.disable_dygraph()
Example #30
    def test_generator_gaussian_random_dygraph(self):
        """Test Generator seed."""
        fluid.enable_dygraph()

        paddle.manual_seed(12312321111)
        x = fluid.layers.gaussian_random([120], dtype="float32")
        st1 = paddle.get_cuda_rng_state()
        x1 = fluid.layers.gaussian_random([120], dtype="float32")
        paddle.set_cuda_rng_state(st1)
        x2 = fluid.layers.gaussian_random([120], dtype="float32")
        paddle.manual_seed(12312321111)
        x3 = fluid.layers.gaussian_random([120], dtype="float32")
        x_np = x.numpy()
        x1_np = x1.numpy()
        x2_np = x2.numpy()
        x3_np = x3.numpy()

        if core.is_compiled_with_cuda():
            print(">>>>>>> gaussian random dygraph >>>>>>>")
            self.assertTrue(np.allclose(x1_np, x2_np))
            self.assertTrue(np.allclose(x_np, x3_np))