def mlp_forward(train_program, start_program):
    with static.program_guard(train_program,
                              start_program), utils.unique_name.guard():
        batch_size = 4
        hidden_size = 1024
        sequence_len = 512
        input = static.data(name="input", shape=[batch_size], dtype='int32')
        label = static.data(name="label",
                            shape=[batch_size, 1],
                            dtype='float32')

        auto.shard_tensor(input,
                          dist_attr={
                              "process_mesh": PP_MESH_0,
                              "dims_mapping": [-1]
                          })
        auto.shard_tensor(label,
                          dist_attr={
                              "process_mesh": PP_MESH_1,
                              "dims_mapping": [-1, -1]
                          })

        mlp = MLPLayer(hidden_size=hidden_size,
                       intermediate_size=4 * hidden_size,
                       initializer_range=0.02)

        predict = mlp(input)
        error_cost = paddle.nn.functional.square_error_cost(predict, label)
        loss = paddle.mean(error_cost)

    return loss, train_program, start_program
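PP_MESH_0 and PP_MESH_1 are module-level globals in the original test and are not defined in this snippet. A plausible definition for a two-stage pipeline, assuming the usual import of paddle.distributed.auto_parallel as auto (an illustration, not taken from the source):

# hypothetical two-stage pipeline meshes: stage 0 on rank 0, stage 1 on rank 1
PP_MESH_0 = auto.ProcessMesh([0])
PP_MESH_1 = auto.ProcessMesh([1])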
Example #2
def mlp_forward(train_program, start_program):
    with static.program_guard(train_program,
                              start_program), utils.unique_name.guard():
        batch_size = 4
        hidden_size = 1024
        sqrt_hidden_size = 32
        double_hidden_size = 64

        input = static.data(name="input", shape=[8, 8, 16], dtype='int32')
        input = paddle.reshape(input, [hidden_size])
        input = paddle.reshape(input, [sqrt_hidden_size, sqrt_hidden_size])
        embedding = paddle.nn.Embedding(2, batch_size, sparse=True)
        input = embedding(input)
        input = paddle.reshape(input, [hidden_size, batch_size])
        input = paddle.transpose(input, perm=[1, 0])
        matmulinput = static.data(name="matmulinput",
                                  shape=[hidden_size, hidden_size],
                                  dtype='float32')
        input = layers.matmul(x=input, y=matmulinput)
        label = static.data(name="label",
                            shape=[batch_size, 1],
                            dtype='float32')
        mlp = MLPLayer(hidden_size=hidden_size,
                       intermediate_size=4 * hidden_size,
                       initializer_range=0.02)

        predict = mlp(input)
        error_cost = paddle.nn.functional.square_error_cost(predict, label)
        loss = paddle.mean(error_cost)
        m = paddle.nn.Softmax()
        loss = m(loss)
    return loss, train_program, start_program
Example #3
def linear_static(func, device, dtype, np_x, np_weight, np_bias):
    paddle.enable_static()
    paddle.set_device(device)
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name="x", shape=[None, np_x.shape[1]], dtype=dtype)
            weight = static.data(name="weight",
                                 shape=np_weight.shape,
                                 dtype=dtype)
            bias = static.data(name="bias", shape=np_bias.shape, dtype=dtype)
            x.stop_gradient = False
            weight.stop_gradient = False
            bias.stop_gradient = False
            out = func(x, weight, bias)
            mean_out = paddle.mean(out)
            static.append_backward(mean_out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            out_v, x_grad_v, weight_grad_v, bias_grad_v = exe.run(
                static.default_main_program(),
                feed={
                    "x": np_x.astype(dtype),
                    "weight": np_weight.astype(dtype),
                    "bias": np_bias.astype(dtype)
                },
                fetch_list=[
                    out.name, x.name + "@GRAD", weight.name + "@GRAD",
                    bias.name + "@GRAD"
                ])
    paddle.disable_static()
    return out_v, x_grad_v, weight_grad_v, bias_grad_v
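A minimal way to drive this helper is to pass paddle.nn.functional.linear as func; the shapes below are illustrative, not from the original test:

import numpy as np
import paddle

# x: [batch, in_features], weight: [in_features, out_features], bias: [out_features]
np_x = np.random.random((3, 4)).astype('float32')
np_weight = np.random.random((4, 5)).astype('float32')
np_bias = np.random.random((5,)).astype('float32')
out, dx, dw, db = linear_static(paddle.nn.functional.linear, 'cpu', 'float32',
                                np_x, np_weight, np_bias)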
Example #4
def mlp_pretrain_forward(train_program, start_program):
    with static.program_guard(train_program,
                              start_program), utils.unique_name.guard():
        batch_size = 4
        hidden_size = 1024
        sequence_len = 512
        input = static.data(name="input",
                            shape=[batch_size, sequence_len, hidden_size],
                            dtype='float32')
        label = static.data(name="label",
                            shape=[batch_size, sequence_len, 1],
                            dtype='float32')

        auto.shard_tensor(input,
                          dist_attr={
                              "process_mesh": _global_process_mesh,
                              "dims_mappig": [-1, -1, -1]
                          })

        mlp = MLPLayer(hidden_size=hidden_size,
                       intermediate_size=4 * hidden_size,
                       dropout_ratio=0.1,
                       initializer_range=0.02)

        predict = mlp(input)

        cost = layers.cross_entropy(input=predict, label=label)
        avg_cost = layers.mean(x=cost)

    return avg_cost, train_program, start_program
Example #5
def mlp_pretrain_forward(train_program, start_program):
    with static.program_guard(train_program,
                              start_program), utils.unique_name.guard():
        input = static.data(name="input",
                            shape=[batch_size, sequence_len, hidden_size],
                            dtype='float32')
        label = static.data(name="label",
                            shape=[batch_size, sequence_len, 1],
                            dtype='float32')

        auto.shard_tensor(input,
                          dist_attr={
                              "process_mesh": _global_process_mesh,
                              "dims_mappig": [-1, -1, -1]
                          })

        mlp = MLPLayer(hidden_size=hidden_size,
                       intermediate_size=4 * hidden_size,
                       dropout_ratio=0.1,
                       initializer_range=0.02)

        predict = mlp(input)
        error_cost = paddle.nn.functional.square_error_cost(predict, label)
        loss = paddle.mean(error_cost)

        loader = paddle.io.DataLoader.from_generator(feed_list=[input, label],
                                                     capacity=4 * batch_size,
                                                     iterable=True)

    return loss, train_program, start_program, loader
Example #6
def mlp_forward(train_program, start_program):
    with static.program_guard(train_program,start_program), \
        utils.unique_name.guard():
        batch_size = 4
        hidden_size = 64
        input = static.data(name="input",
                            shape=[batch_size, hidden_size],
                            dtype='float32')
        label = static.data(name="label",
                            shape=[batch_size, 1],
                            dtype='float32')

        if _global_parallel_strategy == "dp_mp_pp":
            auto.shard_tensor(input,
                              dist_attr={
                                  "process_mesh": _global_process_mesh[0],
                                  "dims_mapping": [0, -1]
                              })
        mlp = MLPLayer(hidden_size=hidden_size,
                       intermediate_size=4 * hidden_size,
                       initializer_range=0.02)
        predict = mlp(input)
        error_cost = paddle.nn.functional.square_error_cost(predict, label)
        loss = paddle.mean(error_cost)
    return loss, train_program, start_program
Example #7
def rnn_pretrain_forward(train_program, start_program, topo=None):
    with static.program_guard(train_program,
                              start_program), paddle.utils.unique_name.guard():
        batch_size = 1
        tokens = static.data(
            name="tokens", shape=[batch_size, -1], dtype="int64")
        seq_len = static.data(name="ids", shape=[batch_size], dtype="int64")
        labels = static.data(name="labels", shape=[batch_size], dtype="int64")
        data_holders = [tokens, seq_len, labels]
        vocab_size = 10
        num_classes = 2
        pad_token_id = 0
        model = RNNModel(
            vocab_size,
            num_classes,
            direction='forward',
            padding_idx=pad_token_id,
            pooling_type='max')

        optimizer = paddle.optimizer.Adam(
            parameters=model.parameters(), learning_rate=0.001)
        criterion = paddle.nn.CrossEntropyLoss()
        preds = model(tokens, seq_len)
        loss = criterion(preds, labels)

    return train_program, start_program, loss, optimizer, data_holders
Example #8
def gpt_pretrain_forward(train_program, start_program):
    with static.program_guard(train_program,
                              start_program), utils.unique_name.guard():
        batch_size = 16
        sequence_len = 512
        input_ids = static.data(
            name="input_ids", shape=[batch_size, sequence_len], dtype='int64')
        position_ids = static.data(
            name="position_ids",
            shape=[batch_size, sequence_len],
            dtype='int64')
        attention_mask = static.data(
            name="attention_mask",
            shape=[batch_size, 1, sequence_len, sequence_len],
            dtype='float64')
        labels = static.data(
            name="labels", shape=[batch_size, sequence_len], dtype='int64')
        loss_mask = static.data(
            name="loss_mask", shape=[batch_size, sequence_len], dtype='float64')

        if _global_parallel_strategy == "dp":
            auto.shard_tensor(
                input_ids,
                dist_attr={
                    "process_mesh": _global_process_mesh,
                    "dims_mapping": [0, -1]
                })
        elif _global_parallel_strategy == "dp_mp":
            auto.shard_tensor(
                input_ids,
                dist_attr={
                    "process_mesh": _global_process_mesh,
                    "dims_mapping": [0, -1]
                })

        gpt = GPTModel(
            vocab_size=32768,
            hidden_size=1024,
            num_hidden_layers=2,
            num_attention_heads=16,
            intermediate_size=4096,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=1024,
            type_vocab_size=16,
            initializer_range=0.02,
            pad_token_id=0,
            topo=None)

        model = GPTForPretraining(gpt)

        preds = model(input_ids, position_ids, attention_mask)

        criterion = GPTPretrainingCriterion()

        loss = criterion(preds, labels, loss_mask)

    return train_program, start_program
Example #9
def network():
    img = static.data(name='image', shape=[None, 784])
    hidden = static.nn.fc(x=img, size=200, activation='relu')
    hidden = F.dropout(hidden, p=0.5)
    loss = F.cross_entropy(input=static.nn.fc(hidden, size=10, activation='softmax'),
                           label=static.data(name='label',
                                             shape=[None, 1],
                                             dtype='int64'))
    avg_loss = paddle.mean(loss)
    return avg_loss
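A sketch of building and running this network; the program/executor scaffolding and feed shapes are assumptions for illustration, not part of the source:

import numpy as np

paddle.enable_static()
main_prog, startup_prog = static.Program(), static.Program()
with static.program_guard(main_prog, startup_prog):
    avg_loss = network()
exe = static.Executor(paddle.CPUPlace())
exe.run(startup_prog)
loss_v, = exe.run(main_prog,
                  feed={'image': np.random.rand(8, 784).astype('float32'),
                        'label': np.random.randint(0, 10, (8, 1), dtype='int64')},
                  fetch_list=[avg_loss])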
Example #10
    def get_prog(self):
        main_program = Program()
        with program_guard(main_program):
            a = static.data(name="a", shape=[32, 32], dtype='float32')
            b = static.data(name="b", shape=[32, 32], dtype='float32')
            out = a / b
            # rebind out to the fp16 sum; the fp32 divide and the fp16
            # casts/add are all recorded in main_program
            fp16_a = a.cast(paddle.float16)
            fp16_b = b.cast(paddle.float16)
            out = fp16_a + fp16_b
        return main_program, out
Example #11
    def get_model(self, place, gradient_merge, batch_size, max_step):
        paddle.seed(2021)
        random.seed(2021)
        np.random.seed(2021)

        hidden_size = 128

        global _global_parallel_strategy
        global _global_process_mesh
        world_size = paddle.distributed.get_world_size()
        if world_size == 1:
            _global_parallel_strategy = "dp"
            _global_process_mesh = auto.ProcessMesh([0])
        elif world_size == 2:
            _global_parallel_strategy = "dp"
            _global_process_mesh = auto.ProcessMesh([0, 1])

        train_program = static.Program()
        startup_program = static.Program()
        dist_strategy = fleet.DistributedStrategy()
        dist_strategy.semi_auto = True
        #if gradient_merge:
        #    dist_strategy.gradient_merge = True
        #    dist_strategy.gradient_merge_configs = {"k_steps": 4, "avg": True}
        fleet.init(is_collective=True, strategy=dist_strategy)

        with static.program_guard(train_program, startup_program), \
            utils.unique_name.guard():
            input = static.data(name="input",
                                shape=[batch_size, hidden_size],
                                dtype='float32')
            label = static.data(name="label",
                                shape=[batch_size, 1],
                                dtype='float32')
            input.stop_gradient = False
            loss = mlp_forward(input, label, hidden_size)

        optimizer = paddle.fluid.optimizer.SGDOptimizer(learning_rate=0.01)
        #optimizer = paddle.fluid.optimizer.Adam(learning_rate=0.01)
        optimizer = fleet.distributed_optimizer(optimizer)
        _, self._params_grads, dist_startup_prog, dist_main_prog = optimizer.minimize(
            loss, startup_program)

        input_data = np.random.random(size=(128,
                                            hidden_size)).astype('float32')
        label_data = np.random.random(size=(128, 1)).astype('float32')

        def reader():
            for i in range(max_step):
                x_data = input_data[i * batch_size:(i + 1) * batch_size, :]
                y_data = label_data[i * batch_size:(i + 1) * batch_size, :]
                yield x_data, y_data

        return dist_main_prog, dist_startup_prog, [input,
                                                   label], [loss], reader
Example #12
def build_program(main_program,
                  startup_program,
                  image_shape,
                  dataset,
                  archs,
                  args,
                  places,
                  is_test=False):
    with static.program_guard(main_program, startup_program):
        with paddle.utils.unique_name.guard():
            data_shape = [None] + image_shape
            data = static.data(name='data', shape=data_shape, dtype='float32')
            label = static.data(name='label', shape=[None, 1], dtype='int64')
            if args.data == 'cifar10':
                paddle.assign(paddle.reshape(label, [-1, 1]), label)
            if is_test:
                data_loader = paddle.io.DataLoader(dataset,
                                                   places=places,
                                                   feed_list=[data, label],
                                                   drop_last=False,
                                                   batch_size=args.batch_size,
                                                   return_list=False,
                                                   shuffle=False)
            else:
                data_loader = paddle.io.DataLoader(dataset,
                                                   places=places,
                                                   feed_list=[data, label],
                                                   drop_last=True,
                                                   batch_size=args.batch_size,
                                                   return_list=False,
                                                   shuffle=True,
                                                   use_shared_memory=True,
                                                   num_workers=4)
            output = archs(data)
            output = static.nn.fc(output, size=args.class_dim)

            softmax_out = F.softmax(output)
            cost = F.cross_entropy(softmax_out, label=label)
            avg_cost = paddle.mean(cost)
            acc_top1 = paddle.metric.accuracy(input=softmax_out,
                                              label=label,
                                              k=1)
            acc_top5 = paddle.metric.accuracy(input=softmax_out,
                                              label=label,
                                              k=5)

            if not is_test:
                optimizer = create_optimizer(args)
                optimizer.minimize(avg_cost)
    return data_loader, avg_cost, acc_top1, acc_top5
Example #13
    def test_static(self):
        mp, sp = static.Program(), static.Program()
        with static.program_guard(mp, sp):
            x = static.data("x", shape=[10, 10], dtype="float64")
            y = static.data("y", shape=[10, 10], dtype="float64")
            out = paddle.complex(x, y)

        exe = static.Executor()
        exe.run(sp)
        [out_np] = exe.run(mp,
                           feed={"x": self.x,
                                 "y": self.y},
                           fetch_list=[out])
        self.assertTrue(np.allclose(self.out, out_np))
Example #14
def mlp_pretrain_forward(train_program, start_program):
    with static.program_guard(train_program,
                              start_program), utils.unique_name.guard():
        batch_size = 4
        hidden_size = 1024
        sequence_len = 512
        input = static.data(name="input",
                            shape=[batch_size, sequence_len, hidden_size],
                            dtype='float32')

        if _global_parallel_strategy == "dp":
            auto.shard_tensor(input,
                              dist_attr={
                                  "process_mesh": _global_process_mesh,
                                  "dims_mapping": [0, -1, -1]
                              })
        elif _global_parallel_strategy == "dp_mp":
            auto.shard_tensor(input,
                              dist_attr={
                                  "process_mesh": _global_process_mesh,
                                  "dims_mapping": [0, -1, -1]
                              })

        mlp = MLPLayer(hidden_size=hidden_size,
                       intermediate_size=4 * hidden_size,
                       dropout_ratio=0.1,
                       initializer_range=0.02)
        out = mlp(input)
    return train_program, start_program
Example #15
    def check_static_result(self, place):
        from paddle.distributed.fleet.meta_parallel.parallel_layers.random import dropout
        with static.program_guard(static.Program(), static.Program()):
            input = static.data(name="input", shape=[40, 40], dtype="float32")
            res1 = dropout(
                input,
                p=0.3,
                training=True,
                mode='upscale_in_train',
                rng_name='seed0')
            res2 = dropout(
                input,
                p=0.3,
                training=True,
                mode='upscale_in_train',
                rng_name='seed1')
            res3 = dropout(input, p=0.3)

            in_np = np.random.random([40, 40]).astype("float32")

            exe = static.Executor(place)
            res_list = [res1, res2]
            for i in range(2):
                out1, out2 = exe.run(static.default_main_program(),
                                     feed={"input": in_np},
                                     fetch_list=res_list)
                self.assertTrue(np.allclose(out1, out2))
Example #16
def custom_relu_static(func,
                       device,
                       dtype,
                       np_x,
                       use_func=True,
                       test_infer=False):
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = func(x) if use_func else paddle.nn.functional.relu(x)
            static.append_backward(out)

            exe = static.Executor()
            exe.run(static.default_startup_program())
            # in static mode, x's buffer may be reused by out, so only fetch out
            out_v = exe.run(static.default_main_program(),
                            feed={'X': np_x},
                            fetch_list=[out.name])

    paddle.disable_static()
    return out_v
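In the surrounding test, func is a just-in-time compiled custom operator. A hedged sketch of obtaining one with paddle.utils.cpp_extension.load; the source file names are placeholders:

import numpy as np
from paddle.utils.cpp_extension import load

# custom_relu.cc / custom_relu.cu are hypothetical custom-op sources
custom_ops = load(name='custom_jit_ops',
                  sources=['custom_relu.cc', 'custom_relu.cu'])
np_x = np.random.uniform(-1, 1, [4, 8]).astype('float32')
out = custom_relu_static(custom_ops.custom_relu, 'cpu', 'float32', np_x)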
Example #17
    def setUp(self):
        self._places = [paddle.CPUPlace()]
        if paddle.device.is_compiled_with_cuda():
            self._places.append(paddle.CUDAPlace(0))
        self._ema_decay = 0.999
        self._param_name = "fc.weight"
        self._train_program = static.Program()
        self._startup_prog = static.Program()

        strategy = paddle.distributed.fleet.DistributedStrategy()
        strategy.without_graph_optimization = True
        paddle.distributed.fleet.init(is_collective=True, strategy=strategy)

        with static.program_guard(self._train_program, self._startup_prog):
            with utils.unique_name.guard():
                data = static.data(name='x', shape=[-1, 5], dtype='float32')
                hidden = static.nn.fc(x=data,
                                      size=10,
                                      weight_attr=self._param_name)
                cost = paddle.mean(hidden)

                self._test_program = static.default_main_program().clone(
                    for_test=True)

                optimizer = paddle.optimizer.Adam(learning_rate=0.001)
                optimizer = paddle.distributed.fleet.distributed_optimizer(
                    optimizer, strategy)
                optimizer.minimize(cost)

                self._ema = static.ExponentialMovingAverage(self._ema_decay)
                self._ema.update()
Example #18
def custom_relu_static_pe(func, device, dtype, np_x, use_func=True):
    paddle.enable_static()
    paddle.set_device(device)

    places = static.cpu_places() if device == 'cpu' else static.cuda_places()
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = func(x) if use_func else paddle.nn.functional.relu(x)
            static.append_backward(out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            # in static mode, x's buffer may be reused by out, so only fetch out
            compiled_prog = static.CompiledProgram(
                static.default_main_program()).with_data_parallel(
                    loss_name=out.name, places=places)
            out_v = exe.run(compiled_prog,
                            feed={'X': np_x},
                            fetch_list=[out.name])

    paddle.disable_static()
    return out_v
Example #19
def custom_relu_static_inference(func, device, np_data, np_label, path_prefix):
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            # simple module
            data = static.data(name='data',
                               shape=[None, 1, 28, 28],
                               dtype='float32')
            label = static.data(name='label', shape=[None, 1], dtype='int64')

            hidden = static.nn.fc(data, size=128)
            hidden = func(hidden)
            hidden = static.nn.fc(hidden, size=128)
            predict = static.nn.fc(hidden, size=10, activation='softmax')
            loss = paddle.nn.functional.cross_entropy(input=predict,
                                                      label=label)
            avg_loss = paddle.mean(loss)

            opt = paddle.optimizer.SGD(learning_rate=0.1)
            opt.minimize(avg_loss)

            # run the startup program
            exe = static.Executor()
            exe.run(static.default_startup_program())

            # train
            for i in range(4):
                avg_loss_v = exe.run(static.default_main_program(),
                                     feed={
                                         'data': np_data,
                                         'label': np_label
                                     },
                                     fetch_list=[avg_loss])

            # save inference model
            static.save_inference_model(path_prefix, [data], [predict], exe)

            # get train predict value
            predict_v = exe.run(static.default_main_program(),
                                feed={
                                    'data': np_data,
                                    'label': np_label
                                },
                                fetch_list=[predict])

    return predict_v
Example #20
    def test_static_empty_input_error(self):
        paddle.enable_static()

        x_list_n_n, x_list_m_n = gen_empty_input()
        for p in (p_list_n_n + p_list_m_n):
            for x in x_list_n_n:
                with static.program_guard(static.Program(), static.Program()):
                    x_data = static.data("X", shape=x.shape, dtype=x.dtype)
                    self.assertRaises(ValueError, paddle.linalg.cond, x_data,
                                      p)

        for p in (p_list_n_n + p_list_m_n):
            for x in x_list_m_n:
                with static.program_guard(static.Program(), static.Program()):
                    x_data = static.data("X", shape=x.shape, dtype=x.dtype)
                    self.assertRaises(ValueError, paddle.linalg.cond, x_data,
                                      p)
Example #21
def build_program():
    program = static.Program()
    with static.program_guard(program):
        data = static.data(name='x', shape=[None, 13], dtype='float32')
        hidden = static.nn.fc(data, size=10)
        loss = paddle.mean(hidden)
        paddle.optimizer.SGD(learning_rate=0.01).minimize(loss)
    return program
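Since program_guard is given only a main program here, the parameter initialization ops go to paddle.static.default_startup_program(); a usage sketch under that reading:

import numpy as np

paddle.enable_static()
program = build_program()
exe = static.Executor(paddle.CPUPlace())
exe.run(static.default_startup_program())
exe.run(program, feed={'x': np.random.rand(4, 13).astype('float32')})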
Example #22
def concat_static(func, dtype, np_inputs, axis_v, with_attr=False):
    paddle.enable_static()
    paddle.set_device("cpu")
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x1 = static.data(name="x1", shape=[2, 3], dtype=dtype)
            x2 = static.data(name="x2", shape=[2, 3], dtype=dtype)
            if with_attr:
                axis = axis_v
            else:
                axis = paddle.full(shape=[1], dtype='int64', fill_value=axis_v)
            x1.stop_gradient = False
            x2.stop_gradient = False

            total_time = 0
            for i in range(TEST_TIME):
                start = time.time()
                out = func([x1, x2], axis)
                total_time += time.time() - start
            print("- static mode concat time cost: {} s".format(total_time /
                                                                TEST_TIME))

            # mean only support float, so here use sum
            sum_out = paddle.sum(out)
            static.append_backward(sum_out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            if with_attr:
                feed_dict = {
                    "x1": np_inputs[0].astype(dtype),
                    "x2": np_inputs[1].astype(dtype)
                }
            else:
                feed_dict = {
                    "x1": np_inputs[0].astype(dtype),
                    "x2": np_inputs[1].astype(dtype),
                    "axis": axis
                }
            out_v, x1_grad_v, x2_grad_v = exe.run(
                static.default_main_program(),
                feed=feed_dict,
                fetch_list=[out.name, x1.name + "@GRAD", x2.name + "@GRAD"])
    paddle.disable_static()
    return out_v, x1_grad_v, x2_grad_v
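TEST_TIME is a module-level constant in the original test. A hedged invocation sketch; with_attr=True passes the axis as a plain Python int, and paddle.concat matches the func signature used above:

import numpy as np

TEST_TIME = 10  # assumed value
np_inputs = [np.random.rand(2, 3).astype('float32'),
             np.random.rand(2, 3).astype('float32')]
out, x1_grad, x2_grad = concat_static(paddle.concat, 'float32', np_inputs, 0,
                                      with_attr=True)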
Example #23
    def net(self):
        input_size = 4096
        output_size = 4096
        x = static.data(name='X', shape=[1000, 4096], dtype='float32')
        label = static.data(name='Y', shape=[1000, 4096], dtype='float32')
        model = SimpleNet(input_size, output_size)  # define the model
        mse = paddle.nn.MSELoss()

        out = model(x)
        loss = mse(out, label)

        opt = paddle.fluid.optimizer.Adam(
            learning_rate=0.0001, parameter_list=model.parameters())  # define the optimizer
        opt = paddle.static.amp.decorate(opt,
                                         init_loss_scaling=128.0,
                                         use_dynamic_loss_scaling=True)
        opt.minimize(loss)
        return model, loss, opt
Example #24
    def test_static_api_error(self):
        paddle.enable_static()
        # test raising errors when 'cond' is called in static mode
        p_list_error = ('f ro', 'fre', 'NUC', -1.6, 0, 5)
        x_list_n_n, x_list_m_n = gen_input()
        for p in p_list_error:
            for x in (x_list_n_n + x_list_m_n):
                with static.program_guard(static.Program(), static.Program()):
                    x_data = static.data("X", shape=x.shape, dtype=x.dtype)
                    self.assertRaises(ValueError, paddle.linalg.cond, x_data,
                                      p)

        for p in p_list_n_n:
            for x in x_list_m_n:
                with static.program_guard(static.Program(), static.Program()):
                    x_data = static.data("X", shape=x.shape, dtype=x.dtype)
                    self.assertRaises(ValueError, paddle.linalg.cond, x_data,
                                      p)
Example #25
    def test_static(self):
        mp, sp = static.Program(), static.Program()
        with static.program_guard(mp, sp):
            x = static.data("x", shape=[2, 3], dtype="complex128")
            out = paddle.angle(x)

        exe = static.Executor()
        exe.run(sp)
        [out_np] = exe.run(mp, feed={"x": self.x}, fetch_list=[out])
        self.assertTrue(np.allclose(self.out, out_np))
Example #26
def create_data_loader(image_shape, is_train, args):
    image = static.data(name="image",
                        shape=[None] + image_shape,
                        dtype="float32")
    label = static.data(name="label", shape=[None, 1], dtype="int64")
    data_loader = paddle.io.DataLoader.from_generator(feed_list=[image, label],
                                                      capacity=64,
                                                      use_double_buffer=True,
                                                      iterable=True)
    drop_path_prob = ''
    drop_path_mask = ''
    if is_train:
        drop_path_prob = static.data(name="drop_path_prob",
                                     shape=[args.batch_size, 1],
                                     dtype="float32")
        drop_path_mask = static.data(name="drop_path_mask",
                                     shape=[args.batch_size, 20, 4, 2],
                                     dtype="float32")

    return data_loader, image, label, drop_path_prob, drop_path_mask
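A sketch of wiring the returned loader; my_reader (a sample generator) and args are placeholders, and set_sample_list_generator is the usual way to feed a from_generator DataLoader:

loader, image, label, _, _ = create_data_loader([3, 32, 32], False, args)
loader.set_sample_list_generator(
    paddle.batch(my_reader, batch_size=args.batch_size),
    places=static.cpu_places())
for batch in loader():
    pass  # pass batch as the feed argument of Executor.run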
Example #27
def test_static_assert_true(self, x_list, p_list):
    for p in p_list:
        for x in x_list:
            with static.program_guard(static.Program(), static.Program()):
                input_data = static.data("X", shape=x.shape, dtype=x.dtype)
                output = paddle.linalg.cond(input_data, p)
                exe = static.Executor()
                result = exe.run(feed={"X": x}, fetch_list=[output])
                expected_output = np.linalg.cond(x, p)
                np.testing.assert_allclose(result[0],
                                           expected_output,
                                           rtol=5e-5)
Example #28
    def test_dtype_error(self):
        # in static mode
        with self.assertRaises(TypeError):
            with static.program_guard(static.Program()):
                x = static.data(name="x", shape=self._shape, dtype="float32")
                out = paddle_apis[self.api](x, name="real_res")

        # in dynamic mode
        with self.assertRaises(RuntimeError):
            with fluid.dygraph.guard():
                input = np.random.random(self._shape).astype("float32")
                input_t = paddle.to_tensor(input)
                res = paddle_apis[self.api](input_t)
Example #29
def mlp_forward(train_program, start_program):
    with static.program_guard(train_program,
                              start_program), utils.unique_name.guard():
        batch_size = 4
        hidden_size = 1024
        sequence_len = 512
        input = static.data(name="input",
                            shape=[batch_size, hidden_size],
                            dtype='float32')
        label = static.data(name="label",
                            shape=[batch_size, 1],
                            dtype='float32')
        loss_func = paddle.nn.CrossEntropyLoss(reduction="none")
        mlp = MLPLayer(hidden_size=hidden_size,
                       intermediate_size=4 * hidden_size,
                       initializer_range=0.02)

        predict = mlp(input)
        error_cost = loss_func(predict, label)
        loss = paddle.mean(error_cost)

    return loss, train_program, start_program
Example #30
def decoder_pretrain_forward(train_program, start_program):
    with static.program_guard(train_program,
                              start_program), utils.unique_name.guard():
        batch_size = 4
        hidden_size = 1024
        sequence_len = 512
        input_ids = static.data(name="input_ids",
                                shape=[batch_size, sequence_len],
                                dtype='int64')
        position_ids = static.data(name="position_ids",
                                   shape=[batch_size, sequence_len],
                                   dtype='int64')
        decoder = DecoderLayer(vocab_size=32768,
                               hidden_size=hidden_size,
                               sequence_len=sequence_len,
                               max_position_embeddings=512,
                               intermediate_size=4 * hidden_size,
                               num_heads=16,
                               dropout_ratio=0.1,
                               initializer_range=0.02)
        out = decoder(input_ids, position_ids)

    return train_program, start_program