Example #1
def linear_static(func, device, dtype, np_x, np_weight, np_bias):
    paddle.enable_static()
    paddle.set_device(device)
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name="x", shape=[None, np_x.shape[1]], dtype=dtype)
            weight = static.data(name="weight",
                                 shape=np_weight.shape,
                                 dtype=dtype)
            bias = static.data(name="bias", shape=np_bias.shape, dtype=dtype)
            x.stop_gradient = False
            weight.stop_gradient = False
            bias.stop_gradient = False
            out = func(x, weight, bias)
            mean_out = paddle.mean(out)
            static.append_backward(mean_out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            out_v, x_grad_v, weight_grad_v, bias_grad_v = exe.run(
                static.default_main_program(),
                feed={
                    "x": np_x.astype(dtype),
                    "weight": np_weight.astype(dtype),
                    "bias": np_bias.astype(dtype)
                },
                fetch_list=[
                    out.name, x.name + "@GRAD", weight.name + "@GRAD",
                    bias.name + "@GRAD"
                ])
    paddle.disable_static()
    return out_v, x_grad_v, weight_grad_v, bias_grad_v
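A minimal invocation sketch for the helper above; using paddle.nn.functional.linear and these shapes is an illustrative assumption, not part of the original test.

import numpy as np

np_x = np.random.random([4, 8]).astype("float32")
np_weight = np.random.random([8, 4]).astype("float32")
np_bias = np.random.random([4]).astype("float32")
out, x_grad, w_grad, b_grad = linear_static(
    paddle.nn.functional.linear, "cpu", "float32", np_x, np_weight, np_bias)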
Example #2
    def check_static_result(self, place):
        from paddle.distributed.fleet.meta_parallel.parallel_layers.random import dropout
        with static.program_guard(static.Program(), static.Program()):
            input = static.data(name="input", shape=[40, 40], dtype="float32")
            res1 = dropout(
                input,
                p=0.3,
                training=True,
                mode='upscale_in_train',
                rng_name='seed0')
            res2 = dropout(
                input,
                p=0.3,
                training=True,
                mode='upscale_in_train',
                rng_name='seed1')
            res3 = dropout(input, p=0.3)

            in_np = np.random.random([40, 40]).astype("float32")

            exe = static.Executor(place)
            res_list = [res1, res2]
            for i in range(2):
                out1, out2 = exe.run(static.default_main_program(),
                                     feed={"input": in_np},
                                     fetch_list=res_list)
                self.assertTrue(np.allclose(out1, out2))
Example #3
    def __init__(self,
                 model=None,
                 inputs_spec=None,
                 labels_spec=None,
                 cluster=None,
                 strategy=None):
        self.model = model
        self.inputs_spec = self._validate_spec(inputs_spec)
        self.labels_spec = self._validate_spec(labels_spec)
        self.cluster = cluster
        # if self.cluster is None:
        #     self.cluster = get_default_cluster()
        self.strategy = strategy
        if self.strategy is None:
            self.strategy = fleet.DistributedStrategy()

        self._executor = None
        self._cur_rank = paddle.distributed.get_rank()
        self._nranks = paddle.distributed.get_world_size()
        self._saver = DistributedSaver()
        self._logger = get_logger(logging.INFO)

        self._default_strategy = None
        self._orig_main_prog = static.default_main_program()
        self._orig_startup_prog = static.default_startup_program()
        self._orig_dist_context = get_default_distributed_context()
        self._dist_contexts = {}
        self._serial_main_progs = {}
        self._serial_startup_progs = {}
        self._dist_main_progs = defaultdict(dict)  # dist main programs
        self._dist_startup_progs = defaultdict(dict)  # dist startup programs
        self._feed_vars = {}
        self._fetch_vars = {}
Example #4
def custom_relu_static(func,
                       device,
                       dtype,
                       np_x,
                       use_func=True,
                       test_infer=False):
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = func(x) if use_func else paddle.nn.functional.relu(x)
            static.append_backward(out)

            exe = static.Executor()
            exe.run(static.default_startup_program())
            # in static mode, the buffer of x may be overwritten by out
            out_v = exe.run(static.default_main_program(),
                            feed={'X': np_x},
                            fetch_list=[out.name])

    paddle.disable_static()
    return out_v
Example #5
    def setUp(self):
        self._places = [paddle.CPUPlace()]
        if paddle.device.is_compiled_with_cuda():
            self._places.append(paddle.CUDAPlace(0))
        self._ema_decay = 0.999
        self._param_name = "fc.weight"
        self._train_program = static.Program()
        self._startup_prog = static.Program()

        strategy = paddle.distributed.fleet.DistributedStrategy()
        strategy.without_graph_optimization = True
        paddle.distributed.fleet.init(is_collective=True, strategy=strategy)

        with static.program_guard(self._train_program, self._startup_prog):
            with utils.unique_name.guard():
                data = static.data(name='x', shape=[-1, 5], dtype='float32')
                hidden = static.nn.fc(x=data,
                                      size=10,
                                      weight_attr=self._param_name)
                cost = paddle.mean(hidden)

                self._test_program = static.default_main_program().clone(
                    for_test=True)

                optimizer = paddle.optimizer.Adam(learning_rate=0.001)
                optimizer = paddle.distributed.fleet.distributed_optimizer(
                    optimizer, strategy)
                optimizer.minimize(cost)

                self._ema = static.ExponentialMovingAverage(self._ema_decay)
                self._ema.update()
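A hedged usage sketch (an assumption, not part of the test above): at evaluation time, ExponentialMovingAverage.apply() temporarily swaps the averaged weights in and restores the raw weights when the context exits.

    def eval_with_ema(self, exe, feed):
        # weights read inside this context are the EMA values
        with self._ema.apply(exe):
            return exe.run(self._test_program,
                           feed=feed,
                           fetch_list=[self._param_name])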
Example #6
def custom_relu_static_pe(func, device, dtype, np_x, use_func=True):
    paddle.enable_static()
    paddle.set_device(device)

    places = static.cpu_places() if device == 'cpu' else static.cuda_places()
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = func(x) if use_func else paddle.nn.functional.relu(x)
            static.append_backward(out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            # in static mode, the buffer of x may be overwritten by out
            compiled_prog = static.CompiledProgram(
                static.default_main_program()).with_data_parallel(
                    loss_name=out.name, places=places)
            out_v = exe.run(compiled_prog,
                            feed={'X': np_x},
                            fetch_list=[out.name])

    paddle.disable_static()
    return out_v
Example #7
def custom_relu_static_inference(func, device, np_data, np_label, path_prefix):
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            # simple module
            data = static.data(name='data',
                               shape=[None, 1, 28, 28],
                               dtype='float32')
            label = static.data(name='label', shape=[None, 1], dtype='int64')

            hidden = static.nn.fc(data, size=128)
            hidden = func(hidden)
            hidden = static.nn.fc(hidden, size=128)
            predict = static.nn.fc(hidden, size=10, activation='softmax')
            # predict already applies softmax, so skip it in the loss
            loss = paddle.nn.functional.cross_entropy(input=predict,
                                                      label=label,
                                                      use_softmax=False)
            avg_loss = paddle.mean(loss)

            opt = paddle.optimizer.SGD(learning_rate=0.1)
            opt.minimize(avg_loss)

            # run startup program
            exe = static.Executor()
            exe.run(static.default_startup_program())

            # train
            for i in range(4):
                avg_loss_v = exe.run(static.default_main_program(),
                                     feed={
                                         'data': np_data,
                                         'label': np_label
                                     },
                                     fetch_list=[avg_loss])

            # save inference model
            static.save_inference_model(path_prefix, [data], [predict], exe)

            # get train predict value
            predict_v = exe.run(static.default_main_program(),
                                feed={
                                    'data': np_data,
                                    'label': np_label
                                },
                                fetch_list=[predict])

    return predict_v
Example #8
def test_relu2_static(device, dtype):
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = librelu2_op.relu2(x)
            static.append_backward(out)
            print(static.default_main_program())

            exe = static.Executor()
            exe.run(static.default_startup_program())

            x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
            out, = exe.run(static.default_main_program(),
                           feed={'X': x},
                           fetch_list=[out.name])
            print(out)
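librelu2_op above is a custom C++ operator module; a typical way to obtain it is JIT compilation via paddle.utils.cpp_extension (the source file names here are illustrative assumptions):

from paddle.utils.cpp_extension import load

# compiles the sources on first use and exposes relu2 as a Python API
librelu2_op = load(name="librelu2_op", sources=["relu_op.cc", "relu_op.cu"])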
Example #9
    def check_static_result(self, place):
        import paddle.distributed.fleet.meta_parallel.parallel_layers.random as random
        with static.program_guard(static.Program(), static.Program()):
            res1 = random.determinate_seed('seed0')

            exe = static.Executor(place)
            res_list = [res1]
            for i in range(2):
                out1, = exe.run(static.default_main_program(),
                                fetch_list=res_list)
                self.assertEqual(out1, np.cast['int32'](self.rng1.random()))
Example #10
def test_relu2_static(device, dtype, use_custom=True):
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = custom_relu_op_rf.relu2(
                x) if use_custom else paddle.nn.functional.relu(x)
            static.append_backward(out)
            print(static.default_main_program())

            places = static.cuda_places()
            print(places)
            exe = static.Executor()
            compiled_prog = static.CompiledProgram(
                static.default_main_program()).with_data_parallel(
                    loss_name=out.name, places=places)

            x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
            out, = exe.run(compiled_prog, feed={'X': x}, fetch_list=[out.name])
            print(out)
Example #11
def run_inference(drop_last):
    loader = paddle.io.DataLoader.from_generator(feed_list=[x],
            capacity=8, drop_last=drop_last)
    loader.set_batch_generator(batch_generator, static.cpu_places())

    exe = static.Executor(paddle.CPUPlace())
    prog = static.CompiledProgram(static.default_main_program())
    prog = prog.with_data_parallel()

    result = []
    for data in loader():
        each_ret, = exe.run(prog, feed=data, fetch_list=[y])
        result.extend(each_ret)
    return result
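The snippet assumes x, y, and batch_generator from the surrounding program; a minimal sketch of those definitions (shapes and batch counts are illustrative assumptions):

import numpy as np

BATCH_NUM = 4
BATCH_SIZE = 8

x = static.data(name='x', shape=[None, 1], dtype='float32')
y = x + 1.0

def batch_generator():
    # yields one feed tensor per element of feed_list
    for _ in range(BATCH_NUM):
        yield [np.random.random([BATCH_SIZE, 1]).astype('float32')]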
Example #12
def concat_static(func, dtype, np_inputs, axis_v, with_attr=False):
    paddle.enable_static()
    paddle.set_device("cpu")
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x1 = static.data(name="x1", shape=[2, 3], dtype=dtype)
            x2 = static.data(name="x2", shape=[2, 3], dtype=dtype)
            if with_attr:
                axis = axis_v
            else:
                axis = paddle.full(shape=[1], dtype='int64', fill_value=axis_v)
            x1.stop_gradient = False
            x2.stop_gradient = False

            total_time = 0
            # each iteration appends a new concat op to the program; this
            # times op construction in static mode, and `out` binds to the
            # last appended op
            for i in range(TEST_TIME):
                start = time.time()
                out = func([x1, x2], axis)
                total_time += time.time() - start
            print("- static mode concat time cost: {} s".format(total_time /
                                                                TEST_TIME))

            # mean only supports float, so use sum here
            sum_out = paddle.sum(out)
            static.append_backward(sum_out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            # when with_attr is False, `axis` is produced by paddle.full
            # inside the program, so it is not fed in either case
            feed_dict = {
                "x1": np_inputs[0].astype(dtype),
                "x2": np_inputs[1].astype(dtype)
            }
            out_v, x1_grad_v, x2_grad_v = exe.run(
                static.default_main_program(),
                feed=feed_dict,
                fetch_list=[out.name, x1.name + "@GRAD", x2.name + "@GRAD"])
    paddle.disable_static()
    return out_v, x1_grad_v, x2_grad_v
Example #13
def conj_static(func, shape, dtype, np_input):
    paddle.enable_static()
    paddle.set_device("cpu")
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name="x", shape=shape, dtype=dtype)
            x.stop_gradient = False
            out = func(x)
            sum_out = paddle.sum(out)
            static.append_backward(sum_out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            out_v, x_grad_v = exe.run(static.default_main_program(),
                                      feed={"x": np_input},
                                      fetch_list=[out.name, x.name + "@GRAD"])
    paddle.disable_static()
    return out_v, x_grad_v
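A hedged usage sketch, exercising the helper with the built-in paddle.conj on complex input (the shape is illustrative):

import numpy as np

np_x = (np.random.random([2, 3]) +
        1j * np.random.random([2, 3])).astype("complex64")
out_v, x_grad_v = conj_static(paddle.conj, [2, 3], "complex64", np_x)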
Example #14
def linear_static(func, dtype, np_x, np_weight, np_bias):
    paddle.enable_static()
    paddle.set_device("cpu")
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name="x", shape=np_x.shape, dtype=dtype)
            weight = static.data(
                name="weight", shape=np_weight.shape, dtype=dtype)
            bias = static.data(name="bias", shape=np_bias.shape, dtype=dtype)
            out = func(x, weight, bias)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            out_v, = exe.run(static.default_main_program(),
                             feed={
                                 "x": np_x.astype(dtype),
                                 "weight": np_weight.astype(dtype),
                                 "bias": np_bias.astype(dtype)
                             },
                             fetch_list=[out.name])
    paddle.disable_static()
    return out_v
Example #15
import paddle
import paddle.static as static

paddle.enable_static()

startup_prog = static.Program()
main_prog = static.Program()
with static.program_guard(main_prog, startup_prog):
    x = static.data(name='X', shape=[1000, 784], dtype='float32')

    y = static.data(name='Y', shape=[784, 100], dtype='float32')

    z = paddle.matmul(x=x, y=y)

    binary_str = static.default_main_program().desc.serialize_to_string()
    prog_restored = static.default_main_program().parse_from_string(binary_str)

    print(static.default_main_program())
    print(prog_restored)
Example #16
import os
import paddle
import paddle.static as static

paddle.enable_static()

os.environ['CPU_NUM'] = str(2)
places = static.cpu_places()

data = static.data(name="x", shape=[None, 1], dtype="float32")
hidden = static.nn.fc(x=data, size=10)
loss = paddle.mean(hidden)
paddle.optimizer.SGD(learning_rate=0.01).minimize(loss)

build_strategy = static.BuildStrategy()
build_strategy.enable_inplace = True
build_strategy.memory_optimize = True
build_strategy.reduce_strategy = static.BuildStrategy.ReduceStrategy.Reduce
program = static.CompiledProgram(static.default_main_program())
program = program.with_data_parallel(loss_name=loss.name,
                                     build_strategy=build_strategy,
                                     places=places)
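A minimal run of the compiled program above (this part is an illustrative addition, not in the original snippet):

import numpy

exe = static.Executor(paddle.CPUPlace())
exe.run(static.default_startup_program())

x_np = numpy.random.random(size=(10, 1)).astype('float32')
loss_v, = exe.run(program, feed={'x': x_np}, fetch_list=[loss.name])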
Example #17
image = static.data(name='image', shape=[None, 784], dtype='float32')
label = static.data(name='label', shape=[None, 1], dtype='int64')

# Define DataLoader
loader = paddle.io.DataLoader.from_generator(feed_list=[image, label],
                                             capacity=16,
                                             iterable=ITERABLE)

# Define network
loss = simple_net(image, label)

# Set data source of DataLoader
#
# If DataLoader is iterable, places must be given and the number of places must match the number of devices.
#  - If you are using GPU, call `paddle.static.cuda_places()` to get all GPU places.
#  - If you are using CPU, call `paddle.static.cpu_places()` to get all CPU places.
#
# If DataLoader is not iterable, places can be None.
places = static.cuda_places() if USE_GPU else static.cpu_places()
set_data_source(loader, places)

exe = static.Executor(places[0])
exe.run(static.default_startup_program())

prog = static.CompiledProgram(
    static.default_main_program()).with_data_parallel(loss_name=loss.name)

if loader.iterable:
    train_iterable(exe, prog, loss, loader)
else:
    train_non_iterable(exe, prog, loss, loader)
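train_iterable and train_non_iterable are not shown above; a minimal sketch, assuming EPOCH_NUM epochs (the EOFException handling follows the usual non-iterable DataLoader pattern):

EPOCH_NUM = 4

def train_iterable(exe, prog, loss, loader):
    for _ in range(EPOCH_NUM):
        for data in loader():
            exe.run(prog, feed=data, fetch_list=[loss])

def train_non_iterable(exe, prog, loss, loader):
    for _ in range(EPOCH_NUM):
        loader.start()  # start() must be called before each epoch
        try:
            while True:
                exe.run(prog, fetch_list=[loss])
        except paddle.fluid.core.EOFException:
            loader.reset()  # reset() must be called after catching EOFException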
Example #18
# NOTE: If you use CPU to run the program, you need to specify
# CPU_NUM; otherwise paddle will use the number of logical cores
# as CPU_NUM. In that case, the batch size of the input should be
# greater than CPU_NUM; if not, the process will fail with an
# exception.
if not use_cuda:
    os.environ['CPU_NUM'] = str(2)
    places = static.cpu_places()
else:
    places = static.cuda_places()

data = static.data(name='X', shape=[None, 1], dtype='float32')
hidden = static.nn.fc(x=data, size=10)
loss = paddle.mean(hidden)
paddle.optimizer.SGD(learning_rate=0.01).minimize(loss)

exe = static.Executor(places[0])  # assumption: the original excerpt omits creating `exe`
exe.run(static.default_startup_program())

build_strategy = static.BuildStrategy()
build_strategy.gradient_scale_strategy = \
          static.BuildStrategy.GradientScaleStrategy.Customized
compiled_prog = static.CompiledProgram(
    static.default_main_program()).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy, places=places)

dev_count = len(places)
x = numpy.random.random(size=(10, 1)).astype('float32')
loss_grad = numpy.ones((dev_count)).astype("float32") * 0.01
loss_grad_name = loss.name + "@GRAD"
loss_data = exe.run(compiled_prog,
                    feed={
                        "X": x,
                        loss_grad_name: loss_grad
                    },
                    fetch_list=[loss.name, loss_grad_name])
Example #19
# NOTE: If you use CPU to run the program, you need to specify
# CPU_NUM; otherwise paddle will use the number of logical cores
# as CPU_NUM. In that case, the batch size of the input should be
# greater than CPU_NUM; if not, the process will fail with an
# exception.
if not use_cuda:
    os.environ['CPU_NUM'] = str(2)

exe = static.Executor(place)

data = static.data(name='X', shape=[None, 1], dtype='float32')
hidden = static.nn.fc(x=data, size=10)
loss = paddle.mean(hidden)

test_program = static.default_main_program().clone(for_test=True)
paddle.optimizer.SGD(learning_rate=0.01).minimize(loss)

exe.run(static.default_startup_program())
compiled_train_prog = static.CompiledProgram(
    static.default_main_program()).with_data_parallel(loss_name=loss.name,
                                                      places=parallel_places)
# NOTE: if share_vars_from=compiled_train_prog is not set,
# the parameters used by the test program will be different
# from those used by the train program
compiled_test_prog = static.CompiledProgram(test_program).with_data_parallel(
    share_vars_from=compiled_train_prog, places=parallel_places)

train_data = numpy.random.random(size=(10, 1)).astype('float32')
loss_data, = exe.run(compiled_train_prog,
                     feed={"X": train_data},
                     fetch_list=[loss.name])
Example #20
File: ops.py  Project: PaddlePaddle/Knover
def sampling_id(probs):
    prog = static.default_main_program()
    sampling_ids = prog.current_block().create_var(name="sampling_ids", dtype="int64", shape=[-1])
    static.py_func(func=_sampling_id, x=probs, out=sampling_ids)
    return sampling_ids
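_sampling_id is defined elsewhere in ops.py; a plausible sketch (an assumption, not the project's actual implementation) samples one id per row from the given probability distribution:

import numpy as np

def _sampling_id(probs):
    probs = np.array(probs)  # py_func hands over a tensor-like object
    ids = [np.random.choice(row.shape[0], p=row / row.sum()) for row in probs]
    return np.array(ids, dtype="int64")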
Example #21
import paddle
import paddle.static as static

paddle.enable_static()

img = static.data(name='image', shape=[None, 784])
pred = static.nn.fc(x=img, size=10, activation='relu')
loss = paddle.mean(pred)
# Here we use clone before Momentum
test_program = static.default_main_program().clone(for_test=True)
optimizer = paddle.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
optimizer.minimize(loss)
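A hedged follow-up that runs both programs (the executor, place, and input data are illustrative assumptions):

import numpy

exe = static.Executor(paddle.CPUPlace())
exe.run(static.default_startup_program())

data = numpy.random.random(size=(4, 784)).astype('float32')
train_loss, = exe.run(static.default_main_program(),
                      feed={'image': data}, fetch_list=[loss.name])
test_loss, = exe.run(test_program,
                     feed={'image': data}, fetch_list=[loss.name])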
Example #22
# coding=utf-8
import numpy
import paddle
import paddle.static as static
import paddle.nn.functional as F

# enable static graph mode
paddle.enable_static()
paddle.set_device('cpu')

# define the network
x = static.data(name='X', shape=[None, 13], dtype='float32')
y = static.data(name='Y', shape=[None, 1], dtype='float32')
predict = static.nn.fc(x=x, size=1)
loss = F.square_error_cost(input=predict, label=y)
avg_loss = paddle.mean(loss)

# prepare the execution environment
exe = static.Executor(paddle.CPUPlace())
exe.run(static.default_startup_program())

# run the network (batch sizes of X and Y must match)
x = numpy.random.random(size=(8, 13)).astype('float32')
y = numpy.random.random(size=(8, 1)).astype('float32')
loss_data, = exe.run(static.default_main_program(),
                     feed={
                         'X': x,
                         'Y': y
                     },
                     fetch_list=[avg_loss.name])
Example #23
import paddle
import paddle.static as static

paddle.enable_static()

prog = static.default_main_program()
num_blocks = prog.num_blocks
print(num_blocks)
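For a freshly created program num_blocks is 1; control-flow ops add sub-blocks. A small sketch (the exact block count is an assumption about how static.nn.cond builds its branches):

pred = paddle.full(shape=[1], dtype='bool', fill_value=True)
out = static.nn.cond(pred,
                     lambda: paddle.full([1], 1.0),
                     lambda: paddle.full([1], 2.0))
print(static.default_main_program().num_blocks)  # greater than 1 after cond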
Example #24
import paddle
import paddle.static as static

paddle.enable_static()

x1 = static.data(name='x1', shape=[2, 3], dtype='float32')
x2 = static.data(name='x2', shape=[2, 3], dtype='float32')
x3 = static.data(name='x3', shape=[2, 3], dtype='float32')

out1 = paddle.concat(x=[x1, x2, x3], axis=-1)

print(static.default_main_program())
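Running the program (an illustrative addition): three [2, 3] inputs concatenated along the last axis yield a [2, 9] result.

import numpy as np

exe = static.Executor(paddle.CPUPlace())
feed = {name: np.random.rand(2, 3).astype('float32')
        for name in ('x1', 'x2', 'x3')}
out_v, = exe.run(static.default_main_program(), feed=feed, fetch_list=[out1])
print(out_v.shape)  # (2, 9)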