def custom_relu_static(func, device, dtype, np_x, use_func=True, test_infer=False):
    """Build and run a small static-graph ReLU program, returning the fetched output.

    Args:
        func: Callable applied to the input variable when ``use_func`` is true.
        device: Device string handed to ``paddle.set_device``.
        dtype: dtype of the ``'X'`` placeholder.
        np_x: Array fed to the ``'X'`` placeholder.
        use_func: When false, ``paddle.nn.functional.relu`` is used instead of ``func``.
        test_infer: Accepted for interface compatibility; not read by this function.

    Returns:
        The result of ``Executor.run`` for the single fetch target (the op output).
    """
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()), static.program_guard(static.Program()):
        inp = static.data(name='X', shape=[None, 8], dtype=dtype)
        inp.stop_gradient = False

        if use_func:
            result = func(inp)
        else:
            result = paddle.nn.functional.relu(inp)
        static.append_backward(result)

        executor = static.Executor()
        executor.run(static.default_startup_program())

        # NOTE(review): the original author remarks that in static mode the
        # data of x is overwritten by out; only the output is fetched here.
        fetched = executor.run(
            static.default_main_program(),
            feed={'X': np_x},
            fetch_list=[result.name],
        )

    paddle.disable_static()
    return fetched
def custom_relu_static_pe(func, device, dtype, np_x, use_func=True):
    """Run a static-graph ReLU program through a data-parallel ``CompiledProgram``.

    Args:
        func: Callable applied to the input variable when ``use_func`` is true.
        device: Device string handed to ``paddle.set_device``; the value
            ``'cpu'`` selects CPU places, anything else selects CUDA places.
        dtype: dtype of the ``'X'`` placeholder.
        np_x: Array fed to the ``'X'`` placeholder.
        use_func: When false, ``paddle.nn.functional.relu`` is used instead of ``func``.

    Returns:
        The result of ``Executor.run`` for the single fetch target (the op output).
    """
    paddle.enable_static()
    paddle.set_device(device)

    # Compare by value. The previous `device is 'cpu'` was an identity check
    # that only succeeded when both strings happened to be the same object.
    places = static.cpu_places() if device == 'cpu' else static.cuda_places()

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = func(x) if use_func else paddle.nn.functional.relu(x)
            static.append_backward(out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            # NOTE(review): the original author remarks that in static mode the
            # data of x is overwritten by out; only the output is fetched here.
            compiled_prog = static.CompiledProgram(
                static.default_main_program()).with_data_parallel(
                    loss_name=out.name, places=places)
            out_v = exe.run(compiled_prog,
                            feed={'X': np_x},
                            fetch_list=[out.name])

    paddle.disable_static()
    return out_v
def linear_static(func, device, dtype, np_x, np_weight, np_bias):
    """Run ``func(x, weight, bias)`` in static mode and fetch its output and gradients.

    The gradients are taken with respect to the mean of the output.

    Args:
        func: Callable taking the ``x``, ``weight`` and ``bias`` variables.
        device: Device string handed to ``paddle.set_device``.
        dtype: dtype of all three placeholders; fed arrays are cast to it.
        np_x: Input array; ``np_x.shape[1]`` fixes the placeholder's second dim.
        np_weight: Weight array; its shape is used for the placeholder.
        np_bias: Bias array; its shape is used for the placeholder.

    Returns:
        Tuple ``(out, x_grad, weight_grad, bias_grad)`` as fetched by the executor.
    """
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()), static.program_guard(static.Program()):
        x = static.data(name="x", shape=[None, np_x.shape[1]], dtype=dtype)
        weight = static.data(name="weight", shape=np_weight.shape, dtype=dtype)
        bias = static.data(name="bias", shape=np_bias.shape, dtype=dtype)
        # Every input takes part in the backward pass.
        for var in (x, weight, bias):
            var.stop_gradient = False

        out = func(x, weight, bias)
        static.append_backward(paddle.mean(out))

        executor = static.Executor()
        executor.run(static.default_startup_program())

        feed = {
            "x": np_x.astype(dtype),
            "weight": np_weight.astype(dtype),
            "bias": np_bias.astype(dtype)
        }
        targets = [
            out.name,
            x.name + "@GRAD",
            weight.name + "@GRAD",
            bias.name + "@GRAD",
        ]
        out_v, x_grad_v, weight_grad_v, bias_grad_v = executor.run(
            static.default_main_program(), feed=feed, fetch_list=targets)

    paddle.disable_static()
    return out_v, x_grad_v, weight_grad_v, bias_grad_v
def __init__(self, model=None, inputs_spec=None, labels_spec=None, cluster=None, strategy=None):
    """Store the user configuration and set up empty per-run bookkeeping.

    Args:
        model: Stored as ``self.model`` unchanged.
        inputs_spec: Passed through ``self._validate_spec`` before being stored.
        labels_spec: Passed through ``self._validate_spec`` before being stored.
        cluster: Stored as ``self.cluster`` unchanged (may stay ``None``).
        strategy: Distributed strategy; a fresh ``fleet.DistributedStrategy()``
            is created when this is ``None``.
    """
    # --- user-supplied configuration ---
    self.model = model
    self.inputs_spec = self._validate_spec(inputs_spec)
    self.labels_spec = self._validate_spec(labels_spec)
    # No default cluster is resolved here; a ``None`` cluster is kept as is.
    self.cluster = cluster
    self.strategy = fleet.DistributedStrategy() if strategy is None else strategy

    # --- runtime helpers and process identity ---
    self._executor = None
    self._cur_rank = paddle.distributed.get_rank()
    self._nranks = paddle.distributed.get_world_size()
    self._saver = DistributedSaver()
    self._logger = get_logger(logging.INFO)

    # --- snapshots of the defaults at construction time ---
    self._default_strategy = None
    self._orig_main_prog = static.default_main_program()
    self._orig_startup_prog = static.default_startup_program()
    self._orig_dist_context = get_default_distributed_context()

    # --- containers filled in later ---
    self._dist_contexts = {}
    self._serial_main_progs = {}
    self._serial_startup_progs = {}
    # Distributed main / startup programs; missing keys yield a new dict.
    self._dist_main_progs = defaultdict(dict)
    self._dist_startup_progs = defaultdict(dict)
    self._feed_vars = {}
    self._fetch_vars = {}
def custom_relu_static_inference(func, device, np_data, np_label, path_prefix):
    """Train a tiny static network using ``func``, save it for inference, and return predictions.

    Args:
        func: Callable applied to the first fully-connected layer's output.
        device: Device string handed to ``paddle.set_device``.
        np_data: Array fed to the ``'data'`` placeholder.
        np_label: Array fed to the ``'label'`` placeholder.
        path_prefix: Prefix handed to ``static.save_inference_model``.

    Returns:
        The result of running the main program once more after training,
        fetching ``predict``.
    """
    paddle.set_device(device)

    with static.scope_guard(static.Scope()), static.program_guard(static.Program()):
        # Placeholders for the small network.
        data = static.data(name='data', shape=[None, 1, 28, 28], dtype='float32')
        label = static.data(name='label', shape=[None, 1], dtype='int64')

        # fc -> func -> fc -> softmax fc
        first_fc = static.nn.fc(data, size=128)
        activated = func(first_fc)
        hidden = static.nn.fc(activated, size=128)
        predict = static.nn.fc(hidden, size=10, activation='softmax')

        # NOTE(review): the loss is computed from `hidden`, not `predict` —
        # confirm this is intended.
        loss = paddle.nn.functional.cross_entropy(input=hidden, label=label)
        avg_loss = paddle.mean(loss)

        optimizer = paddle.optimizer.SGD(learning_rate=0.1)
        optimizer.minimize(avg_loss)

        # Initialise parameters.
        executor = static.Executor()
        executor.run(static.default_startup_program())

        main_prog = static.default_main_program()
        batch = {'data': np_data, 'label': np_label}

        # Four training steps on the same batch; the loss value is not used.
        for _ in range(4):
            executor.run(main_prog, feed=batch, fetch_list=[avg_loss])

        # Export the inference model.
        static.save_inference_model(path_prefix, [data], [predict], executor)

        # Prediction from the training program, for comparison by the caller.
        predict_v = executor.run(main_prog, feed=batch, fetch_list=[predict])

    return predict_v
def concat_static(func, dtype, np_inputs, axis_v, with_attr=False):
    """Run a concat-style ``func`` on two ``[2, 3]`` inputs in static mode on CPU.

    ``func`` is called ``TEST_TIME`` times while the program is being built and
    the average wall-clock time of those calls is printed. The output of the
    last call is then summed, back-propagated and executed.

    Args:
        func: Callable taking ``([x1, x2], axis)``.
        dtype: dtype of both placeholders; fed arrays are cast to it.
        np_inputs: Sequence whose first two items are fed as ``x1`` and ``x2``.
        axis_v: Axis value; used directly when ``with_attr`` is true, otherwise
            wrapped via ``paddle.full``.
        with_attr: Selects how the axis is handed to ``func``.

    Returns:
        Tuple ``(out, x1_grad, x2_grad)`` as fetched by the executor.
    """
    paddle.enable_static()
    paddle.set_device("cpu")

    with static.scope_guard(static.Scope()), static.program_guard(static.Program()):
        x1 = static.data(name="x1", shape=[2, 3], dtype=dtype)
        x2 = static.data(name="x2", shape=[2, 3], dtype=dtype)
        axis = axis_v if with_attr else paddle.full(
            shape=[1], dtype='int64', fill_value=axis_v)
        x1.stop_gradient = False
        x2.stop_gradient = False

        # Time the calls to func; this happens before any executor run.
        elapsed = 0
        for _ in range(TEST_TIME):
            tic = time.time()
            out = func([x1, x2], axis)
            elapsed += time.time() - tic
        print("- static mode concat time cost: {} s".format(elapsed / TEST_TIME))

        # mean only supports float, so sum is used to get a scalar here
        static.append_backward(paddle.sum(out))

        executor = static.Executor()
        executor.run(static.default_startup_program())

        feed_dict = {
            "x1": np_inputs[0].astype(dtype),
            "x2": np_inputs[1].astype(dtype)
        }
        if not with_attr:
            feed_dict["axis"] = axis

        out_v, x1_grad_v, x2_grad_v = executor.run(
            static.default_main_program(),
            feed=feed_dict,
            fetch_list=[out.name, x1.name + "@GRAD", x2.name + "@GRAD"])

    paddle.disable_static()
    return out_v, x1_grad_v, x2_grad_v
def conj_static(func, shape, dtype, np_input):
    """Run ``func`` on one placeholder in static mode on CPU; fetch output and input gradient.

    The gradient is taken with respect to the sum of the output.

    Args:
        func: Callable taking the ``x`` variable.
        shape: Shape of the ``x`` placeholder.
        dtype: dtype of the ``x`` placeholder.
        np_input: Array fed to ``x`` (fed as given, without casting).

    Returns:
        Tuple ``(out, x_grad)`` as fetched by the executor.
    """
    paddle.enable_static()
    paddle.set_device("cpu")

    with static.scope_guard(static.Scope()), static.program_guard(static.Program()):
        x = static.data(name="x", shape=shape, dtype=dtype)
        x.stop_gradient = False

        out = func(x)
        static.append_backward(paddle.sum(out))

        executor = static.Executor()
        executor.run(static.default_startup_program())

        targets = [out.name, x.name + "@GRAD"]
        out_v, x_grad_v = executor.run(
            static.default_main_program(), feed={"x": np_input}, fetch_list=targets)

    paddle.disable_static()
    return out_v, x_grad_v
def test_relu2_static(device, dtype):
    """Build, print and run a static program around ``librelu2_op.relu2``.

    A random ``[4, 8]`` input drawn uniformly from [-1, 1) is fed and the
    fetched output is printed. Static mode is left enabled on return.

    Args:
        device: Device string handed to ``paddle.set_device``.
        dtype: dtype of the placeholder and of the random input.
    """
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()), static.program_guard(static.Program()):
        inp = static.data(name='X', shape=[None, 8], dtype=dtype)
        inp.stop_gradient = False
        relu_out = librelu2_op.relu2(inp)
        static.append_backward(relu_out)
        # Dump the program, backward ops included, for inspection.
        print(static.default_main_program())

        executor = static.Executor()
        executor.run(static.default_startup_program())

        np_x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
        (fetched,) = executor.run(
            static.default_main_program(),
            feed={'X': np_x},
            fetch_list=[relu_out.name],
        )
        print(fetched)
def linear_static(func, dtype, np_x, np_weight, np_bias):
    """Run ``func(x, weight, bias)`` forward-only in static mode on CPU.

    Args:
        func: Callable taking the ``x``, ``weight`` and ``bias`` variables.
        dtype: dtype of all three placeholders; fed arrays are cast to it.
        np_x: Input array; its full shape is used for the placeholder.
        np_weight: Weight array; its shape is used for the placeholder.
        np_bias: Bias array; its shape is used for the placeholder.

    Returns:
        The single fetched output value.
    """
    paddle.enable_static()
    paddle.set_device("cpu")

    with static.scope_guard(static.Scope()), static.program_guard(static.Program()):
        x = static.data(name="x", shape=np_x.shape, dtype=dtype)
        weight = static.data(name="weight", shape=np_weight.shape, dtype=dtype)
        bias = static.data(name="bias", shape=np_bias.shape, dtype=dtype)
        result = func(x, weight, bias)

        executor = static.Executor()
        executor.run(static.default_startup_program())

        feed = {
            "x": np_x.astype(dtype),
            "weight": np_weight.astype(dtype),
            "bias": np_bias.astype(dtype)
        }
        # Exactly one fetch target, so exactly one value is unpacked.
        (out_v,) = executor.run(
            static.default_main_program(), feed=feed, fetch_list=[result.name])

    paddle.disable_static()
    return out_v
# Script fragment: declare the inputs, attach a DataLoader, build the network,
# then run training through the iterable or non-iterable path.
image = static.data(name='image', shape=[None, 784], dtype='float32')
label = static.data(name='label', shape=[None, 1], dtype='int64')

# Define the DataLoader that feeds `image` and `label`.
loader = paddle.io.DataLoader.from_generator(feed_list=[image, label], capacity=16, iterable=ITERABLE)

# Define the network; `simple_net` returns the loss variable.
loss = simple_net(image, label)

# Set the data source of the DataLoader.
#
# If the DataLoader is iterable, places must be given and the number of places
# must be the same as the number of devices.
#  - If you are using GPU, call `paddle.static.cuda_places()` to get all GPU places.
#  - If you are using CPU, call `paddle.static.cpu_places()` to get all CPU places.
#
# If the DataLoader is not iterable, places can be None.
places = static.cuda_places() if USE_GPU else static.cpu_places()
set_data_source(loader, places)

# The executor is created on the first place only.
exe = static.Executor(places[0])
exe.run(static.default_startup_program())

# Wrap the main program for data-parallel execution, keyed on the loss.
prog = static.CompiledProgram(
    static.default_main_program()).with_data_parallel(loss_name=loss.name)

# Pick the training loop that matches how the loader was created.
if loader.iterable:
    train_iterable(exe, prog, loss, loader)
else:
    train_non_iterable(exe, prog, loss, loader)