def custom_relu_static(func, device, dtype, np_x, use_func=True, test_infer=False):
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = func(x) if use_func else paddle.nn.functional.relu(x)
            static.append_backward(out)

            exe = static.Executor()
            exe.run(static.default_startup_program())
            # in static mode, x data has been covered by out
            out_v = exe.run(static.default_main_program(),
                            feed={'X': np_x},
                            fetch_list=[out.name])

    paddle.disable_static()
    return out_v
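# Hypothetical usage, for illustration only; `custom_relu` stands in for a
# compiled custom operator and is not defined in this file.
np_x = np.random.uniform(-1, 1, [4, 8]).astype('float32')
out = custom_relu_static(custom_relu, 'cpu', 'float32', np_x)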
def check_static_result(self, place):
    from paddle.distributed.fleet.meta_parallel.parallel_layers.random import dropout
    with static.program_guard(static.Program(), static.Program()):
        input = static.data(name="input", shape=[40, 40], dtype="float32")
        res1 = dropout(input,
                       p=0.3,
                       training=True,
                       mode='upscale_in_train',
                       rng_name='seed0')
        res2 = dropout(input,
                       p=0.3,
                       training=True,
                       mode='upscale_in_train',
                       rng_name='seed1')
        res3 = dropout(input, p=0.3)

        in_np = np.random.random([40, 40]).astype("float32")

        exe = static.Executor(place)
        res_list = [res1, res2]
        for i in range(2):
            out1, out2 = exe.run(static.default_main_program(),
                                 feed={"input": in_np},
                                 fetch_list=res_list)
            self.assertTrue(np.allclose(out1, out2))
def custom_relu_static_pe(func, device, dtype, np_x, use_func=True):
    paddle.enable_static()
    paddle.set_device(device)

    # use `==` for string comparison; `is` checks identity, not equality
    places = static.cpu_places() if device == 'cpu' else static.cuda_places()
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = func(x) if use_func else paddle.nn.functional.relu(x)
            static.append_backward(out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            # in static mode, x data has been covered by out
            compiled_prog = static.CompiledProgram(
                static.default_main_program()).with_data_parallel(
                    loss_name=out.name, places=places)
            out_v = exe.run(compiled_prog,
                            feed={'X': np_x},
                            fetch_list=[out.name])

    paddle.disable_static()
    return out_v
def linear_static(func, device, dtype, np_x, np_weight, np_bias):
    paddle.enable_static()
    paddle.set_device(device)
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name="x", shape=[None, np_x.shape[1]], dtype=dtype)
            weight = static.data(name="weight",
                                 shape=np_weight.shape,
                                 dtype=dtype)
            bias = static.data(name="bias", shape=np_bias.shape, dtype=dtype)
            x.stop_gradient = False
            weight.stop_gradient = False
            bias.stop_gradient = False
            out = func(x, weight, bias)
            mean_out = paddle.mean(out)
            static.append_backward(mean_out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            out_v, x_grad_v, weight_grad_v, bias_grad_v = exe.run(
                static.default_main_program(),
                feed={
                    "x": np_x.astype(dtype),
                    "weight": np_weight.astype(dtype),
                    "bias": np_bias.astype(dtype)
                },
                fetch_list=[
                    out.name, x.name + "@GRAD", weight.name + "@GRAD",
                    bias.name + "@GRAD"
                ])
    paddle.disable_static()
    return out_v, x_grad_v, weight_grad_v, bias_grad_v
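# Hypothetical usage; `custom_linear` stands in for the operator under test.
# Shapes are illustrative and only need to be mutually consistent.
np_x = np.random.random([2, 3]).astype('float32')
np_weight = np.random.random([3, 4]).astype('float32')
np_bias = np.random.random([4]).astype('float32')
out, x_grad, w_grad, b_grad = linear_static(custom_linear, 'cpu', 'float32',
                                            np_x, np_weight, np_bias)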
def run_prog(self, a, b):
    main_program, out = self.get_prog()
    place = paddle.set_device('npu')
    exe = static.Executor(place)

    out_ = exe.run(main_program, feed={"a": a, "b": b}, fetch_list=[out])
    return out_
def test_static(self):
    mp, sp = static.Program(), static.Program()
    with static.program_guard(mp, sp):
        x = static.data("x", shape=[10, 10, 2], dtype="float64")
        out = paddle.as_complex(x)

    exe = static.Executor()
    exe.run(sp)
    [out_np] = exe.run(mp, feed={"x": self.x}, fetch_list=[out])
    self.assertTrue(np.allclose(self.out, out_np))
def check_static_result(self, place):
    import paddle.distributed.fleet.meta_parallel.parallel_layers.random as random
    with static.program_guard(static.Program(), static.Program()):
        res1 = random.determinate_seed('seed0')

        exe = static.Executor(place)
        res_list = [res1]
        for i in range(2):
            out1, = exe.run(static.default_main_program(),
                            fetch_list=res_list)
            self.assertEqual(out1, np.cast['int32'](self.rng1.random()))
def test_static_assert_true(self, x_list, p_list):
    for p in p_list:
        for x in x_list:
            with static.program_guard(static.Program(), static.Program()):
                input_data = static.data("X", shape=x.shape, dtype=x.dtype)
                output = paddle.linalg.cond(input_data, p)
                exe = static.Executor()
                result = exe.run(feed={"X": x}, fetch_list=[output])
                expected_output = np.linalg.cond(x, p)
                np.testing.assert_allclose(result[0],
                                           expected_output,
                                           rtol=5e-5)
def run_inference(drop_last):
    loader = paddle.io.DataLoader.from_generator(feed_list=[x],
                                                 capacity=8,
                                                 drop_last=drop_last)
    loader.set_batch_generator(batch_generator, static.cpu_places())

    exe = static.Executor(paddle.CPUPlace())
    prog = static.CompiledProgram(static.default_main_program())
    prog = prog.with_data_parallel()

    result = []
    for data in loader():
        each_ret, = exe.run(prog, feed=data, fetch_list=[y])
        result.extend(each_ret)
    return result
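# Illustrative stand-ins for the names `run_inference` assumes (`x`, `y` and
# `batch_generator`); these are assumptions, not the original definitions.
paddle.enable_static()
x = static.data(name='x', shape=[None, 1], dtype='float32')
y = x * 2.0  # any graph output works as the fetch target

def batch_generator():
    # yield one feed batch per iteration, matching feed_list=[x]
    for _ in range(5):
        yield [np.random.random([2, 1]).astype('float32')]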
def test_in_static_mode(self):
    def init_input_output(dtype):
        input = np.random.random(self._shape).astype(
            dtype) + 1j * np.random.random(self._shape).astype(dtype)
        return {'x': input}, numpy_apis[self.api](input)

    for dtype in self.dtypes:
        input_dict, np_res = init_input_output(dtype)
        for place in self.places:
            with static.program_guard(static.Program()):
                x = static.data(name="x", shape=self._shape, dtype=dtype)
                out = paddle_apis[self.api](x)

                exe = static.Executor(place)
                out_value = exe.run(feed=input_dict, fetch_list=[out.name])
                self.assertTrue(np.array_equal(np_res, out_value[0]))
def test_in_static_mode(self):
    def init_input_output(dtype):
        input = np.random.random(self._shape).astype(dtype)
        return {'x': input}, psi(input)

    for dtype in self.dtypes:
        input_dict, sc_res = init_input_output(dtype)
        for place in self.places:
            with static.program_guard(static.Program()):
                x = static.data(name="x", shape=self._shape, dtype=dtype)
                out = paddle.digamma(x)

                exe = static.Executor(place)
                out_value = exe.run(feed=input_dict, fetch_list=[out.name])
                self.assertEqual(
                    np.allclose(out_value[0], sc_res, rtol=1e-5), True)
def custom_relu_static_inference(func, device, np_data, np_label, path_prefix):
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            # simple module
            data = static.data(name='data',
                               shape=[None, 1, 28, 28],
                               dtype='float32')
            label = static.data(name='label', shape=[None, 1], dtype='int64')

            hidden = static.nn.fc(data, size=128)
            hidden = func(hidden)
            hidden = static.nn.fc(hidden, size=128)
            predict = static.nn.fc(hidden, size=10, activation='softmax')
            # compute the loss on the prediction, not the hidden features
            loss = paddle.nn.functional.cross_entropy(input=predict,
                                                      label=label)
            avg_loss = paddle.mean(loss)

            opt = paddle.optimizer.SGD(learning_rate=0.1)
            opt.minimize(avg_loss)

            # run startup program
            exe = static.Executor()
            exe.run(static.default_startup_program())

            # train
            for i in range(4):
                avg_loss_v = exe.run(static.default_main_program(),
                                     feed={
                                         'data': np_data,
                                         'label': np_label
                                     },
                                     fetch_list=[avg_loss])

            # save inference model
            static.save_inference_model(path_prefix, [data], [predict], exe)

            # get train predict value
            predict_v = exe.run(static.default_main_program(),
                                feed={
                                    'data': np_data,
                                    'label': np_label
                                },
                                fetch_list=[predict])

    return predict_v
def concat_static(func, dtype, np_inputs, axis_v, with_attr=False):
    paddle.enable_static()
    paddle.set_device("cpu")
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x1 = static.data(name="x1", shape=[2, 3], dtype=dtype)
            x2 = static.data(name="x2", shape=[2, 3], dtype=dtype)
            if with_attr:
                # pass the axis as a plain attribute value
                axis = axis_v
            else:
                # pass the axis as a tensor created inside the program
                axis = paddle.full(shape=[1], dtype='int64', fill_value=axis_v)
            x1.stop_gradient = False
            x2.stop_gradient = False

            # measure graph-construction time of the concat op
            total_time = 0
            for i in range(TEST_TIME):
                start = time.time()
                out = func([x1, x2], axis)
                total_time += time.time() - start
            print("- static mode concat time cost: {} s".format(total_time /
                                                                TEST_TIME))

            # mean only supports float, so use sum here
            sum_out = paddle.sum(out)
            static.append_backward(sum_out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            # in both cases only x1 and x2 need feeding: the axis is either a
            # compile-time attribute or a tensor produced by paddle.full
            feed_dict = {
                "x1": np_inputs[0].astype(dtype),
                "x2": np_inputs[1].astype(dtype)
            }
            out_v, x1_grad_v, x2_grad_v = exe.run(
                static.default_main_program(),
                feed=feed_dict,
                fetch_list=[out.name, x1.name + "@GRAD", x2.name + "@GRAD"])
    paddle.disable_static()
    return out_v, x1_grad_v, x2_grad_v
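# Hypothetical usage; `custom_concat` stands in for the op under test, and
# TEST_TIME is assumed to be a module-level constant (e.g. TEST_TIME = 10).
np_inputs = [
    np.random.random([2, 3]).astype('float32'),
    np.random.random([2, 3]).astype('float32')
]
out, x1_grad, x2_grad = concat_static(custom_concat, 'float32', np_inputs, 0)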
def test_conj_static_mode(self):
    def init_input_output(dtype):
        input = rand([2, 20, 2, 3]).astype(dtype) + 1j * rand(
            [2, 20, 2, 3]).astype(dtype)
        return {'x': input}, np.conj(input)

    for dtype in self._dtypes:
        input_dict, np_res = init_input_output(dtype)
        for place in self._places:
            with static.program_guard(static.Program()):
                x_dtype = np.complex64 if dtype == "float32" else np.complex128
                x = static.data(name="x",
                                shape=[2, 20, 2, 3],
                                dtype=x_dtype)
                out = paddle.conj(x)

                exe = static.Executor(place)
                out_value = exe.run(feed=input_dict, fetch_list=[out.name])
                self.assertTrue(np.array_equal(np_res, out_value[0]))
def conj_static(func, shape, dtype, np_input):
    paddle.enable_static()
    paddle.set_device("cpu")
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name="x", shape=shape, dtype=dtype)
            x.stop_gradient = False
            out = func(x)
            sum_out = paddle.sum(out)
            static.append_backward(sum_out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            out_v, x_grad_v = exe.run(static.default_main_program(),
                                      feed={"x": np_input},
                                      fetch_list=[out.name, x.name + "@GRAD"])
    paddle.disable_static()
    return out_v, x_grad_v
def train(self, place, restore):
    exe = static.Executor(place)
    exe.run(self._startup_prog)

    params = []
    for pass_id in range(2):
        for batch_id in range(3):
            exe.run(program=self._train_program, feed={'x': gen_data()})
        tmp_param = np.array(static.global_scope().find_var(
            self._param_name).get_tensor())
        params.append(tmp_param)

    with self._ema.apply(exe, restore):
        final_ema = np.array(static.global_scope().find_var(
            self._param_name).get_tensor())
        exe.run(program=self._test_program, feed={'x': gen_data()})
    if not restore:
        self._ema.restore(exe)

    return params, final_ema
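# `gen_data` is assumed to return one numpy batch for the 'x' input; a
# minimal stand-in might look like this (the shape is illustrative, not the
# original):
def gen_data():
    return np.random.random(size=(10, 5)).astype('float32')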
def test_relu2_static(device, dtype):
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = librelu2_op.relu2(x)
            static.append_backward(out)
            print(static.default_main_program())

            exe = static.Executor()
            exe.run(static.default_startup_program())

            x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
            out, = exe.run(static.default_main_program(),
                           feed={'X': x},
                           fetch_list=[out.name])
            print(out)
def test_static_save_and_load_inference_model(self):
    paddle.enable_static()
    np_data = np.random.random((1, 1, 28, 28)).astype("float32")
    np_label = np.random.random((1, 1)).astype("int64")
    path_prefix = "custom_op_inference/custom_relu"
    for device in self.devices:
        predict = custom_relu_static_inference(self.custom_ops[0], device,
                                               np_data, np_label, path_prefix)
        # load inference model
        with static.scope_guard(static.Scope()):
            exe = static.Executor()
            [inference_program, feed_target_names,
             fetch_targets] = static.load_inference_model(path_prefix, exe)
            predict_infer = exe.run(inference_program,
                                    feed={feed_target_names[0]: np_data},
                                    fetch_list=fetch_targets)
            self.assertTrue(
                np.array_equal(predict, predict_infer),
                "custom op predict: {},\n custom op infer predict: {}".format(
                    predict, predict_infer))
    paddle.disable_static()
def test_relu2_static(device, dtype, use_custom=True):
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = custom_relu_op_rf.relu2(
                x) if use_custom else paddle.nn.functional.relu(x)
            static.append_backward(out)
            print(static.default_main_program())

            places = static.cuda_places()
            print(places)
            exe = static.Executor()
            compiled_prog = static.CompiledProgram(
                static.default_main_program()).with_data_parallel(
                    loss_name=out.name, places=places)

            x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
            out, = exe.run(compiled_prog,
                           feed={'X': x},
                           fetch_list=[out.name])
            print(out)
def linear_static(func, dtype, np_x, np_weight, np_bias):
    paddle.enable_static()
    paddle.set_device("cpu")
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name="x", shape=np_x.shape, dtype=dtype)
            weight = static.data(name="weight",
                                 shape=np_weight.shape,
                                 dtype=dtype)
            bias = static.data(name="bias", shape=np_bias.shape, dtype=dtype)
            out = func(x, weight, bias)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            out_v, = exe.run(static.default_main_program(),
                             feed={
                                 "x": np_x.astype(dtype),
                                 "weight": np_weight.astype(dtype),
                                 "bias": np_bias.astype(dtype)
                             },
                             fetch_list=[out.name])
    paddle.disable_static()
    return out_v
def search_mobilenetv2_block(config, args, image_size):
    image_shape = [3, image_size, image_size]
    transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
    if args.data == 'cifar10':
        train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                       transform=transform,
                                                       backend='cv2')
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     transform=transform,
                                                     backend='cv2')
    elif args.data == 'imagenet':
        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
        val_dataset = imagenet_reader.ImageNetDataset(mode='val')

    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]

    if args.is_server:
        sa_nas = SANAS(config,
                       server_addr=(args.server_address, args.port),
                       search_steps=args.search_steps,
                       is_server=True)
    else:
        sa_nas = SANAS(config,
                       server_addr=(args.server_address, args.port),
                       search_steps=args.search_steps,
                       is_server=False)

    for step in range(args.search_steps):
        archs = sa_nas.next_archs()[0]

        train_program = static.Program()
        test_program = static.Program()
        startup_program = static.Program()
        with static.program_guard(train_program, startup_program):
            data_shape = [None] + image_shape
            data = static.data(name='data', shape=data_shape, dtype='float32')
            label = static.data(name='label', shape=[None, 1], dtype='int64')
            if args.data == 'cifar10':
                paddle.assign(paddle.reshape(label, [-1, 1]), label)
            train_loader = paddle.io.DataLoader(train_dataset,
                                                places=places,
                                                feed_list=[data, label],
                                                drop_last=True,
                                                batch_size=args.batch_size,
                                                return_list=False,
                                                shuffle=True,
                                                use_shared_memory=True,
                                                num_workers=4)
            val_loader = paddle.io.DataLoader(val_dataset,
                                              places=place,
                                              feed_list=[data, label],
                                              drop_last=False,
                                              batch_size=args.batch_size,
                                              return_list=False,
                                              shuffle=False)
            data = conv_bn_layer(input=data,
                                 num_filters=32,
                                 filter_size=3,
                                 stride=2,
                                 padding='SAME',
                                 act='relu6',
                                 name='mobilenetv2_conv1')
            data = archs(data)[0]
            data = conv_bn_layer(input=data,
                                 num_filters=1280,
                                 filter_size=1,
                                 stride=1,
                                 padding='SAME',
                                 act='relu6',
                                 name='mobilenetv2_last_conv')
            data = F.adaptive_avg_pool2d(data,
                                         output_size=[1, 1],
                                         name='mobilenetv2_last_pool')
            output = static.nn.fc(
                x=data,
                size=args.class_dim,
                weight_attr=ParamAttr(name='mobilenetv2_fc_weights'),
                bias_attr=ParamAttr(name='mobilenetv2_fc_offset'))

            softmax_out = F.softmax(output)
            cost = F.cross_entropy(softmax_out, label=label)
            avg_cost = paddle.mean(cost)
            acc_top1 = paddle.metric.accuracy(input=softmax_out,
                                              label=label,
                                              k=1)
            acc_top5 = paddle.metric.accuracy(input=softmax_out,
                                              label=label,
                                              k=5)
            test_program = train_program.clone(for_test=True)

            optimizer = paddle.optimizer.Momentum(
                learning_rate=0.1,
                momentum=0.9,
                weight_decay=paddle.regularizer.L2Decay(1e-4))
            optimizer.minimize(avg_cost)

        current_flops = flops(train_program)
        print('step: {}, current_flops: {}'.format(step, current_flops))
        if current_flops > int(321208544):
            continue

        exe = static.Executor(place)
        exe.run(startup_program)

        build_strategy = static.BuildStrategy()
        train_compiled_program = static.CompiledProgram(
            train_program).with_data_parallel(loss_name=avg_cost.name,
                                              build_strategy=build_strategy)
        for epoch_id in range(args.retain_epoch):
            for batch_id, data in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=data,
                               fetch_list=fetches)[0]
                batch_time = time.time() - s_time
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'
                        .format(step, epoch_id, batch_id, outs[0], batch_time))

        reward = []
        for batch_id, data in enumerate(val_loader()):
            test_fetches = [avg_cost.name, acc_top1.name, acc_top5.name]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)
            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'
                .format(step, batch_id, batch_reward[0], batch_reward[1],
                        batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        sa_nas.reward(float(finally_reward[1]))
def search(config, args, image_size, is_server=True):
    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]
    if is_server:
        ### start a server and a client
        sa_nas = SANAS(config,
                       server_addr=(args.server_address, args.port),
                       search_steps=args.search_steps,
                       is_server=True)
    else:
        ### start a client
        sa_nas = SANAS(config,
                       server_addr=(args.server_address, args.port),
                       # the bare `init_temperature` name was undefined here;
                       # it is assumed to come from the parsed args
                       init_temperature=args.init_temperature,
                       is_server=False)

    image_shape = [3, image_size, image_size]
    for step in range(args.search_steps):
        archs = sa_nas.next_archs()[0]

        train_program = static.Program()
        test_program = static.Program()
        startup_program = static.Program()
        train_fetch_list, _, train_loader = build_program(train_program,
                                                          startup_program,
                                                          image_shape,
                                                          archs,
                                                          args,
                                                          is_train=True)

        current_params = count_parameters_in_MB(
            train_program.global_block().all_parameters(), 'cifar10')
        _logger.info('step: {}, current_params: {}M'.format(
            step, current_params))
        if current_params > float(3.77):
            continue

        test_fetch_list, _, test_loader = build_program(test_program,
                                                        startup_program,
                                                        image_shape,
                                                        archs,
                                                        args,
                                                        is_train=False)
        test_program = test_program.clone(for_test=True)

        exe = static.Executor(place)
        exe.run(startup_program)

        train_reader = reader.train_valid(batch_size=args.batch_size,
                                          is_train=True,
                                          is_shuffle=True)
        test_reader = reader.train_valid(batch_size=args.batch_size,
                                         is_train=False,
                                         is_shuffle=False)
        train_loader.set_batch_generator(train_reader, places=place)
        test_loader.set_batch_generator(test_reader, places=place)

        build_strategy = static.BuildStrategy()
        train_compiled_program = static.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=train_fetch_list[0].name,
                build_strategy=build_strategy)

        valid_top1_list = []
        for epoch_id in range(args.retain_epoch):
            train_top1 = train(train_compiled_program, exe, epoch_id,
                               train_loader, train_fetch_list, args)
            _logger.info("TRAIN: step: {}, Epoch {}, train_acc {:.6f}".format(
                step, epoch_id, train_top1))
            valid_top1 = valid(test_program, exe, epoch_id, test_loader,
                               test_fetch_list, args)
            _logger.info("TEST: Epoch {}, valid_acc {:.6f}".format(
                epoch_id, valid_top1))
            valid_top1_list.append(valid_top1)

        sa_nas.reward(float(valid_top1_list[-1] + valid_top1_list[-2]) / 2)
def final_test(config, args, image_size, token=None):
    assert token is not None, "If you want to start a final experiment, you must input a token."
    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]
    sa_nas = SANAS(config,
                   server_addr=(args.server_address, args.port),
                   is_server=True)

    image_shape = [3, image_size, image_size]
    archs = sa_nas.tokens2arch(token)[0]

    train_program = static.Program()
    test_program = static.Program()
    startup_program = static.Program()
    train_fetch_list, (data, label), train_loader = build_program(
        train_program,
        startup_program,
        image_shape,
        archs,
        args,
        is_train=True)

    current_params = count_parameters_in_MB(
        train_program.global_block().all_parameters(), 'cifar10')
    _logger.info('current_params: {}M'.format(current_params))
    test_fetch_list, _, test_loader = build_program(test_program,
                                                    startup_program,
                                                    image_shape,
                                                    archs,
                                                    args,
                                                    is_train=False)
    test_program = test_program.clone(for_test=True)

    exe = static.Executor(place)
    exe.run(startup_program)

    train_reader = reader.train_valid(batch_size=args.batch_size,
                                      is_train=True,
                                      is_shuffle=True)
    test_reader = reader.train_valid(batch_size=args.batch_size,
                                     is_train=False,
                                     is_shuffle=False)
    train_loader.set_batch_generator(train_reader, places=place)
    test_loader.set_batch_generator(test_reader, places=place)

    build_strategy = static.BuildStrategy()
    train_compiled_program = static.CompiledProgram(
        train_program).with_data_parallel(loss_name=train_fetch_list[0].name,
                                          build_strategy=build_strategy)

    valid_top1_list = []
    for epoch_id in range(args.retain_epoch):
        train_top1 = train(train_compiled_program, exe, epoch_id,
                           train_loader, train_fetch_list, args)
        _logger.info("TRAIN: Epoch {}, train_acc {:.6f}".format(
            epoch_id, train_top1))
        valid_top1 = valid(test_program, exe, epoch_id, test_loader,
                           test_fetch_list, args)
        _logger.info("TEST: Epoch {}, valid_acc {:.6f}".format(
            epoch_id, valid_top1))
        valid_top1_list.append(valid_top1)

        output_dir = os.path.join('darts_output', str(epoch_id))
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        static.save_inference_model(output_dir, [data], test_fetch_list, exe)
def test_search_result(tokens, image_size, args, config):
    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]
    sa_nas = SANAS(config,
                   server_addr=(args.server_address, args.port),
                   search_steps=args.search_steps,
                   is_server=True)

    image_shape = [3, image_size, image_size]
    if args.data == 'cifar10':
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                       transform=transform,
                                                       backend='cv2')
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     transform=transform,
                                                     backend='cv2')
    elif args.data == 'imagenet':
        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
        val_dataset = imagenet_reader.ImageNetDataset(mode='val')

    archs = sa_nas.tokens2arch(tokens)[0]

    train_program = static.Program()
    test_program = static.Program()
    startup_program = static.Program()
    train_loader, avg_cost, acc_top1, acc_top5 = build_program(
        train_program, startup_program, image_shape, train_dataset, archs,
        args, places)
    current_flops = flops(train_program)
    print('current_flops: {}'.format(current_flops))
    test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
        test_program,
        startup_program,
        image_shape,
        val_dataset,
        archs,
        args,
        place,
        is_test=True)
    test_program = test_program.clone(for_test=True)

    exe = static.Executor(place)
    exe.run(startup_program)

    build_strategy = static.BuildStrategy()
    train_compiled_program = static.CompiledProgram(
        train_program).with_data_parallel(loss_name=avg_cost.name,
                                          build_strategy=build_strategy)
    for epoch_id in range(args.retain_epoch):
        for batch_id, data in enumerate(train_loader()):
            fetches = [avg_cost.name]
            s_time = time.time()
            outs = exe.run(train_compiled_program,
                           feed=data,
                           fetch_list=fetches)[0]
            batch_time = time.time() - s_time
            if batch_id % 10 == 0:
                _logger.info(
                    'TRAIN: epoch: {}, batch: {}, cost: {}, batch_time: {}ms'.
                    format(epoch_id, batch_id, outs[0], batch_time))

    reward = []
    for batch_id, data in enumerate(test_loader()):
        test_fetches = [
            test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
        ]
        batch_reward = exe.run(test_program,
                               feed=data,
                               fetch_list=test_fetches)
        reward_avg = np.mean(np.array(batch_reward), axis=1)
        reward.append(reward_avg)
        _logger.info(
            'TEST: batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'.
            format(batch_id, batch_reward[0], batch_reward[1],
                   batch_reward[2]))

    finally_reward = np.mean(np.array(reward), axis=0)
    _logger.info(
        'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
            finally_reward[0], finally_reward[1], finally_reward[2]))
image = static.data(name='image', shape=[None, 784], dtype='float32')
label = static.data(name='label', shape=[None, 1], dtype='int64')

# Define DataLoader
loader = paddle.io.DataLoader.from_generator(feed_list=[image, label],
                                             capacity=16,
                                             iterable=ITERABLE)

# Define network
loss = simple_net(image, label)

# Set data source of DataLoader
#
# If DataLoader is iterable, places must be given and the number of places
# must be the same as the device number.
#  - If you are using GPU, call `paddle.static.cuda_places()` to get all GPU places.
#  - If you are using CPU, call `paddle.static.cpu_places()` to get all CPU places.
#
# If DataLoader is not iterable, places can be None.
places = static.cuda_places() if USE_GPU else static.cpu_places()
set_data_source(loader, places)

exe = static.Executor(places[0])
exe.run(static.default_startup_program())

prog = static.CompiledProgram(
    static.default_main_program()).with_data_parallel(loss_name=loss.name)

if loader.iterable:
    train_iterable(exe, prog, loss, loader)
else:
    train_non_iterable(exe, prog, loss, loader)
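# The example above assumes several helpers. Below is a minimal illustrative
# sketch of them; the names come from the example, but the bodies are
# assumptions, not the original definitions.
import numpy as np

def simple_net(image, label):
    # a single-layer classifier, just enough to produce a loss to optimize
    fc = static.nn.fc(image, size=10)
    loss = paddle.nn.functional.cross_entropy(input=fc, label=label)
    return paddle.mean(loss)

def set_data_source(loader, places):
    def batch_generator():
        # any generator yielding (image, label) numpy batches would do
        for _ in range(8):
            yield [np.random.random([4, 784]).astype('float32'),
                   np.random.randint(0, 10, [4, 1]).astype('int64')]
    loader.set_batch_generator(batch_generator, places=places)

def train_iterable(exe, prog, loss, loader):
    # iterable loader: iterate over it and pass each batch as `feed`
    for data in loader():
        exe.run(prog, feed=data, fetch_list=[loss])

def train_non_iterable(exe, prog, loss, loader):
    # non-iterable loader: start it and run until the data source is drained
    # (the exact module holding EOFException may vary across Paddle versions)
    loader.start()
    try:
        while True:
            exe.run(prog, fetch_list=[loss])
    except paddle.framework.core.EOFException:
        loader.reset()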
def search_mobilenetv2(config, args, image_size, is_server=True):
    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]
    if is_server:
        ### start a server and a client
        rl_nas = RLNAS(key='lstm',
                       configs=config,
                       is_sync=False,
                       server_addr=(args.server_address, args.port),
                       controller_batch_size=1,
                       controller_decay_steps=1000,
                       controller_decay_rate=0.8,
                       lstm_num_layers=1,
                       hidden_size=10,
                       temperature=1.0)
    else:
        ### start a client
        rl_nas = RLNAS(key='lstm',
                       configs=config,
                       is_sync=False,
                       server_addr=(args.server_address, args.port),
                       lstm_num_layers=1,
                       hidden_size=10,
                       temperature=1.0,
                       controller_batch_size=1,
                       controller_decay_steps=1000,
                       controller_decay_rate=0.8,
                       is_server=False)

    image_shape = [3, image_size, image_size]
    if args.data == 'cifar10':
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                       transform=transform,
                                                       backend='cv2')
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     transform=transform,
                                                     backend='cv2')
    elif args.data == 'imagenet':
        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
        val_dataset = imagenet_reader.ImageNetDataset(mode='val')

    for step in range(args.search_steps):
        archs = rl_nas.next_archs(1)[0][0]

        train_program = static.Program()
        test_program = static.Program()
        startup_program = static.Program()
        train_loader, avg_cost, acc_top1, acc_top5 = build_program(
            train_program, startup_program, image_shape, train_dataset, archs,
            args, places)

        test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
            test_program,
            startup_program,
            image_shape,
            val_dataset,
            archs,
            args,
            place,
            is_test=True)
        test_program = test_program.clone(for_test=True)

        exe = static.Executor(place)
        exe.run(startup_program)

        build_strategy = static.BuildStrategy()
        train_compiled_program = static.CompiledProgram(
            train_program).with_data_parallel(loss_name=avg_cost.name,
                                              build_strategy=build_strategy)
        for epoch_id in range(args.retain_epoch):
            for batch_id, data in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=data,
                               fetch_list=fetches)[0]
                batch_time = time.time() - s_time
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'
                        .format(step, epoch_id, batch_id, outs[0], batch_time))

        reward = []
        for batch_id, data in enumerate(test_loader()):
            test_fetches = [
                test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
            ]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)
            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'
                .format(step, batch_id, batch_reward[0], batch_reward[1],
                        batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        rl_nas.reward(np.float32(finally_reward[1]))
# coding=utf-8
import numpy
import paddle
import paddle.static as static
import paddle.nn.functional as F

# enable static graph mode
paddle.enable_static()
paddle.set_device('cpu')

# define the network
x = static.data(name='X', shape=[None, 13], dtype='float32')
y = static.data(name='Y', shape=[None, 1], dtype='float32')
predict = static.nn.fc(x=x, size=1)
loss = F.square_error_cost(input=predict, label=y)
avg_loss = paddle.mean(loss)

# prepare the execution environment
exe = static.Executor(paddle.CPUPlace())
exe.run(static.default_startup_program())

# run the network; X and Y must share the same batch size
x = numpy.random.random(size=(7, 13)).astype('float32')
y = numpy.random.random(size=(7, 1)).astype('float32')
loss_data, = exe.run(static.default_main_program(),
                     feed={'X': x, 'Y': y},
                     fetch_list=[avg_loss.name])
import paddle
import paddle.static as static

paddle.enable_static()

x = static.data(name="x", shape=[10, 10], dtype='float32')
y = static.nn.fc(x, 10)
z = static.nn.fc(y, 10)

place = paddle.CPUPlace()
exe = static.Executor(place)
exe.run(static.default_startup_program())
prog = static.default_main_program()

static.save(prog, "./temp")
static.load(prog, "./temp")
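# An alternative restore path (illustrative): read the checkpoint into a
# state dict with load_program_state, then apply it with set_program_state;
# this is handy when loading into a freshly rebuilt program.
state = static.load_program_state("./temp")
static.set_program_state(prog, state)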
# `image`, `label` and `loss` are assumed to be defined by the network built
# before this fragment; BATCH_SIZE and EPOCH_NUM are module-level constants.
opt = paddle.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)

# data loader
transform = Compose([Normalize(mean=[127.5], std=[127.5], data_format='CHW')])
train_dataset = paddle.vision.datasets.MNIST(mode='train',
                                             transform=transform)
train_loader = paddle.io.DataLoader(train_dataset,
                                    feed_list=[image, label],
                                    batch_size=BATCH_SIZE,
                                    shuffle=True,
                                    drop_last=True,
                                    num_workers=2)

# prepare
exe = static.Executor()
exe.run(static.default_startup_program())

places = paddle.static.cuda_places()
compiled_program = static.CompiledProgram(
    static.default_main_program()).with_data_parallel(loss_name=loss.name,
                                                      places=places)

# train (the original fragment was cut off mid-call; the fetch of `loss`
# below completes it in the obvious way)
for epoch_id in range(EPOCH_NUM):
    for batch_id, (image_data, label_data) in enumerate(train_loader()):
        loss_data = exe.run(compiled_program,
                            feed={'image': image_data, 'label': label_data},
                            fetch_list=[loss])
def search_mobilenetv2(config, args, image_size, is_server=True):
    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]
    if is_server:
        ### start a server and a client
        rl_nas = RLNAS(
            key='ddpg',
            configs=config,
            is_sync=False,
            obs_dim=26,  ### step + length_of_token
            server_addr=(args.server_address, args.port))
    else:
        ### start a client
        rl_nas = RLNAS(key='ddpg',
                       configs=config,
                       is_sync=False,
                       obs_dim=26,
                       server_addr=(args.server_address, args.port),
                       is_server=False)

    image_shape = [3, image_size, image_size]
    if args.data == 'cifar10':
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                       transform=transform,
                                                       backend='cv2')
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     transform=transform,
                                                     backend='cv2')
    elif args.data == 'imagenet':
        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
        val_dataset = imagenet_reader.ImageNetDataset(mode='val')

    for step in range(args.search_steps):
        if step == 0:
            action_prev = [1. for _ in rl_nas.range_tables]
        else:
            action_prev = rl_nas.tokens[0]
        obs = [step]
        obs.extend(action_prev)
        archs = rl_nas.next_archs(obs=obs)[0][0]

        train_program = static.Program()
        test_program = static.Program()
        startup_program = static.Program()
        train_loader, avg_cost, acc_top1, acc_top5 = build_program(
            train_program, startup_program, image_shape, train_dataset, archs,
            args, places)

        test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
            test_program,
            startup_program,
            image_shape,
            val_dataset,
            archs,
            args,
            place,
            is_test=True)
        test_program = test_program.clone(for_test=True)

        exe = static.Executor(place)
        exe.run(startup_program)

        build_strategy = static.BuildStrategy()
        train_compiled_program = static.CompiledProgram(
            train_program).with_data_parallel(loss_name=avg_cost.name,
                                              build_strategy=build_strategy)
        for epoch_id in range(args.retain_epoch):
            for batch_id, data in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=data,
                               fetch_list=fetches)[0]
                batch_time = time.time() - s_time
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'
                        .format(step, epoch_id, batch_id, outs[0], batch_time))

        reward = []
        for batch_id, data in enumerate(test_loader()):
            test_fetches = [
                test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
            ]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)
            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'
                .format(step, batch_id, batch_reward[0], batch_reward[1],
                        batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        obs = np.expand_dims(obs, axis=0).astype('float32')
        actions = rl_nas.tokens
        obs_next = [step + 1]
        obs_next.extend(actions[0])
        obs_next = np.expand_dims(obs_next, axis=0).astype('float32')

        # np.bool was removed from recent numpy releases; use the plain
        # 'bool' dtype string instead
        if step == args.search_steps - 1:
            terminal = np.expand_dims([True], axis=0).astype('bool')
        else:
            terminal = np.expand_dims([False], axis=0).astype('bool')

        rl_nas.reward(np.expand_dims(np.float32(finally_reward[1]), axis=0),
                      obs=obs,
                      actions=actions.astype('float32'),
                      obs_next=obs_next,
                      terminal=terminal)

        if step == 2:
            sys.exit(0)