def test_error(self):
    startup_program = Program()
    main_program = Program()
    use_cuda = core.is_compiled_with_cuda()
    with program_guard(main_program, startup_program):

        def fn_1(opt, avg_loss):
            opt.minimize(avg_loss)

        def fn_2(opt, avg_loss):
            opt.minimize(avg_loss)

        x = fluid.layers.data("X", [10], 'float32')
        hidden = layers.fc(x, 5)
        avg_loss = layers.mean(hidden)
        adam = optimizer.Adam(learning_rate=LR)
        sgd = optimizer.SGD(learning_rate=LR)
        cond = layers.fill_constant([1], 'bool', True)
        layers.case([(cond, lambda: fn_1(adam, avg_loss))],
                    lambda: fn_2(sgd, avg_loss))

    cpu_place = fluid.CPUPlace()
    cuda_place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    for place in [cpu_place, cuda_place]:
        exe = fluid.Executor(place)
        exe.run(startup_program)
        np.random.seed(SEED)

        # NOTE(liym27):
        # This test needs to run on multiple cards to hit NotImplementedError.
        # It was moved out of RUN_TYPE=DIST in tests/unittests/CMakeList.txt
        # so that it uses multiple cards **only on CPU**, not GPU, to reduce
        # CI time.
        os.environ['CPU_NUM'] = str(2)

        pe_exe = fluid.ParallelExecutor(use_cuda=use_cuda,
                                        main_program=main_program,
                                        loss_name=avg_loss.name)
        num_devices = pe_exe.device_count

        def not_implemented_error():
            pe_exe.run(feed={
                'X': np.random.random(size=[64, 10]).astype('float32'),
            },
                       fetch_list=[avg_loss.name])

        if num_devices > 1:
            self.assertRaises(NotImplementedError, not_implemented_error)
def test_load(self):
    mul_out, b1_out, b2_out, mean_out = self.net()
    sgd_optimizer = optimizer.SGD(learning_rate=1.0)
    recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer)
    recompute_optimizer._set_checkpoints([b1_out])
    try:
        state_dict = {}
        recompute_optimizer.load(state_dict)
    except NotImplementedError as e:
        self.assertEqual(
            "load function is not supported by Recompute Optimizer for now",
            cpt.get_exception_message(e))
def setUp(self):
    program = Program()
    with program_guard(program, startup_program=Program()):
        x = layers.data(name='x', shape=[13], dtype='float32')
        y_predict = layers.fc(input=x, size=1, act=None)
        y = layers.data(name='y', shape=[1], dtype='float32')
        cost = layers.square_error_cost(input=y_predict, label=y)
        avg_cost = layers.mean(cost)
        opt = optimizer.SGD(learning_rate=0.001)
        # minimize() returns (optimize_ops, params_grads), not an optimizer,
        # so do not rebind `opt` to its return value.
        opt.minimize(avg_cost)
    self.program = program
def setUp(self):
    program = Program()
    with program_guard(program, startup_program=Program()):
        x = layers.data(name='x', shape=[13], dtype='float32')
        fc = layers.fc(input=x, size=10, act=None)
        reshape = layers.reshape(x=fc, shape=[-1, 2, 5])
        fc = layers.reshape(x=reshape, shape=[-1, 5, 2])
        y_predict = layers.fc(input=fc, size=1, act=None)
        y = layers.data(name='y', shape=[1], dtype='float32')
        cost = layers.square_error_cost(input=y_predict, label=y)
        avg_cost = layers.mean(cost)
        opt = optimizer.SGD(learning_rate=0.001)
        opt.minimize(avg_cost)
    self.skip_set = set([cost.name, fc.name])
    self.program = program
def test_adjacent_checkpoint(self):
    mul_out, b1_out, b2_out, mean_out = self.net()
    self.assertEqual(len(mean_out.block.ops), 4)
    self.assertEqual([op.type for op in mean_out.block.ops],
                     ["mul", "elementwise_add", "elementwise_add", "mean"])
    sgd_optimizer = optimizer.SGD(learning_rate=1.0)
    recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer)
    # Adjacent checkpoints: there is no segment between mul_out and b1_out
    # to recompute, so no forward op is duplicated in the backward pass.
    recompute_optimizer._set_checkpoints([mul_out, b1_out])
    opts, params_grads = recompute_optimizer.minimize(mean_out)

    self.assertEqual(len(mean_out.block.ops), 12)
    self.assertEqual([op.type for op in mean_out.block.ops], [
        "mul", "elementwise_add", "elementwise_add", "mean", "fill_constant",
        "mean_grad", "elementwise_add_grad", "elementwise_add_grad",
        "mul_grad", "sgd", "sgd", "sgd"
    ])
def test_lookahead_optimizer(self):
    init_program = framework.Program()
    program = framework.Program()
    block = program.global_block()
    init_block = init_program.global_block()
    mul_x = block.create_parameter(dtype="float32",
                                   shape=[5, 10],
                                   lod_level=0,
                                   name="mul.x",
                                   optimize_attr={'learning_rate': 1.1})
    init_mul_x = init_block.create_parameter(dtype="float32",
                                             shape=[5, 10],
                                             lod_level=0,
                                             name="mul.x")
    mul_y = block.create_var(dtype="float32",
                             shape=[10, 8],
                             lod_level=0,
                             name="mul.y")
    mul_out = block.create_var(dtype="float32",
                               shape=[5, 8],
                               lod_level=0,
                               name="mul.out")
    mean_out = block.create_var(dtype="float32",
                                shape=[1],
                                lod_level=0,
                                name="mean.out")
    block.append_op(type="mul",
                    inputs={"X": mul_x,
                            "Y": mul_y},
                    outputs={"Out": mul_out},
                    attrs={"x_num_col_dims": 1})
    block.append_op(type="mean",
                    inputs={"X": mul_out},
                    outputs={"Out": mean_out})

    sgd = optimizer.SGD(learning_rate=0.01)
    lookahead = optimizer.LookaheadOptimizer(sgd, alpha=0.5, k=5)
    with framework.program_guard(program, init_program):
        opts, _ = lookahead.minimize(mean_out)
    self.assertEqual(len(opts), 2)
    self.assertEqual([op.type for op in opts], ["scale", "sgd"])
def test_dropout(self):
    """
    If there are dropout layers in the forward net, a seed op should be
    added so that the recomputed dropout in the backward pass reuses the
    same random mask as the original forward pass.
    """
    mul_out, b1_out, b2_out, mean_out = self.net(with_dropout=True)
    self.assertEqual(len(mean_out.block.ops), 5)
    self.assertEqual(
        [op.type for op in mean_out.block.ops],
        ["mul", "dropout", "elementwise_add", "elementwise_add", "mean"])
    sgd_optimizer = optimizer.SGD(learning_rate=1.0)
    recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer)
    recompute_optimizer._set_checkpoints([b1_out])
    opts, params_grads = recompute_optimizer.minimize(mean_out)

    self.assertEqual(len(mean_out.block.ops), 17)
    self.assertEqual([op.type for op in mean_out.block.ops], [
        "mul", "seed", "dropout", "elementwise_add", "elementwise_add",
        "mean", "fill_constant", "mean_grad", "elementwise_add_grad", "mul",
        "dropout", "elementwise_add_grad", "dropout_grad", "mul_grad", "sgd",
        "sgd", "sgd"
    ])
def test_apply_gradients(self):
    mul_out, b1_out, b2_out, mean_out = self.net()
    sgd_optimizer = optimizer.SGD(learning_rate=1.0)
    recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer)
    recompute_optimizer._set_checkpoints([b1_out])

    # apply backward
    params_grads = recompute_optimizer.backward(mean_out,
                                                startup_program=None,
                                                parameter_list=None,
                                                no_grad_set=None)

    # apply gradients
    program = mean_out.block.program
    with framework.program_guard(program, None):
        optimize_ops = recompute_optimizer.apply_gradients(params_grads)

    self.assertEqual(len(mean_out.block.ops), 13)
    self.assertEqual([op.type for op in mean_out.block.ops], [
        "mul", "elementwise_add", "elementwise_add", "mean", "fill_constant",
        "mean_grad", "elementwise_add_grad", "mul", "elementwise_add_grad",
        "mul_grad", "sgd", "sgd", "sgd"
    ])
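# NOTE: the backward() + apply_gradients() pair exercised in
# test_apply_gradients above is the decomposed form of a single minimize()
# call, so the expected op list matches what minimize() would produce. A
# minimal sketch of the equivalent one-step call (same objects as above):
#
#   opts, params_grads = recompute_optimizer.minimize(mean_out)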
def static(train_data, loss_in_switch=True, use_cuda=False,
           use_parallel_exe=False):
    startup_program = Program()
    main_program = Program()
    startup_program.random_seed = SEED
    main_program.random_seed = SEED

    with program_guard(main_program, startup_program):

        def double_fc_net(image):
            hidden = layers.fc(
                image,
                size=FC_SIZE,
                act='relu',
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(value=0.99)),
                bias_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(value=0.5)),
                name="hidden")

            prediction = layers.fc(
                hidden,
                size=CLASS_NUM,
                act='softmax',
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(value=1.2)),
                bias_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(value=0.8)),
                name="prediction")

            return hidden, prediction

        def fn_1(opt, avg_loss=None, pred=None, label=None):
            if avg_loss is None:
                loss = layers.cross_entropy(input=pred, label=label)
                avg_loss = layers.mean(loss, name='mean_cross_entropy_loss')
            opt.minimize(avg_loss)
            return avg_loss

        def fn_2(opt, avg_loss=None, pred=None, label=None):
            if avg_loss is None:
                loss = layers.softmax_with_cross_entropy(logits=pred,
                                                         label=label)
                avg_loss = layers.mean(loss, name='mean_softmax_loss')
            opt.minimize(avg_loss)
            return avg_loss

        image = fluid.data('image', [BATCH_SIZE, INPUT_SIZE], 'float32')
        label = fluid.data('label', [BATCH_SIZE, 1], 'int64')
        hidden, prediction = double_fc_net(image)

        adam = optimizer.Adam(learning_rate=LR)
        sgd = optimizer.SGD(learning_rate=LR)

        id = fluid.data('id', [1], 'int32')
        two = layers.fill_constant([1], 'int32', 2)
        mod_two = layers.elementwise_mod(id, two) == 0

        if loss_in_switch:
            avg_loss = layers.case(
                [(mod_two, lambda: fn_1(adam, None, prediction, label))],
                lambda: fn_2(sgd, None, prediction, label))
        else:
            loss_1 = layers.cross_entropy(input=prediction, label=label)
            avg_loss_1 = layers.mean(loss_1)
            loss_2 = layers.softmax_with_cross_entropy(logits=prediction,
                                                       label=label)
            avg_loss_2 = layers.mean(loss_2)
            avg_loss = layers.case(
                [(mod_two, lambda: fn_1(adam, avg_loss_1))],
                lambda: fn_2(sgd, avg_loss_2))

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_program)

    for epoch in range(EPOCH_NUM):
        feed_image, feed_label = train_data[epoch]
        fetch_list = [hidden, prediction, avg_loss]
        feed = {
            'image': feed_image,
            'label': feed_label,
            'id': np.array([epoch]).astype('int32')
        }
        out = exe.run(main_program, feed=feed, fetch_list=fetch_list)
        out_hidden, out_pred, loss = out

    return out_hidden, out_pred, loss
inp = layers.data(name='inp',
                  shape=params["federated"]["input_shape"],
                  dtype=params["federated"]["input_dtype"])
label = layers.data(name='label',
                    shape=params["federated"]["label_shape"],
                    dtype=params["federated"]["label_dtype"])

# Check the "federated" config, since that is where number_of_class is read
# from below.
if "number_of_class" in params["federated"]:
    model = select_model(params["federated"]["model_name"], inp, label,
                         params["federated"]["number_of_class"])
else:
    model = select_model(params["federated"]["model_name"], inp, label)

# Client configs
###########################
job_generator = JobGenerator()
# Name the instance "sgd_optimizer" so it does not shadow the `optimizer`
# module imported above.
sgd_optimizer = optimizer.SGD(
    learning_rate=params["federated"]["learning_rate"])
job_generator.set_optimizer(sgd_optimizer)
job_generator.set_losses([model.loss])
job_generator.set_startup_program(model.startup_program)
job_generator.set_infer_feed_and_target_names(
    [model.inputs.name, model.label.name], model.fetch_list)

# Choose the federated learning strategy
############################################
# FLStrategyFactory allows choosing among three strategies: fed_avg, dpsgd,
# and sec_agg. The fed_avg strategy is used here.
build_strategy = FLStrategyFactory()
build_strategy.fed_avg = True
build_strategy.inner_step = 1
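# A typical continuation, sketched after the public PaddleFL examples (the
# endpoint, worker count, and output directory below are illustrative
# assumptions, not values taken from this script): build the FedAvg strategy
# from the factory and generate the per-role FL job configs that the
# scheduler distributes to the server and the trainers.
strategy = build_strategy.create_fl_strategy()

endpoints = ["127.0.0.1:8181"]  # assumed address of the FL server
output = "fl_job_config"        # assumed output directory for the job configs
job_generator.generate_fl_job(strategy,
                              server_endpoints=endpoints,
                              worker_num=2,
                              output=output)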