def build_network(self):
    """Build a simple FC classification program.

    Returns:
        A (main_program, startup_program, feed_vars) triple, where
        feed_vars is the [image, label] pair of data layers the net reads.
    """
    train_prog = fluid.Program()
    init_prog = fluid.Program()
    with fluid.program_guard(train_prog, init_prog):
        img_var = fluid.layers.data(
            name='image', shape=IMAGE_SHAPE, dtype='float32')
        lbl_var = fluid.layers.data(
            name='label', shape=LABEL_SHAPE, dtype='int64')
        # Appends the FC net ops to train_prog; the returned loss is unused here.
        simple_fc_net_with_inputs(img_var, lbl_var)
    return train_prog, init_prog, [img_var, lbl_var]
def branch(i, img, label, mod_two):
    """Build a conditional net: even/odd parity of `i` selects the branch.

    With mod_two truthy the true-branch (plain FC net) runs when i is even;
    otherwise it runs when i is odd. Both branches use class_num=10.
    """
    remainder = i % 2
    predicate = (remainder == 0) if mod_two else (remainder != 0)
    return layers.cond(
        predicate,
        lambda: simple_fc_net_with_inputs(img, label, class_num=10),
        lambda: batchnorm_fc_with_inputs(img, label, class_num=10))
def build_program(self, main_program, startup_program):
    """Populate main_program with an FC net, an SGD step, and a
    LoDTensorArray holding [img, label, loss].

    Returns:
        (loss, array) — the optimized loss variable and the tensor array.
    """
    with fluid.unique_name.guard():
        with fluid.program_guard(main_program, startup_program):
            idx = layers.zeros(shape=[1], dtype='int64')
            img = fluid.data(name='image', shape=[-1, 784], dtype='float32')
            label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
            # NOTE(review): this loss is immediately overwritten below, but the
            # call still appends its ops to main_program, so it is kept to
            # preserve the built graph — confirm whether the overwrite is
            # intentional.
            loss = simple_fc_net_with_inputs(img, label, class_num=10)
            loss = simple_fc_net()
            sgd = fluid.optimizer.SGD(learning_rate=0.001)
            sgd.minimize(loss)
            # Write img, label, loss into consecutive slots of the array.
            array = layers.array_write(x=img, i=idx)
            idx = layers.increment(idx)
            layers.array_write(x=label, i=idx, array=array)
            idx = layers.increment(idx)
            layers.array_write(x=loss, i=idx, array=array)
    return loss, array
def branch(i, img, label):
    """Dispatch on parity of `i`: even -> plain FC net, odd -> batchnorm FC net."""
    is_even = (i % 2) == 0
    return layers.cond(
        is_even,
        true_fn=lambda: simple_fc_net_with_inputs(img, label, class_num=10),
        false_fn=lambda: batchnorm_fc_with_inputs(img, label, class_num=10))
def cond_func(i, img, label):
    """Run the plain FC net when `i` is even, the batchnorm FC net otherwise."""

    def even_branch():
        return simple_fc_net_with_inputs(img, label, class_num=10)

    def odd_branch():
        return batchnorm_fc_with_inputs(img, label, class_num=10)

    return layers.cond((i % 2) == 0, even_branch, odd_branch)
def cuda_graph_static_graph_main(self, seed, use_cuda_graph):
    """Train a small FC net for 20 mini-batches under static graph mode,
    optionally capturing the training step as a CUDA Graph and replaying it.

    Args:
        seed: seed applied to both paddle and numpy RNGs so runs with and
            without CUDA graphs are comparable.
        use_cuda_graph: when True, batch 1 is captured into a CUDAGraph and
            all later batches are executed via graph replay.

    Returns:
        The final loss tensor contents as a numpy array.
    """
    batch_size = 1
    class_num = 10
    image_shape = [batch_size, 784]
    label_shape = [batch_size, 1]
    paddle.seed(seed)
    np.random.seed(seed)
    startup = paddle.static.Program()
    main = paddle.static.Program()
    with paddle.static.program_guard(main, startup):
        image = paddle.static.data(
            name="image", shape=image_shape, dtype='float32')
        label = paddle.static.data(
            name="label", shape=label_shape, dtype='int64')
        # Inputs/outputs must be persistable so their tensors keep fixed
        # addresses in the scope — required for CUDA graph replay, where the
        # captured kernels reference fixed device pointers.
        image.persistable = True
        label.persistable = True
        loss = simple_fc_net_with_inputs(image, label, class_num)
        loss.persistable = True
        lr = paddle.optimizer.lr.PiecewiseDecay(
            boundaries=[2, 3, 4], values=[0.01, 0.02, 0.03, 0.04])
        optimizer = paddle.optimizer.SGD(learning_rate=lr)
        optimizer.minimize(loss)
    place = paddle.CUDAPlace(0)
    exe = paddle.static.Executor(place)
    scope = paddle.static.Scope()
    with paddle.static.scope_guard(scope):
        exe.run(startup)
        build_strategy = paddle.static.BuildStrategy()
        build_strategy.allow_cuda_graph_capture = True
        # A deterministic op run order and fused optimizer ops keep the
        # executed kernel sequence stable across batches, so the captured
        # graph stays valid for replay.
        build_strategy.fix_op_run_order = True
        build_strategy.fuse_all_optimizer_ops = True
        compiled_program = paddle.static.CompiledProgram(
            main).with_data_parallel(loss_name=loss.name,
                                     build_strategy=build_strategy,
                                     places=place)
        # Grab the persistent tensors so inputs can be updated in place
        # between replays.
        image_t = scope.var(image.name).get_tensor()
        label_t = scope.var(label.name).get_tensor()
        loss_t = scope.var(loss.name).get_tensor()
        lr_var = main.global_block().var(lr._var_name)
        self.assertTrue(lr_var.persistable)
        lr_t = scope.var(lr_var.name).get_tensor()
        cuda_graph = None
        for batch_id in range(20):
            # Refresh the input tensors in place; replay reuses these buffers.
            image_t.set(
                np.random.rand(*image_shape).astype('float32'), place)
            label_t.set(
                np.random.randint(
                    low=0, high=class_num, size=label_shape, dtype='int64'),
                place)
            if batch_id == 1 and use_cuda_graph:
                # Batch 0 ran normally (warm-up); capture the step on batch 1.
                cuda_graph = CUDAGraph(place, mode="global")
                cuda_graph.capture_begin()
                exe.run(compiled_program)
                cuda_graph.capture_end()
            if cuda_graph:
                # During replay the lr scheduler op is not re-run, so the
                # learning-rate tensor must be updated manually each step.
                lr_t.set(np.array([lr()], dtype='float32'), place)
                cuda_graph.replay()
            else:
                exe.run(compiled_program)
            lr.step()
        if cuda_graph:
            # Release the captured graph's resources.
            cuda_graph.reset()
        return np.array(loss_t)