def test_return_single_var(self):
    def fn_1():
        return layers.fill_constant(shape=[4, 2], dtype='int32', value=1)

    def fn_2():
        return layers.fill_constant(shape=[4, 2], dtype='int32', value=2)

    def fn_3():
        return layers.fill_constant(shape=[4, 3], dtype='int32', value=3)

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        index_1 = layers.fill_constant(shape=[1], dtype='int32', value=1)
        index_2 = layers.fill_constant(shape=[1], dtype='int32', value=2)
        index_5 = layers.fill_constant(shape=[1], dtype='int32', value=5)

        # branch_index is 1, so fn_1 is called
        out_0 = layers.switch_case(branch_index=index_1,
                                   branch_fns={1: fn_1,
                                               2: fn_2,
                                               3: fn_3})

        # a tuple of fns is keyed 0, 1, 2, ..., i.e.
        # branch_fns={0: fn_1, 1: fn_2, 2: fn_3}, so index 1 calls fn_2
        out_1 = layers.switch_case(branch_index=index_1,
                                   branch_fns=(fn_1, fn_2, fn_3))

        # branch_index is 5 and matches no key, so the default fn_3 is called
        out_2 = layers.switch_case(branch_index=index_5,
                                   branch_fns=((1, fn_1), (2, fn_2)),
                                   default=fn_3)

        # no default; branch_index is 2, so fn_2 is called
        out_3 = layers.switch_case(branch_index=index_2,
                                   branch_fns=[(1, fn_1), (2, fn_2)])

        # no default and branch_index is 5, which matches no key, so the
        # fn under the max key (3 -> fn_2) is called
        out_4 = layers.switch_case(branch_index=index_5,
                                   branch_fns=[(1, fn_1), (3, fn_2),
                                               (2, fn_3)])

    place = fluid.CUDAPlace(
        0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)
    res = exe.run(main_program,
                  fetch_list=[out_0, out_1, out_2, out_3, out_4])

    self.assertTrue(np.allclose(res[0], 1),
                    "result is {} but answer is {}".format(res[0], 1))
    self.assertTrue(np.allclose(res[1], 2),
                    "result is {} but answer is {}".format(res[1], 2))
    self.assertTrue(np.allclose(res[2], 3),
                    "result is {} but answer is {}".format(res[2], 3))
    self.assertTrue(np.allclose(res[3], 2),
                    "result is {} but answer is {}".format(res[3], 2))
    self.assertTrue(np.allclose(res[4], 2),
                    "result is {} but answer is {}".format(res[4], 2))
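# Hedged note on out_4 above: when `default` is None, fluid's switch_case
# appears to treat the fn registered under the largest key as the fallback
# for any unmatched branch_index. A minimal self-contained sketch
# (illustrative names, not from the original test file):
import paddle.fluid as fluid
from paddle.fluid import layers

main_prog = fluid.Program()
with fluid.program_guard(main_prog, fluid.Program()):
    idx = layers.fill_constant(shape=[1], dtype='int32', value=7)
    out = layers.switch_case(
        branch_index=idx,
        branch_fns={
            1: lambda: layers.fill_constant(shape=[1], dtype='int32', value=1),
            3: lambda: layers.fill_constant(shape=[1], dtype='int32', value=3),
        })
exe = fluid.Executor(fluid.CPUPlace())
res = exe.run(main_prog, fetch_list=[out])
assert int(res[0]) == 3  # 7 matches no key, falls through to the max key 3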
def build_predict_forward(self):
    head_dict = {}
    backbone = self._trainers[0]._pred_backbone
    for trainer in self._trainers:
        assert trainer._pred_head is not None and trainer._pred_backbone is not None, \
            "You should build_predict_forward for the {} task".format(trainer._name)
        assert trainer._pred_backbone == backbone, \
            "The backbone for each task must be the same"
        head_dict[trainer._name] = trainer._pred_head

    pred_prog = fluid.Program()
    pred_init_prog = fluid.Program()
    self._pred_prog = pred_prog
    self._pred_init_prog = pred_init_prog

    def get_loss(i):
        head = head_dict[self._trainers[i].name]
        self._trainers[i]._lock_prog = True
        pred_vars = self._trainers[i].build_predict_forward(backbone, head)
        self._trainers[i]._lock_prog = False
        # unlike build_forward, no loss needs to be returned at predict
        # time; the branch only adds this task's predict graph to pred_prog

    # bind i as a default argument (i=i) so each lambda keeps its own task id
    task_fns = {
        i: lambda i=i: get_loss(i)
        for i in range(len(self._trainers))
    }

    with fluid.program_guard(pred_prog, pred_init_prog):
        task_id_var = fluid.data(name="__task_id", shape=[1], dtype='int64')
        loss_var = layers.switch_case(branch_index=task_id_var,
                                      branch_fns=task_fns)

    if not self._multi_task:
        self._init_exe_prog(for_train=False)
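# A hypothetical sketch (method and attribute names below are assumptions,
# not from the original source) of how the "__task_id" placeholder above
# routes a predict pass to one task's head at run time:
import numpy as np

def predict_one_batch(self, task_id, batch_feed, fetch_vars):
    # batch_feed is assumed to already hold the backbone/reader inputs
    batch_feed['__task_id'] = np.array([task_id], dtype='int64')
    return self._exe.run(self._pred_prog,
                         feed=batch_feed,
                         fetch_list=fetch_vars)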
def build_forward(self):
    """
    Build the forward computation graph for training, which is usually
    built from the input layer to the loss node.

    Return:
        - loss_var: a Variable object, the computational graph variable
          (node) of the loss.
    """
    head_dict = {}
    backbone = self._trainers[0]._backbone
    for trainer in self._trainers:
        assert trainer._task_head is not None and trainer._backbone is not None, \
            "You should build forward for the {} task".format(trainer._name)
        assert trainer._backbone == backbone, \
            "The backbone for each task must be the same"
        head_dict[trainer._name] = trainer._task_head

    train_prog = fluid.Program()
    train_init_prog = fluid.Program()
    self._train_prog = train_prog
    self._train_init_prog = train_init_prog

    def get_loss(i):
        head = head_dict[self._trainers[i].name]
        self._trainers[i]._lock_prog = True
        loss_var = self._trainers[i].build_forward(backbone, head)
        self._trainers[i]._lock_prog = False
        return loss_var

    # bind i as a default argument (i=i) so each lambda keeps its own task id
    task_fns = {
        i: lambda i=i: get_loss(i)
        for i in range(len(self._trainers))
    }

    with fluid.program_guard(train_prog, train_init_prog):
        task_id_var = fluid.data(name="__task_id", shape=[1], dtype='int64')
        loss_var = layers.switch_case(branch_index=task_id_var,
                                      branch_fns=task_fns)
        self._task_id_var = task_id_var
        self._loss_var = loss_var
        self._fetch_list = [loss_var.name]

    if not self._multi_task:
        self._init_exe_prog(for_train=True)
    return loss_var
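# Why `lambda i=i` in the two methods above: Python closures bind names
# late, so a plain `lambda: get_loss(i)` inside the comprehension would
# make every switch_case branch call get_loss with the final value of i.
# Binding i as a default argument freezes the per-task id. A minimal
# self-contained sketch of the pitfall:
late = {i: (lambda: i) for i in range(3)}
early = {i: (lambda i=i: i) for i in range(3)}
assert [late[k]() for k in range(3)] == [2, 2, 2]    # all share the last i
assert [early[k]() for k in range(3)] == [0, 1, 2]   # each keeps its own i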
def body(i):
    def fn_add_three():
        data_add_three = layers.elementwise_add(x=i, y=three)
        return data_add_three

    def fn_square():
        data_mul_data = layers.elementwise_mul(x=i, y=i)
        return data_mul_data

    def fn_add_one():
        data_add_one = layers.elementwise_add(x=i, y=one)
        return data_add_one

    return layers.switch_case(branch_index=i,
                              branch_fns={2: fn_add_three, 5: fn_square},
                              default=fn_add_one)
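# A minimal, hypothetical sketch of how a loop body like the one above can
# be driven (assumes fluid >= 1.6 for layers.while_loop, and that body()
# above sits at module scope so it resolves `one` and `three` as the
# globals defined here; `ten` is an illustrative loop bound):
import paddle.fluid as fluid
from paddle.fluid import layers

main_program = fluid.Program()
with fluid.program_guard(main_program, fluid.Program()):
    one = layers.fill_constant(shape=[1], dtype='int32', value=1)
    three = layers.fill_constant(shape=[1], dtype='int32', value=3)
    ten = layers.fill_constant(shape=[1], dtype='int32', value=10)
    i = layers.fill_constant(shape=[1], dtype='int32', value=1)

    # every iteration routes i through one switch_case branch:
    # 2 -> i + 3, 5 -> i * i, anything else -> i + 1
    out = layers.while_loop(lambda i: layers.less_than(i, ten), body, [i])

exe = fluid.Executor(fluid.CPUPlace())
res = exe.run(main_program, fetch_list=out)
assert int(res[0]) == 25  # 1 -> 2 (+1) -> 5 (+3) -> 25 (square), then i >= 10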
def test_return_var_tuple(self):
    def fn_1():
        return layers.fill_constant(
            shape=[1, 2], dtype='int32', value=1), layers.fill_constant(
                shape=[2, 3], dtype='float32', value=2)

    def fn_2():
        return layers.fill_constant(
            shape=[3, 4], dtype='int32', value=3), layers.fill_constant(
                shape=[4, 5], dtype='float32', value=4)

    def fn_3():
        return layers.fill_constant(
            shape=[5], dtype='int32', value=5), layers.fill_constant(
                shape=[5, 6], dtype='float32', value=6)

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        index_1 = layers.fill_constant(shape=[1], dtype='int32', value=1)

        out = layers.switch_case(index_1, ((1, fn_1), (2, fn_2)), fn_3)

    place = fluid.CUDAPlace(
        0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)
    ret = exe.run(main_program, fetch_list=out)

    self.assertTrue(
        np.allclose(np.asarray(ret[0]), np.full((1, 2), 1, np.int32)))
    self.assertTrue(
        np.allclose(np.asarray(ret[1]), np.full((2, 3), 2, np.float32)))
def _init_train(self):
    instances = self.instances
    Backbone = self.Backbone
    bb_conf = self.bb_conf
    bb_name = self.bb_name
    dev_count = self.dev_count
    num_instances = len(instances)
    mrs = self.mrs
    branch = fluid.data(name="branch", shape=[1], dtype='int64')

    # set first_target/main task instance
    main_inst = None
    for inst in instances:
        if inst.is_target:
            main_inst = inst
            inst.is_first_target = True
            break
    main_conf = main_inst.config
    if not os.path.exists(main_conf['save_path']):
        os.makedirs(main_conf['save_path'])
        os.makedirs(os.path.join(main_conf['save_path'], 'ckpt'))

    # prepare backbone
    train_backbone = Backbone(bb_conf, phase='train')
    pred_backbone = Backbone(bb_conf, phase='pred')

    # create reader and task layer per instance,
    # then check i/o across reader, backbone and task_layer
    task_attrs = {}
    pred_task_attrs = []
    joint_input_names = {}
    joint_shape_and_dtypes = {}
    name_to_position = {}
    for i in range(num_instances):
        train_reader = instances[i].Reader(instances[i].config, phase='train')
        instances[i].reader['train'] = train_reader
        train_parad = instances[i].Paradigm(instances[i].config,
                                            phase='train',
                                            backbone_config=bb_conf)
        instances[i].task_layer['train'] = train_parad
        task_attr_from_reader = _encode_inputs(
            train_parad.inputs_attrs['reader'], instances[i].name)
        task_attrs[i] = task_attr_from_reader

        _check_io(train_backbone.inputs_attr,
                  train_reader.outputs_attr,
                  in_name=bb_name + '_backbone',
                  out_name='reader.train')
        _check_io(train_parad.inputs_attrs['reader'],
                  train_reader.outputs_attr,
                  in_name='task_paradigm.train.reader',
                  out_name='reader.train')
        _check_io(train_parad.inputs_attrs['backbone'],
                  train_backbone.outputs_attr,
                  in_name='task_paradigm.train.backbone',
                  out_name=bb_name + '_backbone')

        if instances[i].is_target:
            if 'pred_file' not in instances[i].config:
                instances[i].config['pred_file'] = ''
            pred_reader = instances[i].Reader(instances[i].config,
                                              phase='pred')
            pred_parad = instances[i].Paradigm(instances[i].config,
                                               phase='pred',
                                               backbone_config=bb_conf)
            instances[i].task_layer['pred'] = pred_parad
            task_attr_from_reader = _encode_inputs(
                pred_parad.inputs_attrs['reader'], instances[i].name)
            pred_task_attrs.append(task_attr_from_reader)
            _check_io(pred_backbone.inputs_attr,
                      pred_reader.outputs_attr,
                      in_name=bb_name + '_backbone',
                      out_name='reader.pred')
            _check_io(pred_parad.inputs_attrs['reader'],
                      pred_reader.outputs_attr,
                      in_name='task_paradigm.pred.reader',
                      out_name='reader.pred')
            _check_io(pred_parad.inputs_attrs['backbone'],
                      pred_backbone.outputs_attr,
                      in_name='task_paradigm.pred.backbone',
                      out_name=bb_name + '_backbone')

        # merge reader input attrs from backbone and task_instances
        joint_input_names[i], joint_shape_and_dtypes[i], name_to_position[
            i] = merge_input_attrs(train_backbone.inputs_attr, task_attrs[i])

    pred_joint_input_names, pred_joint_shape_and_dtypes, _ = merge_input_attrs(
        pred_backbone.inputs_attr,
        pred_task_attrs,
        insert_taskid=False,
        insert_batchsize=False,
        insert_seqlen=False,
        insert_batchsize_x_seqlen=False)
    # shapes: [task_id, shapes_of_backbone, shapes_of_inst1, ..., shapes_of_instN]
    if DEBUG:
        print('----- for debug -----')
        print('joint input names:')
        print(joint_input_names)
        print('joint input shape and dtypes:')
        print(joint_shape_and_dtypes)

    # load data
    for i in range(num_instances):
        print(instances[i].name + ": preparing data...", end='')
        instances[i].reader['train'].load_data()
        print('ok!')

    # merge dataset iterators and create net input vars
    iterators = []
    prefixes = []
    mrs = []
    for inst in instances:
        iterators.append(inst.reader['train'].iterator())
        prefixes.append(inst.name)
        mrs.append(inst.mix_ratio)

    joint_iterator_fn = create_joint_iterator_fn(iterators,
                                                 prefixes,
                                                 joint_shape_and_dtypes,
                                                 mrs,
                                                 name_to_position,
                                                 dev_count=dev_count,
                                                 verbose=VERBOSE,
                                                 return_type='dict')
    self._joint_iterator_fn = joint_iterator_fn

    input_attrs = {}
    net_inputs = {}
    bb_output_vars = {}

    # prepare predict vars for saving inference model
    pred_input_attrs = [[i, j, k] for i, (
        j, k) in zip(pred_joint_input_names, pred_joint_shape_and_dtypes)]
    pred_prog = fluid.Program()
    pred_init_prog = fluid.Program()
    self._pred_prog = pred_prog
    with fluid.program_guard(main_program=pred_prog,
                             startup_program=pred_init_prog):
        pred_net_inputs = create_net_inputs(pred_input_attrs)
        pred_bb_output_vars = pred_backbone.build(pred_net_inputs,
                                                  scope_name='__paddlepalm_')

    task_inputs = {}
    task_output_vars = {}
    task_fns = {}

    def get_loss(i):
        input_attrs[i] = [[m, j, k] for m, (
            j, k) in zip(joint_input_names[i], joint_shape_and_dtypes[i])]
        net_inputs[i] = create_net_inputs(input_attrs[i], async=False)
        bb_output_vars[i] = train_backbone.build(net_inputs[i],
                                                 scope_name='__paddlepalm_')
        assert sorted(bb_output_vars[i].keys()) == sorted(
            train_backbone.outputs_attr.keys())

        # build backbone and task layers
        task_inputs[i] = {'backbone': bb_output_vars[i]}
        task_inputs_from_reader = _decode_inputs(net_inputs[i],
                                                 instances[i].name)
        task_inputs[i]['reader'] = task_inputs_from_reader

        scope = instances[i].task_reuse_scope + '/'
        with fluid.unique_name.guard(scope):
            output_vars = instances[i].build_task_layer(task_inputs[i],
                                                        phase='train',
                                                        scope=scope)
        output_vars = {
            instances[i].name + '/' + key: val
            for key, val in output_vars.items()
        }
        loss_var = output_vars[instances[i].name + '/loss']
        task_output_vars[i] = output_vars

        if instances[i].is_target:
            with fluid.program_guard(pred_prog, pred_init_prog):
                cur_inputs = _decode_inputs(pred_net_inputs,
                                            instances[i].name)
                instances[i].pred_input = cur_inputs
                pred_task_inputs = {
                    'backbone': pred_bb_output_vars,
                    'reader': cur_inputs
                }
                scope = instances[i].task_reuse_scope + '/'
                with fluid.unique_name.guard(scope):
                    instances[i].build_task_layer(pred_task_inputs,
                                                  phase='pred',
                                                  scope=scope)
        return loss_var

    # bind the loop index into each branch fn so every task keeps its own id
    for i in range(num_instances):
        def task_loss():
            task_id = i
            return lambda: get_loss(task_id)

        task_fns[i] = task_loss()

    loss = layers.switch_case(branch_index=branch, branch_fns=task_fns)
    self._switched_loss = loss.name

    main_reader = main_inst.reader['train']
    num_examples = main_reader.num_examples
    for inst in instances:
        max_train_steps = int(main_conf['num_epochs'] * inst.mix_ratio *
                              (num_examples // main_conf['batch_size'] //
                               dev_count))
        if inst.is_target:
            print('{}: expected train steps {}.'.format(
                inst.name, max_train_steps))
        inst.steps_pur_epoch = inst.reader[
            'train'].num_examples // main_conf['batch_size'] // dev_count
        inst.expected_train_steps = max_train_steps
    global_max_train_steps = int(main_conf['num_epochs'] * sum(mrs) *
                                 (num_examples // main_conf['batch_size'] //
                                  dev_count))
    print('Estimated overall train steps {}.'.format(global_max_train_steps))

    if 'warmup_proportion' in main_conf and main_conf['warmup_proportion'] > 0:
        warmup_steps = int(global_max_train_steps *
                           main_conf['warmup_proportion'])
        print('Warmup steps: ' + str(warmup_steps))
    else:
        warmup_steps = 0

    # build optimizer
    if 'optimizer' in main_conf:
        optim_mod = importlib.import_module(OPTIMIZER_DIR + '.' +
                                            main_conf['optimizer'])
        optimize = getattr(optim_mod, OPTIMIZE_METHOD)
        optimize(loss, main_conf, max_train_steps, warmup_steps,
                 fluid.default_main_program())
        loss.persistable = True
        if main_conf.get('use_ema', False):
            assert 'ema_decay' in main_conf, \
                "ema_decay should be set when use_ema is enabled."
            ema = fluid.optimizer.ExponentialMovingAverage(
                main_conf['ema_decay'])
            ema.update()

    # prepare for train
    self.train_backbone = train_backbone
    self.train_program = fluid.CompiledProgram(
        fluid.default_main_program()).with_data_parallel(loss_name=loss.name)
    self.saver_program = fluid.default_main_program()

    self.main_inst = main_inst
    self.has_init_train = True
    self.has_init_pred = True
    self._net_inputs = net_inputs

    self.exe.run(fluid.default_startup_program())
    print("\nRandomly initialize parameters...\n")
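# A hypothetical sketch (not from the original source; assumes
# single-device execution so the shape-[1] selector is not split across
# devices) of how the switched loss might be driven after _init_train:
# each step feeds one batch plus the int64 "branch" selector that picks
# which task's get_loss branch runs.
import numpy as np

def train_one_step(self, task_id, feed_batch):
    # feed_batch is assumed to already match the joint net input names
    feed_batch['branch'] = np.array([task_id], dtype='int64')
    loss_np = self.exe.run(self.train_program,
                           feed=feed_batch,
                           fetch_list=[self._switched_loss])
    return loss_np[0]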
def type_error_default():
    # TypeError: default must be callable
    layers.switch_case(branch_index=key_int32,
                       branch_fns=[(1, fn_1), (2, fn_2)],
                       default=1)
def value_error_key():
    # ValueError: duplicate key 2 in branch_fns
    layers.switch_case(branch_index=key_int32,
                       branch_fns=[(2, fn_1), (2, fn_2)],
                       default=fn_3)
def type_error_key():
    # TypeError: the key in branch_fns must be an integer
    layers.switch_case(branch_index=key_int32,
                       branch_fns=[(2.3, 2)],
                       default=fn_3)
def type_error_index_fn_pair_2():
    # TypeError: each element of branch_fns must be a (key, fn) pair of length 2
    layers.switch_case(branch_index=key_int32,
                       branch_fns=[(1, 2, 3)],
                       default=fn_3)
def type_error_branch_fns():
    # TypeError: branch_fns must be a dict, list or tuple
    layers.switch_case(branch_index=key_int32, branch_fns=1, default=fn_3)
def dtype_error_branch_index():
    # TypeError: branch_index must be an integral tensor (uint8/int32/int64),
    # not float32
    layers.switch_case(branch_index=key_float32,
                       branch_fns=[(1, fn_1)],
                       default=fn_3)
def type_error_branch_index():
    # TypeError: branch_index must be a Variable, not a Python int
    layers.switch_case(branch_index=1, branch_fns=[(1, fn_1)], default=fn_3)
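# A minimal sketch of how the error cases above are typically asserted
# (an assumption mirroring common unittest style; this fragment belongs
# inside the TestCase method that defines key_int32, key_float32 and
# fn_1/fn_2/fn_3):
self.assertRaises(TypeError, type_error_branch_index)
self.assertRaises(TypeError, dtype_error_branch_index)
self.assertRaises(TypeError, type_error_branch_fns)
self.assertRaises(TypeError, type_error_index_fn_pair_2)
self.assertRaises(TypeError, type_error_key)
self.assertRaises(ValueError, value_error_key)
self.assertRaises(TypeError, type_error_default)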
def test_nested_switch_case(self):
    def fn_1(x=1):
        out = layers.switch_case(
            branch_index=layers.fill_constant(shape=[1], dtype='int32',
                                              value=x),
            branch_fns={
                1: partial(layers.fill_constant, shape=[1], dtype='int32',
                           value=1),
                x: partial(layers.fill_constant, shape=[2], dtype='int32',
                           value=x)
            })
        return out

    def fn_2(x=2):
        out = layers.switch_case(
            branch_index=layers.fill_constant(shape=[1], dtype='int32',
                                              value=2),
            branch_fns={
                1: partial(layers.fill_constant, shape=[4, 3], dtype='int32',
                           value=1),
                2: partial(fn_1, x=x)
            })
        return out

    def fn_3():
        out = layers.switch_case(
            branch_index=layers.fill_constant(shape=[1], dtype='int32',
                                              value=3),
            branch_fns={
                1: partial(layers.fill_constant, shape=[4, 3], dtype='int32',
                           value=1),
                3: partial(fn_2, x=3)
            })
        return out

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        index_1 = fluid.data(name="index_1", shape=[1], dtype='uint8')
        index_2 = layers.fill_constant(shape=[1], dtype='int32', value=2)
        index_3 = layers.fill_constant(shape=[1], dtype='int64', value=3)

        out_1 = layers.switch_case(branch_index=index_1,
                                   branch_fns={1: fn_1,
                                               2: fn_2,
                                               3: fn_3})
        out_2 = layers.switch_case(branch_index=index_2,
                                   branch_fns={1: fn_1,
                                               2: fn_2,
                                               3: fn_3})
        out_3 = layers.switch_case(branch_index=index_3,
                                   branch_fns={1: fn_1,
                                               2: fn_2,
                                               3: fn_3})

    place = fluid.CUDAPlace(
        0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)
    res = exe.run(main_program,
                  feed={"index_1": np.array([1], dtype="uint8")},
                  fetch_list=[out_1, out_2, out_3])

    self.assertTrue(np.allclose(res[0], 1),
                    "result is {} but answer is {}".format(res[0], 1))
    self.assertTrue(np.allclose(res[1], 2),
                    "result is {} but answer is {}".format(res[1], 2))
    self.assertTrue(np.allclose(res[2], 3),
                    "result is {} but answer is {}".format(res[2], 3))