Example #1
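All snippets on this page use PaddlePaddle's legacy fluid API and are excerpted from larger files, so their imports are not shown. A minimal sketch of the preamble they assume:

    from functools import partial

    import numpy as np
    import paddle.fluid as fluid
    import paddle.fluid.core as core
    import paddle.fluid.layers as layers
    from paddle.fluid.framework import Program, program_guard
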
    def test_return_single_var(self):
        def fn_1():
            return layers.fill_constant(shape=[4, 2], dtype='int32', value=1)

        def fn_2():
            return layers.fill_constant(shape=[4, 2], dtype='int32', value=2)

        def fn_3():
            return layers.fill_constant(shape=[4, 3], dtype='int32', value=3)

        main_program = Program()
        startup_program = Program()
        with program_guard(main_program, startup_program):
            index_1 = layers.fill_constant(shape=[1], dtype='int32', value=1)
            index_2 = layers.fill_constant(shape=[1], dtype='int32', value=2)
            index_5 = layers.fill_constant(shape=[1], dtype='int32', value=5)

            # call fn_1
            out_0 = layers.switch_case(branch_index=index_1,
                                       branch_fns={
                                           1: fn_1,
                                           2: fn_2,
                                           3: fn_3
                                       })

            # call fn_2: a sequence of fns maps to keys 0..N-1,
            # i.e. branch_fns={0: fn_1, 1: fn_2, 2: fn_3}
            out_1 = layers.switch_case(branch_index=index_1,
                                       branch_fns=(fn_1, fn_2, fn_3))

            # call default fn_3
            out_2 = layers.switch_case(branch_index=index_5,
                                       branch_fns=((1, fn_1), (2, fn_2)),
                                       default=fn_3)

            # no default, call fn_2
            out_3 = layers.switch_case(branch_index=index_2,
                                       branch_fns=[(1, fn_1), (2, fn_2)])

            # branch_index is 5 with no default and no matching key, so the
            # fn of the largest key (3 -> fn_2) is called
            out_4 = layers.switch_case(branch_index=index_5,
                                       branch_fns=[(1, fn_1), (3, fn_2),
                                                   (2, fn_3)])

            place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() \
                else fluid.CPUPlace()
            exe = fluid.Executor(place)

            res = exe.run(main_program,
                          fetch_list=[out_0, out_1, out_2, out_3, out_4])

            self.assertTrue(np.allclose(res[0], 1),
                            "result is {} but answer is {}".format(res[0], 1))
            self.assertTrue(np.allclose(res[1], 2),
                            "result is {} but answer is {}".format(res[1], 2))
            self.assertTrue(np.allclose(res[2], 3),
                            "result is {} but answer is {}".format(res[2], 3))
            self.assertTrue(np.allclose(res[3], 2),
                            "result is {} but answer is {}".format(res[3], 2))
            self.assertTrue(np.allclose(res[4], 2),
                            "result is {} but answer is {}".format(res[4], 2))
Example #2
    def build_predict_forward(self):
        head_dict = {}
        backbone = self._trainers[0]._pred_backbone
        for i in self._trainers:
            assert i._pred_head is not None and i._pred_backbone is not None, \
                "You should call build_predict_forward() for the {} task".format(i._name)
            assert i._pred_backbone == backbone, "The backbone for each task must be the same"
            head_dict[i._name] = i._pred_head

        pred_prog = fluid.Program()
        pred_init_prog = fluid.Program()
        self._pred_prog = pred_prog
        self._pred_init_prog = pred_init_prog

        def get_loss(i):
            head = head_dict[self._trainers[i].name]
            self._trainers[i]._lock_prog = True
            pred_vars = self._trainers[i].build_predict_forward(backbone, head)
            self._trainers[i]._lock_prog = False
            # the predict branch builds the graph for its side effects only;
            # unlike build_forward (Example #3) there is no loss_var to return

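        # 'lambda i=i' freezes each task index as a default argument; a plain
        # 'lambda: get_loss(i)' would late-bind i to its final loop value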
        task_fns = {
            i: lambda i=i: get_loss(i)
            for i in range(len(self._trainers))
        }

        with fluid.program_guard(pred_prog, pred_init_prog):
            task_id_var = fluid.data(name="__task_id",
                                     shape=[1],
                                     dtype='int64')

            loss_var = layers.switch_case(branch_index=task_id_var,
                                          branch_fns=task_fns)
        if not self._multi_task:
            self._init_exe_prog(for_train=False)
Example #3
    def build_forward(self):
        """
        Build forward computation graph for training, which usually built from input layer to loss node.

        Return:
            - loss_var: a Variable object. The computational graph variable(node) of loss.
        """
        head_dict = {}
        backbone = self._trainers[0]._backbone
        for i in self._trainers:
            assert i._task_head is not None and i._backbone is not None, \
                "You should call build_forward() for the {} task".format(i._name)
            assert i._backbone == backbone, "The backbone for each task must be the same"
            head_dict[i._name] = i._task_head

        train_prog = fluid.Program()
        train_init_prog = fluid.Program()
        self._train_prog = train_prog
        self._train_init_prog = train_init_prog

        def get_loss(i):
            head = head_dict[self._trainers[i].name]
            self._trainers[i]._lock_prog = True
            loss_var = self._trainers[i].build_forward(backbone, head)
            self._trainers[i]._lock_prog = False
            return loss_var

        task_fns = {
            i: lambda i=i: get_loss(i)
            for i in range(len(self._trainers))
        }

        with fluid.program_guard(train_prog, train_init_prog):
            task_id_var = fluid.data(name="__task_id",
                                     shape=[1],
                                     dtype='int64')

            loss_var = layers.switch_case(branch_index=task_id_var,
                                          branch_fns=task_fns)
        self._task_id_var = task_id_var
        self._loss_var = loss_var
        self._fetch_list = [loss_var.name]
        if not self._multi_task:
            self._init_exe_prog(for_train=True)
        return loss_var
Example #4
 def fn_3():
     out = layers.switch_case(
         branch_index=layers.fill_constant(shape=[1], dtype='int32', value=3),
         branch_fns={
             1: partial(layers.fill_constant, shape=[4, 3], dtype='int32', value=1),
             3: partial(fn_2, x=3),
         })
     return out
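Here fn_2 is assumed to be the sibling helper defined alongside this fragment; Example #16 shows the full trio (fn_1, fn_2, fn_3) in context.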
Example #5
 def fn_1(x=1):
     out = layers.switch_case(
         branch_index=layers.fill_constant(shape=[1], dtype='int32', value=x),
         branch_fns={
             1: partial(layers.fill_constant, shape=[1], dtype='int32', value=1),
             x: partial(layers.fill_constant, shape=[2], dtype='int32', value=x),
         })
     return out
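Note that branch_fns is keyed on both the literal 1 and the parameter x. With the default x=1 the two entries collide and Python's dict literal keeps the later one (the shape-[2] branch); the nested test in Example #16 calls this helper with x=2, which yields two distinct branches.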
Example #6
        def body(i):
            def fn_add_three():
                data_add_three = layers.elementwise_add(x=i, y=three)
                return data_add_three

            def fn_square():
                data_mul_data = layers.elementwise_mul(x=i, y=i)
                return data_mul_data

            def fn_add_one():
                data_add_one = layers.elementwise_add(x=i, y=one)
                return data_add_one

            return layers.switch_case(
                branch_index=i,
                branch_fns={2: fn_add_three,
                            5: fn_square},
                default=fn_add_one)
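This body references i, one and three from an enclosing scope, so it is evidently a while_loop body. A minimal driver sketch, assuming reconstructed constants and an upper bound (none of these names are part of the original snippet):

        # assumed context, not from the original test:
        one = layers.fill_constant(shape=[1], dtype='int32', value=1)
        three = layers.fill_constant(shape=[1], dtype='int32', value=3)
        start = layers.fill_constant(shape=[1], dtype='int32', value=1)
        ten = layers.fill_constant(shape=[1], dtype='int32', value=10)
        # body returns a single Variable, so wrap it to match loop_vars
        out = layers.while_loop(cond=lambda i: layers.less_than(i, ten),
                                body=lambda i: [body(i)], loop_vars=[start])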
Example #7
    def test_return_var_tuple(self):
        def fn_1():
            return (layers.fill_constant(shape=[1, 2], dtype='int32', value=1),
                    layers.fill_constant(shape=[2, 3], dtype='float32', value=2))

        def fn_2():
            return (layers.fill_constant(shape=[3, 4], dtype='int32', value=3),
                    layers.fill_constant(shape=[4, 5], dtype='float32', value=4))

        def fn_3():
            return (layers.fill_constant(shape=[5], dtype='int32', value=5),
                    layers.fill_constant(shape=[5, 6], dtype='float32', value=6))

        main_program = Program()
        startup_program = Program()
        with program_guard(main_program, startup_program):
            index_1 = layers.fill_constant(shape=[1], dtype='int32', value=1)

            out = layers.switch_case(index_1, ((1, fn_1), (2, fn_2)), fn_3)

            place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() \
                else fluid.CPUPlace()
            exe = fluid.Executor(place)
            ret = exe.run(main_program, fetch_list=out)

            self.assertTrue(
                np.allclose(np.asarray(ret[0]), np.full((1, 2), 1, np.int32)))
            self.assertTrue(
                np.allclose(np.asarray(ret[1]), np.full((2, 3), 2,
                                                        np.float32)))
Example #8
    def _init_train(self):

        instances = self.instances
        Backbone = self.Backbone
        bb_conf = self.bb_conf
        bb_name = self.bb_name
        dev_count = self.dev_count
        num_instances = len(instances)
        mrs = self.mrs
        branch = fluid.data(name="branch", shape=[1], dtype='int64')

        # set first_target/main task instance
        main_inst = None
        for inst in instances:
            if inst.is_target:
                main_inst = inst
                inst.is_first_target = True
                break
        main_conf = main_inst.config
        if not os.path.exists(main_conf['save_path']):
            os.makedirs(main_conf['save_path'])
            os.makedirs(os.path.join(main_conf['save_path'], 'ckpt'))

        # prepare backbone
        train_backbone = Backbone(bb_conf, phase='train')
        pred_backbone = Backbone(bb_conf, phase='pred')

        # create reader, task
        # then check i/o across reader, backbone and task_layer

        task_attrs = {}
        pred_task_attrs = []
        joint_input_names = {}
        joint_shape_and_dtypes = {}
        name_to_position = {}
        for i in range(num_instances):
            train_reader = instances[i].Reader(instances[i].config,
                                               phase='train')
            instances[i].reader['train'] = train_reader
            train_parad = instances[i].Paradigm(instances[i].config,
                                                phase='train',
                                                backbone_config=bb_conf)
            instances[i].task_layer['train'] = train_parad
            task_attr_from_reader = _encode_inputs(
                train_parad.inputs_attrs['reader'], instances[i].name)
            task_attrs[i] = task_attr_from_reader

            _check_io(train_backbone.inputs_attr,
                      train_reader.outputs_attr,
                      in_name=bb_name + '_backbone',
                      out_name='reader.train')
            _check_io(train_parad.inputs_attrs['reader'],
                      train_reader.outputs_attr,
                      in_name='task_paradigm.train.reader',
                      out_name='reader.train')
            _check_io(train_parad.inputs_attrs['backbone'],
                      train_backbone.outputs_attr,
                      in_name='task_paradigm.train.backbone',
                      out_name=bb_name + '_backbone')
            # merge reader input attrs from backbone and task_instances
            if instances[i].is_target:
                if 'pred_file' not in instances[i].config:
                    instances[i].config['pred_file'] = ''
                pred_reader = instances[i].Reader(instances[i].config,
                                                  phase='pred')
                pred_parad = instances[i].Paradigm(instances[i].config,
                                                   phase='pred',
                                                   backbone_config=bb_conf)
                instances[i].task_layer['pred'] = pred_parad
                task_attr_from_reader = _encode_inputs(
                    pred_parad.inputs_attrs['reader'], instances[i].name)
                pred_task_attrs.append(task_attr_from_reader)
                _check_io(pred_backbone.inputs_attr,
                          pred_reader.outputs_attr,
                          in_name=bb_name + '_backbone',
                          out_name='reader.pred')
                _check_io(pred_parad.inputs_attrs['reader'],
                          pred_reader.outputs_attr,
                          in_name='task_paradigm.pred.reader',
                          out_name='reader.pred')
                _check_io(pred_parad.inputs_attrs['backbone'],
                          pred_backbone.outputs_attr,
                          in_name='task_paradigm.pred.backbone',
                          out_name=bb_name + '_backbone')
            joint_input_names[i], joint_shape_and_dtypes[i], name_to_position[
                i] = merge_input_attrs(train_backbone.inputs_attr,
                                       task_attrs[i])

        pred_joint_input_names, pred_joint_shape_and_dtypes, _ = merge_input_attrs(
            pred_backbone.inputs_attr,
            pred_task_attrs,
            insert_taskid=False,
            insert_batchsize=False,
            insert_seqlen=False,
            insert_batchsize_x_seqlen=False)

        # shapes: [task_id, shapes_of_backbone, shapes_of_inst1, ..., shapes_of_instN]

        if DEBUG:
            print('----- for debug -----')
            print('joint input names:')
            print(joint_input_names)
            print('joint input shape and dtypes:')
            print(joint_shape_and_dtypes)

        # load data
        data_fns = {}
        for i in range(num_instances):
            print(instances[i].name + ": preparing data...", end='')
            instances[i].reader['train'].load_data()
            print('ok!')

        # merge dataset iterators and create net input vars
        iterators = []
        prefixes = []
        mrs = []

        for inst in instances:
            iterators.append(inst.reader['train'].iterator())
            prefixes.append(inst.name)
            mrs.append(inst.mix_ratio)

        joint_iterator_fn = create_joint_iterator_fn(iterators,
                                                     prefixes,
                                                     joint_shape_and_dtypes,
                                                     mrs,
                                                     name_to_position,
                                                     dev_count=dev_count,
                                                     verbose=VERBOSE,
                                                     return_type='dict')
        self._joint_iterator_fn = joint_iterator_fn

        input_attrs = {}
        net_inputs = {}
        bb_output_vars = {}
        bb_output_fns = {}

        # prepare predict vars for saving inference model
        pred_input_attrs = [[name, shape, dtype] for name, (shape, dtype) in
                            zip(pred_joint_input_names, pred_joint_shape_and_dtypes)]
        pred_prog = fluid.Program()
        pred_init_prog = fluid.Program()
        self._pred_prog = pred_prog

        with fluid.program_guard(main_program=pred_prog,
                                 startup_program=pred_init_prog):
            pred_net_inputs = create_net_inputs(pred_input_attrs)
            pred_bb_output_vars = pred_backbone.build(
                pred_net_inputs, scope_name='__paddlepalm_')

        task_inputs = {}
        task_output_vars = {}
        task_fns = {}

        def get_loss(i):
            input_attrs[i] = [[name, shape, dtype] for name, (shape, dtype) in
                              zip(joint_input_names[i], joint_shape_and_dtypes[i])]
            net_inputs[i] = create_net_inputs(input_attrs[i], async=False)
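            # NOTE: 'async' became a reserved keyword in Python 3.7, so this
            # call (and the commented variant below) only parses on older
            # interpreters; the kwarg name is kept as in the original source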
            # net_inputs = create_net_inputs(input_attrs, async=True, iterator_fn=joint_iterator_fn, dev_count=dev_count, n_prefetch=3)
            bb_output_vars[i] = train_backbone.build(
                net_inputs[i], scope_name='__paddlepalm_')
            assert sorted(bb_output_vars[i].keys()) == sorted(
                train_backbone.outputs_attr.keys())

            # build backbone and task layers
            task_inputs[i] = {'backbone': bb_output_vars[i]}
            task_inputs_from_reader = _decode_inputs(net_inputs[i],
                                                     instances[i].name)
            task_inputs[i]['reader'] = task_inputs_from_reader

            scope = instances[i].task_reuse_scope + '/'
            with fluid.unique_name.guard(scope):
                output_vars = instances[i].build_task_layer(task_inputs[i],
                                                            phase='train',
                                                            scope=scope)
                output_vars = {
                    instances[i].name + '/' + key: val
                    for key, val in output_vars.items()
                }
                loss_var = output_vars[instances[i].name + '/loss']
                task_output_vars[i] = output_vars

            if instances[i].is_target:
                with fluid.program_guard(pred_prog, pred_init_prog):
                    cur_inputs = _decode_inputs(pred_net_inputs,
                                                instances[i].name)
                    instances[i].pred_input = cur_inputs
                    pred_task_inputs = {
                        'backbone': pred_bb_output_vars,
                        'reader': cur_inputs
                    }
                    scope = instances[i].task_reuse_scope + '/'
                    with fluid.unique_name.guard(scope):
                        instances[i].build_task_layer(pred_task_inputs,
                                                      phase='pred',
                                                      scope=scope)
            return loss_var

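        # build one branch fn per task: task_loss() runs immediately, so each
        # returned lambda captures its own task_id rather than the loop's
        # final value of i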
        for i in range(num_instances):

            def task_loss():
                task_id = i
                return lambda: get_loss(task_id)

            task_fns[i] = task_loss()

        loss = layers.switch_case(branch_index=branch, branch_fns=task_fns)
        self._switched_loss = loss.name
        main_reader = main_inst.reader['train']

        num_examples = main_reader.num_examples
        for inst in instances:
            max_train_steps = int(
                main_conf['num_epochs'] * inst.mix_ratio *
                (num_examples // main_conf['batch_size'] // dev_count))
            if inst.is_target:
                print('{}: expected train steps {}.'.format(
                    inst.name, max_train_steps))
            inst.steps_pur_epoch = inst.reader[
                'train'].num_examples // main_conf['batch_size'] // dev_count
            inst.expected_train_steps = max_train_steps

        global_max_train_steps = int(
            main_conf['num_epochs'] * sum(mrs) *
            (num_examples // main_conf['batch_size'] // dev_count))
        print(
            'Estimated overall train steps {}.'.format(global_max_train_steps))

        if 'warmup_proportion' in main_conf and main_conf[
                'warmup_proportion'] > 0:
            warmup_steps = int(global_max_train_steps *
                               main_conf['warmup_proportion'])
            print('Warmup steps: ' + str(warmup_steps))
        else:
            warmup_steps = 0

        # build optimizer
        if 'optimizer' in main_conf:
            optim_mod = importlib.import_module(OPTIMIZER_DIR + '.' +
                                                main_conf['optimizer'])
            optimize = getattr(optim_mod, OPTIMIZE_METHOD)
            optimize(loss, main_conf, max_train_steps, warmup_steps,
                     fluid.default_main_program())

            loss.persistable = True
            if main_conf.get('use_ema', False):
                assert 'ema_decay' in main_conf, "ema_decay should be set when use_ema is enabled."
                ema = fluid.optimizer.ExponentialMovingAverage(
                    main_conf['ema_decay'])
                ema.update()

        # prepare for train
        self.train_backbone = train_backbone
        self.train_program = fluid.CompiledProgram(
            fluid.default_main_program()).with_data_parallel(
                loss_name=loss.name)
        self.saver_program = fluid.default_main_program()

        self.main_inst = main_inst
        self.has_init_train = True
        self.has_init_pred = True
        self._net_inputs = net_inputs

        self.exe.run(fluid.default_startup_program())
        print("\nRandomly initialize parameters...\n")
Example #9
 def type_error_default():
     layers.switch_case(branch_index=key_int32,
                        branch_fns=[(1, fn_1), (2, fn_2)],
                        default=1)
Example #10
 def value_error_key():
     layers.switch_case(branch_index=key_int32,
                        branch_fns=[(2, fn_1), (2, fn_2)],
                        default=fn_3)
Example #11
 def type_error_key():
     layers.switch_case(branch_index=key_int32,
                        branch_fns=[(2.3, 2)],
                        default=fn_3)
Example #12
 def type_error_index_fn_pair_2():
     layers.switch_case(branch_index=key_int32,
                        branch_fns=[(1, 2, 3)],
                        default=fn_3)
Example #13
 def type_error_branch_fns():
     layers.switch_case(branch_index=key_int32,
                        branch_fns=1,
                        default=fn_3)
Example #14
 def dtype_error_branch_index():
     layers.switch_case(branch_index=key_float32,
                        branch_fns=[(1, fn_1)],
                        default=fn_3)
Example #15
 def type_error_branch_index():
     layers.switch_case(branch_index=1,
                        branch_fns=[(1, fn_1)],
                        default=fn_3)
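Examples #9 through #15 are negative cases; each helper is expected to raise inside a program guard. A minimal sketch of the assertions that would drive them (key_int32, key_float32 and the fn_* helpers are reconstructed assumptions, and the exception types are inferred from the helper names):

 key_int32 = layers.fill_constant(shape=[1], dtype='int32', value=2)
 key_float32 = layers.fill_constant(shape=[1], dtype='float32', value=2.0)
 self.assertRaises(TypeError, type_error_default)
 self.assertRaises(ValueError, value_error_key)
 self.assertRaises(TypeError, type_error_key)
 self.assertRaises(TypeError, type_error_index_fn_pair_2)
 self.assertRaises(TypeError, type_error_branch_fns)
 self.assertRaises(TypeError, dtype_error_branch_index)
 self.assertRaises(TypeError, type_error_branch_index)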
Example #16
    def test_nested_switch_case(self):
        def fn_1(x=1):
            out = layers.switch_case(
                branch_index=layers.fill_constant(shape=[1], dtype='int32', value=x),
                branch_fns={
                    1: partial(layers.fill_constant, shape=[1], dtype='int32', value=1),
                    x: partial(layers.fill_constant, shape=[2], dtype='int32', value=x),
                })
            return out

        def fn_2(x=2):
            out = layers.switch_case(
                branch_index=layers.fill_constant(shape=[1], dtype='int32', value=2),
                branch_fns={
                    1: partial(layers.fill_constant, shape=[4, 3], dtype='int32', value=1),
                    2: partial(fn_1, x=x),
                })
            return out

        def fn_3():
            out = layers.switch_case(
                branch_index=layers.fill_constant(shape=[1], dtype='int32', value=3),
                branch_fns={
                    1: partial(layers.fill_constant, shape=[4, 3], dtype='int32', value=1),
                    3: partial(fn_2, x=3),
                })
            return out

        main_program = Program()
        startup_program = Program()
        with program_guard(main_program, startup_program):
            index_1 = fluid.data(name="index_1", shape=[1], dtype='uint8')
            index_2 = layers.fill_constant(shape=[1], dtype='int32', value=2)
            index_3 = layers.fill_constant(shape=[1], dtype='int64', value=3)

            out_1 = layers.switch_case(branch_index=index_1,
                                       branch_fns={
                                           1: fn_1,
                                           2: fn_2,
                                           3: fn_3
                                       })
            out_2 = layers.switch_case(branch_index=index_2,
                                       branch_fns={
                                           1: fn_1,
                                           2: fn_2,
                                           3: fn_3
                                       })

            out_3 = layers.switch_case(branch_index=index_3,
                                       branch_fns={
                                           1: fn_1,
                                           2: fn_2,
                                           3: fn_3
                                       })

            place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() \
                else fluid.CPUPlace()
            exe = fluid.Executor(place)

            res = exe.run(main_program,
                          feed={"index_1": np.array([1], dtype="uint8")},
                          fetch_list=[out_1, out_2, out_3])

            self.assertTrue(np.allclose(res[0], 1),
                            "result is {} but answer is {}".format(res[0], 1))
            self.assertTrue(np.allclose(res[1], 2),
                            "result is {} but answer is {}".format(res[1], 2))
            self.assertTrue(np.allclose(res[2], 3),
                            "result is {} but answer is {}".format(res[2], 3))