def build_predict_forward(self, pred_backbone, pred_head):
        """
        Build computation graph for evaluation and prediction.

        A fresh pair of main/startup programs is created and stored on the
        trainer; the backbone and the head are both built inside them.

        Arguments:
            - pred_backbone: a Backbone object with phase == 'predict'. For evaluating model during training, the predict backbone should keep the same with train backbone.
            - pred_head: a Head object with phase == 'predict'. For evaluating model during training, the predict head should keep the same with train head.

        Return:
            - output_vars: the value returned by `self._build_head` for the
              predict phase (a dict mapping output names to graph variables,
              or None/empty when the head exposes no outputs).
        """
        self._pred_head = pred_head
        self._pred_backbone = pred_backbone

        # Reader-side inputs required by the head, encoded with this
        # trainer's name (see helper.encode_inputs).
        pred_task_attr_from_reader = helper.encode_inputs(
            self._pred_head.inputs_attrs['reader'], self.name)

        # Merge backbone inputs and head inputs into one flat input spec.
        pred_input_names, pred_shape_and_dtypes, pred_name_to_position = \
            reader_helper.merge_input_attrs(
                pred_backbone.inputs_attr,
                pred_task_attr_from_reader,
                insert_taskid=False)
        pred_input_attrs = [
            [name, shape, dtype]
            for name, (shape, dtype) in zip(pred_input_names, pred_shape_and_dtypes)
        ]
        self._pred_shape_and_dtypes = pred_shape_and_dtypes
        self._pred_name_to_position = pred_name_to_position

        pred_prog = fluid.Program()
        self._pred_prog = pred_prog
        pred_init_prog = fluid.Program()
        self._pred_init_prog = pred_init_prog

        with fluid.program_guard(pred_prog, pred_init_prog):
            pred_net_inputs = reader_helper.create_net_inputs(pred_input_attrs)
            pred_bb_output_vars = pred_backbone.build(pred_net_inputs)
            self._pred_net_inputs = pred_net_inputs

            # prepare predict vars for saving inference model
            cur_inputs = helper.decode_inputs(pred_net_inputs, self.name)
            self._pred_input_name_list, self._pred_input_varname_list = \
                zip(*[[k, v.name] for k, v in cur_inputs.items()])

            pred_task_inputs = {'backbone': pred_bb_output_vars, 'reader': cur_inputs}
            scope = self.name + '.'
            with fluid.unique_name.guard(scope):
                output_vars = self._build_head(pred_task_inputs, phase='predict', scope=scope)

        if output_vars:
            self._pred_fetch_name_list, self._pred_fetch_list = zip(*output_vars.items())
        else:
            # None or an empty dict: nothing to fetch. `_pred_fetch_list` must
            # be defined on this path too, since it is the attribute set on
            # the success path.
            self._pred_fetch_name_list = []
            self._pred_fetch_list = []
            # Previously only this (differently named) attribute was set here;
            # kept so any existing reader of it keeps working.
            self._pred_fetch_var_list = []

        if not self._multi_task:
            self._init_exe_prog(for_train=False)
            self._exe.run(self._pred_init_prog)

        return output_vars
Beispiel #2
0
    def _init_train(self):
        """Build the joint training graph and the prediction graph for all task instances.

        Steps performed, in order:
          1. pick the first target instance as the main instance and make sure
             its save directory (and the `ckpt` sub-directory) exists;
          2. create train/pred backbones, per-instance readers and task
             layers, and check I/O compatibility between them;
          3. merge the input attributes, load training data and build the
             joint iterator;
          4. build backbone and task layers in the default (train) program and
             in a separate prediction program;
          5. build the task-id-selected loss, estimate train/warmup steps, and
             optionally build the optimizer (and EMA);
          6. store the results on `self` and run the default startup program.

        Raises:
            ValueError: if no instance is marked as a target task.
        """

        instances = self.instances
        Backbone = self.Backbone
        bb_conf = self.bb_conf
        bb_name = self.bb_name
        dev_count = self.dev_count
        num_instances = len(instances)

        # set first_target/main task instance
        main_inst = None
        for inst in instances:
            if inst.is_target:
                main_inst = inst
                inst.is_first_target = True
                break
        if main_inst is None:
            raise ValueError("at least one task instance must be marked as target.")
        main_conf = main_inst.config

        # os.makedirs also creates `save_path` itself when it is missing; the
        # check is on `ckpt` so it gets created even if `save_path` exists.
        ckpt_dir = os.path.join(main_conf['save_path'], 'ckpt')
        if not os.path.exists(ckpt_dir):
            os.makedirs(ckpt_dir)

        # prepare backbone
        train_backbone = Backbone(bb_conf, phase='train')
        pred_backbone = Backbone(bb_conf, phase='pred')

        # create reader, task
        # then check i/o across reader, backbone and task_layer
        task_attrs = []
        pred_task_attrs = []
        for inst in instances:
            train_reader = inst.Reader(inst.config, phase='train')
            inst.reader['train'] = train_reader
            train_parad = inst.Paradigm(inst.config, phase='train', backbone_config=bb_conf)
            inst.task_layer['train'] = train_parad
            task_attr_from_reader = _encode_inputs(train_parad.inputs_attrs['reader'], inst.name)
            task_attrs.append(task_attr_from_reader)

            _check_io(train_backbone.inputs_attr, train_reader.outputs_attr, in_name=bb_name+'_backbone', out_name='reader.train')
            _check_io(train_parad.inputs_attrs['reader'], train_reader.outputs_attr, in_name='task_paradigm.train.reader', out_name='reader.train')
            _check_io(train_parad.inputs_attrs['backbone'], train_backbone.outputs_attr, in_name='task_paradigm.train.backbone', out_name=bb_name+'_backbone')

            if inst.is_target:
                if 'pred_file' not in inst.config:
                    inst.config['pred_file'] = ''
                # The pred reader is only used here for the I/O checks below.
                pred_reader = inst.Reader(inst.config, phase='pred')
                pred_parad = inst.Paradigm(inst.config, phase='pred', backbone_config=bb_conf)
                inst.task_layer['pred'] = pred_parad
                task_attr_from_reader = _encode_inputs(pred_parad.inputs_attrs['reader'], inst.name)
                pred_task_attrs.append(task_attr_from_reader)
                _check_io(pred_backbone.inputs_attr, pred_reader.outputs_attr, in_name=bb_name+'_backbone', out_name='reader.pred')
                _check_io(pred_parad.inputs_attrs['reader'], pred_reader.outputs_attr, in_name='task_paradigm.pred.reader', out_name='reader.pred')
                _check_io(pred_parad.inputs_attrs['backbone'], pred_backbone.outputs_attr, in_name='task_paradigm.pred.backbone', out_name=bb_name+'_backbone')

        # merge reader input attrs from backbone and task_instances
        joint_input_names, joint_shape_and_dtypes, name_to_position = merge_input_attrs(train_backbone.inputs_attr, task_attrs)
        pred_joint_input_names, pred_joint_shape_and_dtypes, _ = merge_input_attrs(pred_backbone.inputs_attr, pred_task_attrs, insert_taskid=False, insert_batchsize=False, insert_seqlen=False, insert_batchsize_x_seqlen=False)
        # shapes: [task_id, shapes_of_backbone, shapes_of_inst1, ..., shapes_of_instN]

        if DEBUG:
            print('----- for debug -----')
            print('joint input names:')
            print(joint_input_names)
            print('joint input shape and dtypes:')
            print(joint_shape_and_dtypes)

        # load data
        for inst in instances:
            print(inst.name+": preparing data...", end='')
            inst.reader['train'].load_data()
            print('ok!')

        # merge dataset iterators and create net input vars
        iterators = []
        prefixes = []
        mrs = []

        for inst in instances:
            iterators.append(inst.reader['train'].iterator())
            prefixes.append(inst.name)
            mrs.append(inst.mix_ratio)

        joint_iterator_fn = create_joint_iterator_fn(iterators, prefixes, joint_shape_and_dtypes, mrs, name_to_position, dev_count=dev_count, verbose=VERBOSE, return_type='dict')
        self._joint_iterator_fn = joint_iterator_fn

        input_attrs = [[i, j, k] for i, (j, k) in zip(joint_input_names, joint_shape_and_dtypes)]
        pred_input_attrs = [[i, j, k] for i, (j, k) in zip(pred_joint_input_names, pred_joint_shape_and_dtypes)]
        # `async` is a reserved keyword since Python 3.7, so writing
        # `async=False` literally is a SyntaxError there. Passing it through a
        # dict sends the very same keyword argument and parses everywhere.
        net_inputs = create_net_inputs(input_attrs, **{'async': False})
        self._net_inputs = net_inputs

        # build backbone and task layers
        train_prog = fluid.default_main_program()
        train_init_prog = fluid.default_startup_program()
        bb_output_vars = train_backbone.build(net_inputs, scope_name='__paddlepalm_')

        assert sorted(bb_output_vars.keys()) == sorted(train_backbone.outputs_attr.keys())

        pred_prog = fluid.Program()
        pred_init_prog = fluid.Program()

        with fluid.program_guard(main_program=pred_prog, startup_program=pred_init_prog):
            pred_net_inputs = create_net_inputs(pred_input_attrs)
            pred_bb_output_vars = pred_backbone.build(pred_net_inputs, scope_name='__paddlepalm_')

        # Explicitly make the train programs the defaults again before
        # building the train task layers.
        fluid.framework.switch_main_program(train_prog)
        fluid.framework.switch_startup_program(train_init_prog)

        task_output_vars = {}
        for inst in instances:
            task_inputs = {
                'backbone': bb_output_vars,
                'reader': _decode_inputs(net_inputs, inst.name),
            }

            scope = inst.task_reuse_scope + '/'
            with fluid.unique_name.guard(scope):
                output_vars = inst.build_task_layer(task_inputs, phase='train', scope=scope)
                output_vars = {inst.name+'/'+key: val for key, val in output_vars.items()}
                old = len(task_output_vars)  # for debug
                task_output_vars.update(output_vars)
                # Fails if two instances produced the same prefixed key.
                assert len(task_output_vars) - old == len(output_vars)  # for debug

            # prepare predict vars for saving inference model
            if inst.is_target:
                with fluid.program_guard(pred_prog, pred_init_prog):
                    cur_inputs = _decode_inputs(pred_net_inputs, inst.name)
                    inst.pred_input = cur_inputs
                    pred_task_inputs = {'backbone': pred_bb_output_vars, 'reader': cur_inputs}
                    scope = inst.task_reuse_scope + '/'
                    with fluid.unique_name.guard(scope):
                        inst.build_task_layer(pred_task_inputs, phase='pred', scope=scope)

        fetches = {k: v.name for k, v in task_output_vars.items()}
        fetches['__task_id'] = net_inputs['__task_id'].name

        # compute loss: the one-hot task id selects the loss of the current task
        task_id_var = net_inputs['__task_id']
        task_id_vec = fluid.one_hot(task_id_var, num_instances)
        losses = fluid.layers.concat([task_output_vars[inst.name+'/loss'] for inst in instances], axis=0)
        loss = layers.reduce_sum(task_id_vec * losses)

        main_reader = main_inst.reader['train']

        num_examples = main_reader.num_examples
        for inst in instances:
            max_train_steps = int(main_conf['num_epochs'] * inst.mix_ratio * (num_examples // main_conf['batch_size'] // dev_count))
            if inst.is_target:
                print('{}: expected train steps {}.'.format(inst.name, max_train_steps))
            inst.steps_pur_epoch = inst.reader['train'].num_examples // main_conf['batch_size'] // dev_count
            inst.expected_train_steps = max_train_steps

        global_max_train_steps = int(main_conf['num_epochs'] * sum(mrs) * (num_examples // main_conf['batch_size'] // dev_count))
        print('Estimated overall train steps {}.'.format(global_max_train_steps))

        if 'warmup_proportion' in main_conf and main_conf['warmup_proportion'] > 0:
            warmup_steps = int(global_max_train_steps * main_conf['warmup_proportion'])
            print('Warmup steps: '+str(warmup_steps))
        else:
            warmup_steps = 0

        # build optimizer
        if 'optimizer' in main_conf:
            optim_mod = importlib.import_module(OPTIMIZER_DIR + '.' + main_conf['optimizer'])
            optimize = getattr(optim_mod, OPTIMIZE_METHOD)
            # NOTE(review): `max_train_steps` is whatever the loop above left
            # behind (the value for the LAST instance), while `warmup_steps`
            # is derived from `global_max_train_steps` - confirm this mix is
            # intended.
            optimize(loss, main_conf, max_train_steps, warmup_steps, fluid.default_main_program())

            loss.persistable = True
            if main_conf.get('use_ema', False):
                assert 'ema_decay' in main_conf, "ema_decay should be set when use_ema is enabled."
                ema = fluid.optimizer.ExponentialMovingAverage(main_conf['ema_decay'])
                ema.update()

        # prepare for train
        self.train_backbone = train_backbone
        self.train_program = fluid.CompiledProgram(fluid.default_main_program()).with_data_parallel(loss_name=loss.name)
        self.saver_program = fluid.default_main_program()

        self.main_inst = main_inst
        self.fetches = fetches
        self.has_init_train = True
        self.has_init_pred = True

        self.exe.run(fluid.default_startup_program())
        print("\nRandomly initialize parameters...\n")
Beispiel #3
0
    def build_forward(self, backbone, task_head):
        """
        Build forward computation graph for training, which usually built from input layer to loss node.

        When `self._lock_prog` is false, the graph is built inside a freshly
        created main/startup program pair that is also stored on the trainer;
        otherwise it is built in whatever programs are currently the defaults.

        Args:
            backbone: a Backbone object with phase == 'train', which is used to extract multi-level text features, e.g., contextual word embedding and sentence embedding.
            task_head: a Head object with phase == 'train', which is used to build task specific output layers.

        Return:
            loss_var: a Variable object. The computational graph variable(node) of loss.
        """
        self._task_head = task_head
        self._backbone = backbone
        self._build_forward = True

        task_attr_from_reader = helper.encode_inputs(self._task_head.inputs_attrs['reader'], self.name)

        # merge reader input attrs from backbone and task head
        input_names, shape_and_dtypes, name_to_position = reader_helper.merge_input_attrs(backbone.inputs_attr, task_attr_from_reader, insert_taskid=False)
        self._shape_and_dtypes = shape_and_dtypes
        self._name_to_position = name_to_position
        self._input_names = input_names

        if DEBUG:
            # Print the variables that actually exist in this function; the
            # former `joint_*` names were undefined here and raised NameError.
            print('----- for debug -----')
            print('joint input names:')
            print(input_names)
            print('joint input shape and dtypes:')
            print(shape_and_dtypes)

        input_attrs = [[i, j, k] for i, (j, k) in zip(input_names, shape_and_dtypes)]

        train_prog = fluid.Program()
        train_init_prog = fluid.Program()

        use_own_prog = not self._lock_prog
        if use_own_prog:
            self._train_prog = train_prog
            self._train_init_prog = train_init_prog

        # backbone
        if use_own_prog:
            with fluid.program_guard(train_prog, train_init_prog):
                net_inputs = reader_helper.create_net_inputs(input_attrs, is_async=False)
                bb_output_vars = backbone.build(net_inputs)
        else:
            net_inputs = reader_helper.create_net_inputs(input_attrs, is_async=False)
            bb_output_vars = backbone.build(net_inputs)
        self._net_inputs = net_inputs
        assert sorted(bb_output_vars.keys()) == sorted(backbone.outputs_attr.keys())

        # task head
        task_inputs = {
            'backbone': bb_output_vars,
            'reader': helper.decode_inputs(net_inputs, self.name),
        }

        scope = self.name+'.'
        if use_own_prog:
            with fluid.program_guard(train_prog, train_init_prog):
                with fluid.unique_name.guard(scope):
                    output_vars = self._build_head(task_inputs, phase='train', scope=scope)
        else:
            with fluid.unique_name.guard(scope):
                output_vars = self._build_head(task_inputs, phase='train', scope=scope)

        # Prefix every head output with the trainer name.
        task_output_vars = {self.name+'.'+key: val for key, val in output_vars.items()}

        self._fetches = {k: v.name for k, v in task_output_vars.items()}
        self._fetch_names, self._fetch_list = zip(*self._fetches.items())

        # loss
        if use_own_prog:
            with fluid.program_guard(train_prog, train_init_prog):
                loss_var = fluid.layers.reduce_sum(task_output_vars[self.name+'.loss'])
        else:
            loss_var = fluid.layers.reduce_sum(task_output_vars[self.name+'.loss'])

        self._loss_var = loss_var

        if not self._multi_task:
            self._init_exe_prog(for_train=True)

        return loss_var
    def build_forward(self, backbone, task_head):
        """
        Build forward computation graph for training, which usually built from input layer to loss node.

        The backbone, the task head and the loss node are each built under
        `fluid.program_guard` on a newly created program pair unless
        `self._lock_prog` is set, in which case the current default programs
        are used and the new pair is left unused.

        Args:
            backbone: a Backbone object with phase == 'train', which is used to extract multi-level text features, e.g., contextual word embedding and sentence embedding.
            task_head: a Head object with phase == 'train', which is used to build task specific output layers.

        Return:
            loss_var: a Variable object. The computational graph variable(node) of loss.
        """
        self._task_head = task_head
        self._backbone = backbone
        self._build_forward = True

        # Reader-side inputs required by the head, encoded with this
        # trainer's name (see helper.encode_inputs).
        task_attr_from_reader = helper.encode_inputs(
            self._task_head.inputs_attrs['reader'], self.name)

        # merge reader input attrs from backbone and task head
        input_names, shape_and_dtypes, name_to_position = reader_helper.merge_input_attrs(
            backbone.inputs_attr, task_attr_from_reader, insert_taskid=False)
        self._shape_and_dtypes = shape_and_dtypes
        self._name_to_position = name_to_position
        self._input_names = input_names

        if DEBUG:
            # `joint_input_names` / `joint_shape_and_dtypes` do not exist in
            # this method (NameError); print the merged attrs computed above.
            print('----- for debug -----')
            print('joint input names:')
            print(input_names)
            print('joint input shape and dtypes:')
            print(shape_and_dtypes)

        input_attrs = [[i, j, k]
                       for i, (j, k) in zip(input_names, shape_and_dtypes)]

        train_prog = fluid.Program()
        train_init_prog = fluid.Program()

        if not self._lock_prog:
            self._train_prog = train_prog
            self._train_init_prog = train_init_prog

        if not self._lock_prog:
            with fluid.program_guard(train_prog, train_init_prog):
                net_inputs = reader_helper.create_net_inputs(input_attrs,
                                                             is_async=False)
                bb_output_vars = backbone.build(net_inputs)
        else:
            net_inputs = reader_helper.create_net_inputs(input_attrs,
                                                         is_async=False)
            bb_output_vars = backbone.build(net_inputs)
        self._net_inputs = net_inputs
        # The backbone must produce exactly the outputs it declares.
        assert sorted(bb_output_vars.keys()) == sorted(
            backbone.outputs_attr.keys())

        task_inputs = {
            'backbone': bb_output_vars,
            'reader': helper.decode_inputs(net_inputs, self.name),
        }

        scope = self.name + '.'
        if not self._lock_prog:
            with fluid.program_guard(train_prog, train_init_prog):
                with fluid.unique_name.guard(scope):
                    output_vars = self._build_head(task_inputs,
                                                   phase='train',
                                                   scope=scope)
        else:
            with fluid.unique_name.guard(scope):
                output_vars = self._build_head(task_inputs,
                                               phase='train',
                                               scope=scope)

        # Head outputs keyed by '<trainer name>.<output name>'.
        task_output_vars = {
            self.name + '.' + key: val
            for key, val in output_vars.items()
        }

        self._fetches = {k: v.name for k, v in task_output_vars.items()}
        self._fetch_names, self._fetch_list = zip(*self._fetches.items())

        loss_key = self.name + '.loss'
        if not self._lock_prog:
            with fluid.program_guard(train_prog, train_init_prog):
                loss_var = fluid.layers.reduce_sum(task_output_vars[loss_key])
        else:
            loss_var = fluid.layers.reduce_sum(task_output_vars[loss_key])

        self._loss_var = loss_var

        if not self._multi_task:
            self._init_exe_prog(for_train=True)

        return loss_var