예제 #1
0
def ernie_encoder_with_paddle_hub(ernie_inputs, max_seq_len):
    """Attach the PaddleHub ``ernie`` module to the default main program.

    Args:
        ernie_inputs: Mapping that provides the entries ``"src_ids"``,
            ``"sent_ids"``, ``"pos_ids"``, ``"input_mask"`` and
            ``"seq_lens"``.
        max_seq_len: Forwarded to ``Module.context`` as ``max_seq_len``.

    Returns:
        A dict with two entries, both flagged ``persistable``:
        ``"sentence_embeddings"`` (the module's ``"pooled_output"``) and
        ``"token_embeddings"`` (the module's ``"sequence_output"`` passed
        through ``sequence_unpad`` with ``ernie_inputs["seq_lens"]``).
    """
    module = hub.Module(name="ernie")
    feeds, fetches, ernie_program = module.context(
        trainable=True, max_seq_len=max_seq_len, learning_rate=1)

    current_program = fluid.default_main_program()

    # (input slot of the hub module, key of the tensor in ``ernie_inputs``)
    slot_to_source = (
        ("input_ids", "src_ids"),
        ("segment_ids", "sent_ids"),
        ("position_ids", "pos_ids"),
        ("input_mask", "input_mask"),
    )
    feed_map = {}
    for slot, source_key in slot_to_source:
        slot_name = feeds[slot].name
        feed_map[slot_name] = ernie_inputs[source_key]

    hub.connect_program(
        pre_program=current_program,
        next_program=ernie_program,
        input_dict=feed_map,
        inplace=True)

    token_feats = fluid.layers.sequence_unpad(
        fetches["sequence_output"], length=ernie_inputs["seq_lens"])
    sentence_feats = fetches["pooled_output"]

    result = {
        "sentence_embeddings": sentence_feats,
        "token_embeddings": token_feats,
    }

    # Flag every returned variable as persistable.
    for variable in result.values():
        variable.persistable = True

    return result
예제 #2
0
def create_model(args, pyreader_name, is_prediction=False):
    """Build a classification head on top of the PaddleHub ``ernie`` module.

    The hub module is connected to the default main program, its
    ``"pooled_output"`` goes through dropout and a fully connected layer of
    ``args.num_labels`` units, and softmax cross-entropy loss plus accuracy
    are computed against the labels from the reader.

    Args:
        args: Configuration object. The attributes read here are
            ``max_seq_len``, ``num_labels``, ``use_fp16`` and
            ``loss_scaling``.
        pyreader_name: Name forwarded to ``ernie_pyreader``.
        is_prediction: Not used by this function; kept so existing callers
            that pass it keep working.

    Returns:
        A ``(pyreader, graph_vars)`` tuple. ``graph_vars`` maps ``"loss"``,
        ``"probs"``, ``"accuracy"``, ``"labels"``, ``"num_seqs"`` and
        ``"qids"`` to graph variables, each flagged ``persistable``.
    """
    pyreader, ernie_inputs, labels = ernie_pyreader(pyreader_name, args.max_seq_len)
    module = hub.Module(name="ernie")
    # ``trainable`` is a boolean flag. The previous string "True" only worked
    # because any non-empty string is truthy ("False" would have enabled
    # training as well), so pass a real bool.
    inputs, outputs, program = module.context(
        trainable=True, max_seq_len=args.max_seq_len)
    input_dict = {
        inputs["input_ids"].name: ernie_inputs["src_ids"],
        inputs["position_ids"].name: ernie_inputs["pos_ids"],
        inputs["segment_ids"].name: ernie_inputs["sent_ids"],
        inputs["input_mask"].name: ernie_inputs["input_mask"],
    }
    hub.connect_program(
        pre_program=fluid.default_main_program(),
        next_program=program,
        input_dict=input_dict)

    cls_feats = fluid.layers.dropout(
        x=outputs["pooled_output"],
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name="cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=labels, return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)

    # Scale the loss only when fp16 is enabled and the factor exceeds 1.0.
    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs)

    graph_vars = {
        "loss": loss,
        "probs": probs,
        "accuracy": accuracy,
        "labels": labels,
        "num_seqs": num_seqs,
        "qids": ernie_inputs["qids"]
    }
    # Flag every returned variable as persistable.
    for var in graph_vars.values():
        var.persistable = True

    return pyreader, graph_vars
예제 #3
0
    def _build_env(self):
        """Build the programs and run-time state of ``self.env`` once.

        Returns immediately when ``self.env.is_inititalized`` is already set.
        Otherwise it clones the base main program, adds the network (and, for
        the train/test phases, label, loss and metrics), optionally prepends
        a py_reader program, applies the training strategy, optionally
        compiles the program for data-parallel execution and finally runs the
        startup program. ``_build_env_start_event`` and
        ``_build_env_end_event`` are called around the work.
        """
        # Build only once per env (attribute name is spelled this way on env).
        if self.env.is_inititalized:
            return

        self._build_env_start_event()
        self.env.is_inititalized = True
        self.env.main_program = clone_program(self._base_main_program,
                                              for_test=False)

        # The env gets a fresh startup program; the network itself is built
        # against the shared base startup program in the guard below.
        self.env.startup_program = fluid.Program()
        with fluid.program_guard(self.env.main_program,
                                 self._base_startup_program):
            with fluid.unique_name.guard(self.env.UNG):
                self.env.outputs = self._build_net()
                # Label, loss and metrics are added for train/test phases only.
                if self.is_train_phase or self.is_test_phase:
                    self.env.labels = self._add_label()
                    self.env.loss = self._add_loss()
                    self.env.metrics = self._add_metrics()

        # Predict/test phases: re-clone the program with for_test=True and
        # set the is_test attribute on its ops.
        if self.is_predict_phase or self.is_test_phase:
            self.env.main_program = clone_program(self.env.main_program,
                                                  for_test=True)
            hub.common.paddle_helper.set_op_attr(self.env.main_program,
                                                 is_test=True)

        if self.config.use_pyreader:
            # Build a new program that holds the py_reader, then connect the
            # already-built main program behind it.
            t_program = fluid.Program()
            with fluid.program_guard(t_program, self.env.startup_program):
                self.env.py_reader = fluid.layers.py_reader(
                    capacity=64,
                    shapes=[var.shape for var in self.feed_var_list],
                    dtypes=[
                        dtype_map[var.dtype] for var in self.feed_var_list
                    ],
                    lod_levels=[var.lod_level for var in self.feed_var_list],
                    use_double_buffer=False)

                feed_var_list = self.feed_var_list
                py_vars = fluid.layers.read_file(self.env.py_reader)
                py_vars = to_list(py_vars)
                # Map each feed variable name to the reader output at the
                # same position.
                input_dict = {
                    feed_var_list[index].name: py_var
                    for index, py_var in enumerate(py_vars)
                }

                hub.connect_program(pre_program=t_program,
                                    next_program=self.env.main_program,
                                    input_dict=input_dict,
                                    need_log=False)

            # From here on the connected program is the main program, so
            # loss, metrics and outputs are looked up again by name in its
            # global block.
            self.env.main_program = t_program
            if not self.is_predict_phase:
                self.env.loss = self.env.main_program.global_block().vars[
                    self.env.loss.name]
                metrics_name = [var.name for var in self.env.metrics]
                self.env.metrics = [
                    self.env.main_program.global_block().vars[name]
                    for name in metrics_name
                ]

            outputs_name = [var.name for var in self.env.outputs]
            self.env.outputs = [
                self.env.main_program.global_block().vars[name]
                for name in outputs_name
            ]

        # With memory optimization enabled, every fetched variable is flagged
        # persistable (presumably so the optimizer does not reuse its
        # storage -- confirm against the Paddle memory_optimize docs).
        if self.config.enable_memory_optim:
            for var_name in self.fetch_list:
                var = self.env.main_program.global_block().vars[var_name]
                var.persistable = True

        # Train phase: let the configured strategy extend the main program.
        # ``self.loss`` is defined outside this block.
        if self.is_train_phase:
            with fluid.program_guard(self.env.main_program,
                                     self._base_startup_program):
                with fluid.unique_name.guard(self.env.UNG):
                    self.config.strategy.execute(self.loss,
                                                 self._base_data_reader,
                                                 self.config)

        if self.is_train_phase:
            loss_name = self.env.loss.name
            # NOTE(review): this assignment is always overwritten by the
            # if/else on ``self._base_compiled_program`` just below.
            share_vars_from = None
        else:
            loss_name = None

        # Share variables with the base compiled program when one exists.
        if self._base_compiled_program is None:
            share_vars_from = None
        else:
            share_vars_from = self._base_compiled_program

        if not self.config.use_data_parallel:
            if self.config.enable_memory_optim:
                fluid.memory_optimize(self.env.main_program)
            self.env.main_program_compiled = None
        else:
            self.env.main_program_compiled = fluid.CompiledProgram(
                self.env.main_program).with_data_parallel(
                    loss_name=loss_name,
                    share_vars_from=share_vars_from,
                    build_strategy=self.build_strategy)

            # The first compiled program becomes the base that later
            # compiled programs share variables from.
            if self._base_compiled_program is None:
                self._base_compiled_program = self.env.main_program_compiled

        self.exe.run(self.env.startup_program)
        self._build_env_end_event()
예제 #4
0
    def _build_env(self):
        """Build the programs and run-time state of ``self.env`` once.

        Returns immediately when ``self.env.is_inititalized`` is already set.
        Otherwise it clones the base main program, adds the network (and, for
        the train/test phases, label, loss and metrics), optionally prepends
        a py_reader program, applies the training strategy (storing its
        result in ``self.scheduled_lr`` and ``self.max_train_steps``),
        optionally compiles the program for data-parallel execution and
        finally runs the startup program. ``_build_env_start_event`` and
        ``_build_env_end_event`` are called around the work.
        """
        # Build only once per env (attribute name is spelled this way on env).
        if self.env.is_inititalized:
            return

        self._build_env_start_event()
        self.env.is_inititalized = True
        self.env.main_program = clone_program(self.base_main_program,
                                              for_test=False)

        # The env gets a fresh startup program; the network itself is built
        # against the shared base startup program in the guard below.
        self.env.startup_program = fluid.Program()
        with fluid.program_guard(self.env.main_program,
                                 self._base_startup_program):
            with fluid.unique_name.guard(self.env.UNG):
                self.env.outputs = self._build_net()
                # Label, loss and metrics are added for train/test phases only.
                if self.is_train_phase or self.is_test_phase:
                    self.env.labels = self._add_label()
                    self.env.loss = self._add_loss()
                    self.env.metrics = self._add_metrics()

        if self.is_predict_phase or self.is_test_phase:
            # TODO: re-cloning the program with for_test=True is disabled
            # because it raised
            # paddle.fluid.core_avx.EnforceNotMet: Getting 'tensor_desc' is
            # not supported by the type of var kCUDNNFwdAlgoCache.
            # Disabled call, kept for reference:
            # self.env.main_program = clone_program(
            #     self.env.main_program, for_test=True)
            hub.common.paddle_helper.set_op_attr(self.env.main_program,
                                                 is_test=True)

        if self.config.use_pyreader:
            # Build a new program that holds the py_reader, then connect the
            # already-built main program behind it.
            t_program = fluid.Program()
            with fluid.program_guard(t_program, self.env.startup_program):
                self.env.py_reader = fluid.layers.py_reader(
                    capacity=64,
                    shapes=[var.shape for var in self.feed_var_list],
                    dtypes=[
                        dtype_map[var.dtype] for var in self.feed_var_list
                    ],
                    lod_levels=[var.lod_level for var in self.feed_var_list],
                    use_double_buffer=False)

                feed_var_list = self.feed_var_list
                py_vars = fluid.layers.read_file(self.env.py_reader)
                py_vars = to_list(py_vars)
                # Map each feed variable name to the reader output at the
                # same position.
                input_dict = {
                    feed_var_list[index].name: py_var
                    for index, py_var in enumerate(py_vars)
                }

                hub.connect_program(pre_program=t_program,
                                    next_program=self.env.main_program,
                                    input_dict=input_dict,
                                    need_log=False)

            # From here on the connected program is the main program, so
            # loss, metrics and outputs are looked up again by name in its
            # global block.
            self.env.main_program = t_program
            if not self.is_predict_phase:
                self.env.loss = self.env.main_program.global_block().vars[
                    self.env.loss.name]
                metrics_name = [var.name for var in self.env.metrics]
                self.env.metrics = [
                    self.env.main_program.global_block().vars[name]
                    for name in metrics_name
                ]

            outputs_name = [var.name for var in self.env.outputs]
            self.env.outputs = [
                self.env.main_program.global_block().vars[name]
                for name in outputs_name
            ]

        # With memory optimization enabled, every fetched variable is flagged
        # persistable (presumably so its storage is not reused -- confirm).
        if self.config.enable_memory_optim:
            for var_name in self.fetch_list:
                var = self.env.main_program.global_block().vars[var_name]
                var.persistable = True

        # Remove every handler attached to the root logger, to avoid log
        # lines being printed twice because of how paddle-fluid 1.6 uses
        # logging. Iterates over a copy since handlers are removed in the loop.
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)

        # Train phase: let the configured strategy extend the main program
        # and keep the two values it returns. ``self.loss`` and
        # ``self.device_count`` are defined outside this block.
        if self.is_train_phase:
            with fluid.program_guard(self.env.main_program,
                                     self._base_startup_program):
                with fluid.unique_name.guard(self.env.UNG):
                    self.scheduled_lr, self.max_train_steps = self.config.strategy.execute(
                        self.loss, self._base_data_reader, self.config,
                        self.device_count)

        # Only the train phase passes a loss name to the compiled program.
        if self.is_train_phase:
            loss_name = self.env.loss.name
        else:
            loss_name = None

        share_vars_from = self._base_compiled_program

        if not self.config.use_data_parallel:
            self.env.main_program_compiled = None
        else:
            self.env.main_program_compiled = fluid.CompiledProgram(
                self.env.main_program).with_data_parallel(
                    loss_name=loss_name,
                    share_vars_from=share_vars_from,
                    build_strategy=self.build_strategy)

        self.exe.run(self.env.startup_program)

        # Same root-logger handler removal as above, repeated after the
        # startup program has run; the original note attributes the
        # duplicated output to the logger usage of paddle-fluid 1.5.
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)

        self._build_env_end_event()