Ejemplo n.º 1
0
    def run(self):
        """ Runs ensemble model. """
        vocab_source = Vocab(
            filename=self._model_configs["infer"]["source_words_vocabulary"],
            bpe_codes=self._model_configs["infer"]["source_bpecodes"])
        vocab_target = Vocab(
            filename=self._model_configs["infer"]["target_words_vocabulary"],
            bpe_codes=self._model_configs["infer"]["target_bpecodes"])

        estimator_spec = model_fn_ensemble(
            self._model_dirs,
            vocab_source,
            vocab_target,
            weight_scheme=self._weight_scheme,
            inference_options=self._model_configs["infer"])
        predict_op = estimator_spec.predictions
        sess = self._build_default_session()
        text_inputter = TextLineInputter(line_readers=[
            LineReader(
                data=p["features_file"],
                preprocessing_fn=lambda x: vocab_source.convert_to_idlist(x))
            for p in self._model_configs["infer_data"]
        ],
                                         padding_id=vocab_source.pad_id,
                                         batch_size=self.
                                         _model_configs["infer"]["batch_size"])
        sess.run(tf.global_variables_initializer())
        tf.logging.info("Start inference.")
        overall_start_time = time.time()

        for feeding_data, param in zip(
                text_inputter.make_feeding_data(estimator_spec.input_fields),
                self._model_configs["infer_data"]):
            tf.logging.info("Infer Source Features File: {}.".format(
                param["features_file"]))
            start_time = time.time()
            infer(sess=sess,
                  prediction_op=predict_op,
                  infer_data=feeding_data,
                  output=param["output_file"],
                  vocab_source=vocab_source,
                  vocab_target=vocab_target,
                  delimiter=self._model_configs["infer"]["delimiter"],
                  output_attention=False,
                  to_char_level=self._model_configs["infer"]["char_level"],
                  verbose=True)
            tf.logging.info("FINISHED {}. Elapsed Time: {}.".format(
                param["features_file"], str(time.time() - start_time)))
            if param["labels_file"] is not None:
                bleu_score = multi_bleu_score_from_file(
                    hypothesis_file=param["output_file"],
                    references_files=param["labels_file"],
                    char_level=self._model_configs["infer"]["char_level"])
                tf.logging.info("BLEU score (%s): %.2f" %
                                (param["features_file"], bleu_score))
        tf.logging.info("Total Elapsed Time: %s" %
                        str(time.time() - overall_start_time))
Ejemplo n.º 2
0
    def run(self):
        """ Runs ensemble model. """
        self._vocab_source = Vocab(
            filename=self._model_configs["infer"]["source_words_vocabulary"],
            bpe_codes_file=self._model_configs["infer"]["source_bpecodes"])
        self._vocab_target = Vocab(
            filename=self._model_configs["infer"]["target_words_vocabulary"],
            bpe_codes_file=self._model_configs["infer"]["target_bpecodes"])
        # build dataset
        dataset = Dataset(self._vocab_source,
                          self._vocab_target,
                          eval_features_file=[
                              p["features_file"]
                              for p in self._model_configs["infer_data"]
                          ])
        estimator_spec = model_fn_ensemble(
            self._model_dirs,
            dataset,
            weight_scheme=self._weight_scheme,
            inference_options=self._model_configs["infer"])
        predict_op = estimator_spec.predictions
        sess = self._build_default_session()
        text_inputter = TextLineInputter(
            dataset=dataset,
            data_field_name="eval_features_file",
            batch_size=self._model_configs["infer"]["batch_size"])
        sess.run(tf.global_variables_initializer())
        tf.logging.info("Start inference.")
        overall_start_time = time.time()

        for feeding_data, param in zip(text_inputter.make_feeding_data(),
                                       self._model_configs["infer_data"]):
            tf.logging.info("Infer Source Features File: {}.".format(
                param["features_file"]))
            start_time = time.time()
            infer(sess=sess,
                  prediction_op=predict_op,
                  feeding_data=feeding_data,
                  output=param["output_file"],
                  vocab_target=self._vocab_target,
                  delimiter=self._model_configs["infer"]["delimiter"],
                  output_attention=False,
                  tokenize_output=self._model_configs["infer"]["char_level"],
                  tokenize_script=self._model_configs["infer"]
                  ["tokenize_script"],
                  verbose=True)
            tf.logging.info("FINISHED {}. Elapsed Time: {}.".format(
                param["features_file"], str(time.time() - start_time)))
            if param["labels_file"] is not None:
                bleu_score = multi_bleu_score(
                    self._model_configs["infer"]["multibleu_script"],
                    param["labels_file"], param["output_file"])
                tf.logging.info("BLEU score ({}): {}".format(
                    param["features_file"], bleu_score))
        tf.logging.info("Total Elapsed Time: %s" %
                        str(time.time() - overall_start_time))
Ejemplo n.º 3
0
    def run(self):
        """ Runs ensemble model. """
        self._vocab_source = Vocab(
            filename=self._model_configs["infer"]["source_words_vocabulary"],
            bpe_codes=self._model_configs["infer"]["source_bpecodes"])
        self._vocab_target = Vocab(
            filename=self._model_configs["infer"]["target_words_vocabulary"],
            bpe_codes=self._model_configs["infer"]["target_bpecodes"])
        # build dataset
        dataset = Dataset(
            self._vocab_source,
            self._vocab_target,
            eval_features_file=[p["features_file"] for p
                                in self._model_configs["infer_data"]])
        estimator_spec = model_fn_ensemble(
            self._model_dirs, dataset, weight_scheme=self._weight_scheme,
            inference_options=self._model_configs["infer"])
        predict_op = estimator_spec.predictions
        sess = self._build_default_session()
        text_inputter = TextLineInputter(
            dataset=dataset,
            data_field_name="eval_features_file",
            batch_size=self._model_configs["infer"]["batch_size"])
        sess.run(tf.global_variables_initializer())
        tf.logging.info("Start inference.")
        overall_start_time = time.time()

        for feeding_data, param in zip(text_inputter.make_feeding_data(estimator_spec.input_fields),
                                       self._model_configs["infer_data"]):
            tf.logging.info("Infer Source Features File: {}.".format(param["features_file"]))
            start_time = time.time()
            infer(sess=sess,
                  prediction_op=predict_op,
                  infer_data=feeding_data,
                  output=param["output_file"],
                  vocab_source=self._vocab_source,
                  vocab_target=self._vocab_target,
                  delimiter=self._model_configs["infer"]["delimiter"],
                  output_attention=False,
                  tokenize_output=self._model_configs["infer"]["char_level"],
                  verbose=True)
            tf.logging.info("FINISHED {}. Elapsed Time: {}."
                            .format(param["features_file"], str(time.time() - start_time)))
            if param["labels_file"] is not None:
                bleu_score = multi_bleu_score_from_file(
                    hypothesis_file=param["output_file"],
                    references_files=param["labels_file"],
                    char_level=self._model_configs["infer"]["char_level"])
                tf.logging.info("BLEU score (%s): %.2f"
                                % (param["features_file"], bleu_score))
        tf.logging.info("Total Elapsed Time: %s" % str(time.time() - overall_start_time))