def run(self):
    """Run ensemble inference for every entry of the "infer_data" config.

    Steps, in order:
      1. Build source/target ``Vocab`` objects from the "infer" options.
      2. Build the ensemble prediction op with ``model_fn_ensemble``.
      3. Create a session and a ``TextLineInputter`` with one
         ``LineReader`` per "infer_data" entry.
      4. For each entry, call ``infer`` to write ``output_file`` and, when
         ``labels_file`` is not None, log the BLEU score of that output.
    """
    infer_opts = self._model_configs["infer"]

    vocab_source = Vocab(
        filename=infer_opts["source_words_vocabulary"],
        bpe_codes=infer_opts["source_bpecodes"])
    vocab_target = Vocab(
        filename=infer_opts["target_words_vocabulary"],
        bpe_codes=infer_opts["target_bpecodes"])

    estimator_spec = model_fn_ensemble(
        self._model_dirs,
        vocab_source,
        vocab_target,
        weight_scheme=self._weight_scheme,
        inference_options=infer_opts)
    predict_op = estimator_spec.predictions
    sess = self._build_default_session()

    def _to_ids(x):
        # Each raw input line is mapped to a list of source-vocabulary ids.
        return vocab_source.convert_to_idlist(x)

    infer_data = self._model_configs["infer_data"]
    readers = []
    for entry in infer_data:
        readers.append(
            LineReader(data=entry["features_file"], preprocessing_fn=_to_ids))
    text_inputter = TextLineInputter(
        line_readers=readers,
        padding_id=vocab_source.pad_id,
        batch_size=infer_opts["batch_size"])

    sess.run(tf.global_variables_initializer())
    tf.logging.info("Start inference.")
    overall_start_time = time.time()

    # One feeding-data item is paired with each configured input file.
    feeds = text_inputter.make_feeding_data(estimator_spec.input_fields)
    for feeding_data, param in zip(feeds, infer_data):
        features_path = param["features_file"]
        tf.logging.info(
            "Infer Source Features File: {}.".format(features_path))
        start_time = time.time()
        infer(
            sess=sess,
            prediction_op=predict_op,
            infer_data=feeding_data,
            output=param["output_file"],
            vocab_source=vocab_source,
            vocab_target=vocab_target,
            delimiter=infer_opts["delimiter"],
            output_attention=False,
            to_char_level=infer_opts["char_level"],
            verbose=True)
        tf.logging.info("FINISHED {}. Elapsed Time: {}.".format(
            features_path, str(time.time() - start_time)))

        # Scoring is only possible when reference labels were supplied.
        if param["labels_file"] is None:
            continue
        bleu_score = multi_bleu_score_from_file(
            hypothesis_file=param["output_file"],
            references_files=param["labels_file"],
            char_level=infer_opts["char_level"])
        tf.logging.info("BLEU score (%s): %.2f"
                        % (features_path, bleu_score))

    tf.logging.info("Total Elapsed Time: %s"
                    % str(time.time() - overall_start_time))
def run(self):
    """Run ensemble inference for every entry of the "infer_data" config.

    Steps, in order:
      1. Build ``self._vocab_source`` / ``self._vocab_target`` from the
         "infer" options.
      2. Wrap both vocabularies and all ``features_file`` paths in a
         ``Dataset``.
      3. Build the ensemble prediction op with ``model_fn_ensemble``.
      4. Create a session and a ``TextLineInputter`` over the dataset's
         "eval_features_file" field.
      5. For each entry, call ``infer`` to write ``output_file`` and, when
         ``labels_file`` is not None, log the score returned by
         ``multi_bleu_score``.
    """
    infer_opts = self._model_configs["infer"]

    self._vocab_source = Vocab(
        filename=infer_opts["source_words_vocabulary"],
        bpe_codes_file=infer_opts["source_bpecodes"])
    self._vocab_target = Vocab(
        filename=infer_opts["target_words_vocabulary"],
        bpe_codes_file=infer_opts["target_bpecodes"])

    # build dataset
    infer_data = self._model_configs["infer_data"]
    feature_files = [entry["features_file"] for entry in infer_data]
    dataset = Dataset(
        self._vocab_source,
        self._vocab_target,
        eval_features_file=feature_files)

    estimator_spec = model_fn_ensemble(
        self._model_dirs,
        dataset,
        weight_scheme=self._weight_scheme,
        inference_options=infer_opts)
    predict_op = estimator_spec.predictions
    sess = self._build_default_session()
    text_inputter = TextLineInputter(
        dataset=dataset,
        data_field_name="eval_features_file",
        batch_size=infer_opts["batch_size"])

    sess.run(tf.global_variables_initializer())
    tf.logging.info("Start inference.")
    overall_start_time = time.time()

    # One feeding-data item is paired with each configured input file.
    feeds = text_inputter.make_feeding_data()
    for feeding_data, param in zip(feeds, infer_data):
        features_path = param["features_file"]
        tf.logging.info(
            "Infer Source Features File: {}.".format(features_path))
        start_time = time.time()
        infer(
            sess=sess,
            prediction_op=predict_op,
            feeding_data=feeding_data,
            output=param["output_file"],
            vocab_target=self._vocab_target,
            delimiter=infer_opts["delimiter"],
            output_attention=False,
            tokenize_output=infer_opts["char_level"],
            tokenize_script=infer_opts["tokenize_script"],
            verbose=True)
        tf.logging.info("FINISHED {}. Elapsed Time: {}.".format(
            features_path, str(time.time() - start_time)))

        # Scoring is only possible when reference labels were supplied.
        if param["labels_file"] is None:
            continue
        bleu_score = multi_bleu_score(
            infer_opts["multibleu_script"],
            param["labels_file"],
            param["output_file"])
        tf.logging.info("BLEU score ({}): {}".format(
            features_path, bleu_score))

    tf.logging.info("Total Elapsed Time: %s"
                    % str(time.time() - overall_start_time))
def run(self):
    """Run ensemble inference for every entry of the "infer_data" config.

    Steps, in order:
      1. Build ``self._vocab_source`` / ``self._vocab_target`` from the
         "infer" options.
      2. Wrap both vocabularies and all ``features_file`` paths in a
         ``Dataset``.
      3. Build the ensemble prediction op with ``model_fn_ensemble``.
      4. Create a session and a ``TextLineInputter`` over the dataset's
         "eval_features_file" field.
      5. For each entry, call ``infer`` to write ``output_file`` and, when
         ``labels_file`` is not None, log the BLEU score computed by
         ``multi_bleu_score_from_file``.
    """
    infer_opts = self._model_configs["infer"]

    self._vocab_source = Vocab(
        filename=infer_opts["source_words_vocabulary"],
        bpe_codes=infer_opts["source_bpecodes"])
    self._vocab_target = Vocab(
        filename=infer_opts["target_words_vocabulary"],
        bpe_codes=infer_opts["target_bpecodes"])

    # build dataset
    infer_data = self._model_configs["infer_data"]
    feature_files = [entry["features_file"] for entry in infer_data]
    dataset = Dataset(
        self._vocab_source,
        self._vocab_target,
        eval_features_file=feature_files)

    estimator_spec = model_fn_ensemble(
        self._model_dirs,
        dataset,
        weight_scheme=self._weight_scheme,
        inference_options=infer_opts)
    predict_op = estimator_spec.predictions
    sess = self._build_default_session()
    text_inputter = TextLineInputter(
        dataset=dataset,
        data_field_name="eval_features_file",
        batch_size=infer_opts["batch_size"])

    sess.run(tf.global_variables_initializer())
    tf.logging.info("Start inference.")
    overall_start_time = time.time()

    # One feeding-data item is paired with each configured input file.
    feeds = text_inputter.make_feeding_data(estimator_spec.input_fields)
    for feeding_data, param in zip(feeds, infer_data):
        features_path = param["features_file"]
        tf.logging.info(
            "Infer Source Features File: {}.".format(features_path))
        start_time = time.time()
        infer(
            sess=sess,
            prediction_op=predict_op,
            infer_data=feeding_data,
            output=param["output_file"],
            vocab_source=self._vocab_source,
            vocab_target=self._vocab_target,
            delimiter=infer_opts["delimiter"],
            output_attention=False,
            tokenize_output=infer_opts["char_level"],
            verbose=True)
        tf.logging.info("FINISHED {}. Elapsed Time: {}.".format(
            features_path, str(time.time() - start_time)))

        # Scoring is only possible when reference labels were supplied.
        if param["labels_file"] is None:
            continue
        bleu_score = multi_bleu_score_from_file(
            hypothesis_file=param["output_file"],
            references_files=param["labels_file"],
            char_level=infer_opts["char_level"])
        tf.logging.info("BLEU score (%s): %.2f"
                        % (features_path, bleu_score))

    tf.logging.info("Total Elapsed Time: %s"
                    % str(time.time() - overall_start_time))