def _prepare(self):
    """ Prepares for evaluation.

    Builds the model with reuse=True, mode=INFER
    and preprocesses data file(s).
    """
    features_file = self._dataset["features_file"]
    labels_file = self._dataset["labels_file"]
    vocab_source = self._dataset["vocab_source"]
    vocab_target = self._dataset["vocab_target"]
    self._model_configs = update_infer_params(  # update inference parameters
        self._model_configs,
        beam_size=self._beam_size,
        maximum_labels_length=self._maximum_labels_length,
        length_penalty=self._length_penalty)
    estimator_spec = model_fn(model_configs=self._model_configs,
                              mode=ModeKeys.INFER,
                              vocab_source=vocab_source,
                              vocab_target=vocab_target,
                              name=self._model_name,
                              reuse=True,
                              verbose=False)
    self._predict_ops = estimator_spec.predictions
    text_inputter = TextLineInputter(
        line_readers=LineReader(
            data=features_file,
            preprocessing_fn=lambda x: vocab_source.convert_to_idlist(x)),
        padding_id=vocab_source.pad_id,
        batch_size=self._batch_size)
    self._infer_data = text_inputter.make_feeding_data(
        input_fields=estimator_spec.input_fields)
    tmp_trans_dir = os.path.join(self._model_configs["model_dir"],
                                 Constants.TMP_TRANS_DIRNAME)
    if not gfile.Exists(tmp_trans_dir):
        gfile.MakeDirs(tmp_trans_dir)
    self._tmp_trans_file_prefix = os.path.join(
        tmp_trans_dir, Constants.TMP_TRANS_FILENAME_PREFIX)
    self._read_ckpt_bleulog()
    # load references
    self._references = []
    for rfile in access_multiple_files(labels_file):
        with open_file(rfile) as fp:
            if self._char_level:
                self._references.append(to_chinese_char(fp.readlines()))
            else:
                self._references.append(fp.readlines())
    self._references = list(map(list, zip(*self._references)))
    with open_file(features_file) as fp:
        self._sources = fp.readlines()
    self._bad_count = 0
    self._best_bleu_score = 0.
def _prepare(self):
    """ Prepares for evaluation.

    Builds the model with reuse=True, mode=INFER
    and preprocesses data file(s).
    """
    self._model_configs = update_infer_params(  # update inference parameters
        self._model_configs,
        beam_size=self._beam_size,
        maximum_labels_length=self._maximum_labels_length,
        length_penalty=self._length_penalty)
    estimator_spec = model_fn(model_configs=self._model_configs,
                              mode=ModeKeys.INFER,
                              dataset=self._dataset,
                              name=self._model_name,
                              reuse=True,
                              verbose=False)
    self._predict_ops = estimator_spec.predictions
    text_inputter = TextLineInputter(
        dataset=self._dataset,
        data_field_name="eval_features_file",
        batch_size=self._batch_size)
    self._infer_data = text_inputter.make_feeding_data(
        input_fields=estimator_spec.input_fields)
    tmp_trans_dir = os.path.join(self._model_configs["model_dir"],
                                 Constants.TMP_TRANS_DIRNAME)
    if not gfile.Exists(tmp_trans_dir):
        gfile.MakeDirs(tmp_trans_dir)
    self._tmp_trans_file_prefix = os.path.join(
        tmp_trans_dir, Constants.TMP_TRANS_FILENAME_PREFIX)
    self._read_ckpt_bleulog()
    # load references
    self._references = []
    for rfile in self._dataset.eval_labels_file:
        with open_file(rfile) as fp:
            if self._char_level:
                self._references.append(to_chinese_char(fp.readlines()))
            else:
                self._references.append(fp.readlines())
    self._references = list(map(list, zip(*self._references)))
    with open_file(self._dataset.eval_features_file) as fp:
        self._sources = fp.readlines()
    self._bad_count = 0
    self._best_bleu_score = 0.
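# For orientation only: a minimal sketch of the state that _prepare() expects the
# enclosing validator object to carry. The class name and constructor signature
# below are assumptions for illustration; the attribute names are exactly those
# referenced in _prepare() above.
class _ValidatorStateSketch(object):
    def __init__(self, model_configs, dataset, model_name,
                 batch_size, beam_size, maximum_labels_length,
                 length_penalty, char_level):
        self._model_configs = model_configs  # dict; must contain "model_dir"
        self._dataset = dataset  # provides eval_features_file / eval_labels_file
        self._model_name = model_name
        self._batch_size = batch_size
        self._beam_size = beam_size
        self._maximum_labels_length = maximum_labels_length
        self._length_penalty = length_penalty
        self._char_level = char_level

    def _read_ckpt_bleulog(self):
        # Assumed helper (called by _prepare): restores previously logged
        # per-checkpoint BLEU scores; the real implementation is not shown here.
        raise NotImplementedError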
def multi_bleu_score_from_file(hypothesis_file,
                               references_files,
                               char_level=False):
    """ Computes corpus-level BLEU from a hypothesis file and reference file(s).

    Args:
        hypothesis_file: A string.
        references_files: A string. The name of the reference file, or the
          file name prefix when there are multiple reference files.
        char_level: Whether to evaluate at char level (for Chinese only).

    Returns:
        A float.
    """
    with open_file(hypothesis_file) as fp:
        hypothesis = fp.readlines()
    references = []
    for ref_file in access_multiple_files(references_files):
        with open_file(ref_file) as fp:
            if char_level:
                references.append(to_chinese_char(fp.readlines()))
            else:
                references.append(fp.readlines())
    references = list(map(list, zip(*references)))
    return multi_bleu_score(hypothesis, references)
def multi_bleu_score_from_file(
        hypothesis_file, references_files, char_level=False):
    """ Computes corpus-level BLEU from a hypothesis file and reference file(s).

    Args:
        hypothesis_file: A string.
        references_files: A string. The name of the reference file, or the
          file name prefix when there are multiple reference files.
        char_level: Whether to evaluate at char level (for Chinese only).

    Returns:
        A float.
    """
    with open_file(hypothesis_file) as fp:
        hypothesis = fp.readlines()
    references = []
    for ref_file in get_labels_files(references_files):
        with open_file(ref_file) as fp:
            if char_level:
                references.append(to_chinese_char(fp.readlines()))
            else:
                references.append(fp.readlines())
    references = list(map(list, zip(*references)))
    return multi_bleu_score(hypothesis, references)
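# A minimal usage sketch, not part of the library: the file paths below are
# hypothetical, and it assumes the references are either a single file or a set
# of files that get_labels_files can resolve from the given name/prefix.
def _example_bleu_from_files():
    bleu = multi_bleu_score_from_file(
        hypothesis_file="decodings/newstest.trans",
        references_files="data/newstest.ref",
        char_level=False)
    print("corpus-level BLEU: {:.2f}".format(bleu))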
def infer(sess,
          prediction_op,
          infer_data,
          output,
          vocab_source,
          vocab_target,
          delimiter=" ",
          output_attention=False,
          tokenize_output=False,
          verbose=True):
    """ Infers data and saves the prediction results.

    Args:
        sess: `tf.Session`.
        prediction_op: Tensorflow operation for inference.
        infer_data: An iterable whose elements are packed feeding
          dictionaries for `sess`.
        output: Output file name, `str`.
        vocab_source: A `Vocab` instance for source side feature map.
        vocab_target: A `Vocab` instance for target side feature map.
        delimiter: The delimiter of the output token sequence.
        output_attention: Whether to output attention information.
        tokenize_output: Whether to split words into characters
          (only for Chinese).
        verbose: Print inference information if set to True.

    Returns:
        A tuple `(sources, hypothesis)`, two lists of strings.
    """
    attentions = dict()
    hypothesis = []
    sources = []
    cnt = 0
    for data in infer_data:
        source_tokens = [
            vocab_source.convert_to_wordlist(x, bpe_decoding=False)
            for x in data["feature_ids"]]
        x_str = [delimiter.join(x) for x in source_tokens]
        prediction, att = _infer(sess, data["feed_dict"], prediction_op,
                                 len(x_str), top_k=1,
                                 output_attention=output_attention)
        sources.extend(x_str)
        hypothesis.extend([
            delimiter.join(
                vocab_target.convert_to_wordlist(prediction[sample_idx]))
            for sample_idx in range(prediction.shape[0])])
        if output_attention and att is not None:
            candidate_tokens = [
                vocab_target.convert_to_wordlist(prediction[idx, :],
                                                 bpe_decoding=False,
                                                 reverse_seq=False)
                for idx in range(len(x_str))]
            attentions.update(pack_batch_attention_dict(
                cnt, source_tokens, candidate_tokens, att))
        cnt += len(x_str)
        if verbose:
            tf.logging.info(cnt)
    if tokenize_output:
        hypothesis = to_chinese_char(hypothesis)
    if output:
        with gfile.GFile(output, "w") as fw:
            fw.write("\n".join(hypothesis) + "\n")
        if output_attention:
            dump_attentions(output, attentions)
    return sources, hypothesis
def infer(
        sess, prediction_op, infer_data, output,
        vocab_source, vocab_target,
        delimiter=" ",
        output_attention=False,
        tokenize_output=False,
        verbose=True):
    """ Infers data and saves the prediction results.

    Args:
        sess: `tf.Session`.
        prediction_op: Tensorflow operation for inference.
        infer_data: An iterable whose elements are packed feeding
          dictionaries for `sess`.
        output: Output file name, `str`.
        vocab_source: A `Vocab` instance for source side feature map.
        vocab_target: A `Vocab` instance for target side feature map.
        delimiter: The delimiter of the output token sequence.
        output_attention: Whether to output attention information.
        tokenize_output: Whether to split words into characters
          (only for Chinese).
        verbose: Print inference information if set to True.

    Returns:
        A tuple `(sources, hypothesis)`, two lists of strings.
    """
    attentions = dict()
    hypothesis = []
    sources = []
    cnt = 0
    for data in infer_data:
        source_tokens = [vocab_source.convert_to_wordlist(x, bpe_decoding=False)
                         for x in data["feature_ids"]]
        x_str = [delimiter.join(x) for x in source_tokens]
        prediction, att = _infer(sess, data["feed_dict"], prediction_op,
                                 len(x_str), top_k=1,
                                 output_attention=output_attention)
        sources.extend(x_str)
        hypothesis.extend([delimiter.join(
            vocab_target.convert_to_wordlist(prediction[sample_idx]))
            for sample_idx in range(len(prediction))])
        if output_attention and att is not None:
            candidate_tokens = [vocab_target.convert_to_wordlist(
                prediction[idx], bpe_decoding=False, reverse_seq=False)
                for idx in range(len(x_str))]
            attentions.update(pack_batch_attention_dict(
                cnt, source_tokens, candidate_tokens, att))
        cnt += len(x_str)
        if verbose:
            tf.logging.info(cnt)
    if tokenize_output:
        hypothesis = to_chinese_char(hypothesis)
    if output:
        with gfile.GFile(output, "w") as fw:
            fw.write("\n".join(hypothesis) + "\n")
        if output_attention:
            dump_attentions(output, attentions)
    return sources, hypothesis
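# For orientation only: a hedged sketch of how the pieces above typically fit
# together once _prepare() has run. The function name, the `validator` argument
# and the session/checkpoint handling are assumptions; only the attributes set
# in _prepare() and the helpers defined in this file are taken from the source.
def _example_evaluate(validator, sess, global_step):
    # Decode the evaluation features into a temporary translation file whose
    # name encodes the current training step.
    output_file = validator._tmp_trans_file_prefix + str(global_step)
    sources, hypothesis = infer(
        sess=sess,
        prediction_op=validator._predict_ops,
        infer_data=validator._infer_data,
        output=output_file,
        vocab_source=validator._dataset["vocab_source"],
        vocab_target=validator._dataset["vocab_target"],
        delimiter=" ",
        tokenize_output=validator._char_level,
        verbose=False)
    # Score the decodings against the pre-loaded, transposed references.
    return multi_bleu_score(hypothesis, validator._references)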