def _generate_movielens_examples(
    cls,
    data_dir,
    generated_examples_dir,
    train_filename,
    test_filename,
    vocab_filename,
    meta_filename,
    min_timeline_length=3,
    max_context_length=10,
):
    """Build the movielens example files when needed and return the metadata.

    The train, test and meta files are expected inside
    `generated_examples_dir`. If any one of them is missing, the datasets are
    (re)generated through `_gen.generate_datasets` (always with
    `build_movie_vocab=True`) and the stats it returns are written to the meta
    file. In every case the meta file is then read back from disk and its
    content is returned.

    Args:
      data_dir: Forwarded to `_gen.generate_datasets` as its first positional
        argument.
      generated_examples_dir: Directory holding the generated files; also
        forwarded as `output_dir`.
      train_filename: File name of the training examples.
      test_filename: File name of the test examples.
      vocab_filename: File name of the vocab, forwarded to the generator.
      meta_filename: File name of the JSON metadata.
      min_timeline_length: Forwarded to the generator unchanged.
      max_context_length: Forwarded to the generator unchanged.

    Returns:
      The metadata as loaded by `file_util.load_json_file` from the meta file.
    """
    expected_paths = [
        os.path.join(generated_examples_dir, name)
        for name in (train_filename, test_filename, meta_filename)
    ]
    meta_path = expected_paths[-1]

    # Regenerate everything as soon as a single expected file is absent.
    missing_paths = [
        path for path in expected_paths if not os.path.exists(path)
    ]
    if missing_paths:
        generation_stats = _gen.generate_datasets(
            data_dir,
            output_dir=generated_examples_dir,
            min_timeline_length=min_timeline_length,
            max_context_length=max_context_length,
            build_movie_vocab=True,
            train_filename=train_filename,
            test_filename=test_filename,
            vocab_filename=vocab_filename,
        )
        file_util.write_json_file(meta_path, generation_stats)

    # Always return what is stored on disk, even right after writing it.
    return file_util.load_json_file(meta_path)
def evaluate(
    self,
    model,
    input_fn,
    num_steps,
    eval_examples,
    eval_features,
    predict_file,
    version_2_with_negative,
    max_answer_length,
    null_score_diff_threshold,
    verbose_logging,
    output_dir,
):
    """Evaluate a QA model and return its SQuAD metrics.

    Runs `self.predict`, post-processes the raw results with
    `squad_lib.postprocess_output` (using `n_best_size=20`), optionally dumps
    the post-processed outputs to `output_dir`, and finally scores the
    predictions against the `data` entry of `predict_file` with the SQuAD v1.1
    or v2.0 evaluation module.

    Args:
      model: The model to be evaluated.
      input_fn: Function that returns a tf.data.Dataset used for evaluation.
      num_steps: Number of steps to evaluate the model.
      eval_examples: List of `squad_lib.SquadExample` for evaluation data.
      eval_features: List of `squad_lib.InputFeatures` for evaluation data.
      predict_file: The input predict file; loaded as JSON.
      version_2_with_negative: Whether the input predict file is in SQuAD 2.0
        format. Selects the v2.0 evaluation when true, v1.1 otherwise.
      max_answer_length: The maximum length of an answer that can be
        generated. Needed because the start and end predictions are not
        conditioned on one another.
      null_score_diff_threshold: If null_score - best_non_null is greater than
        the threshold, predict null. Only used for SQuAD v2.
      verbose_logging: If true, all of the warnings related to data processing
        are printed. A number of warnings are expected for a normal SQuAD
        evaluation.
      output_dir: The output directory for the json files predictions.json,
        nbest_predictions.json and null_odds.json. If None, nothing is saved.

    Returns:
      A dict containing two metrics: exact match rate and F1 score.
    """
    raw_results = self.predict(model, input_fn, num_steps)

    postprocessed = squad_lib.postprocess_output(
        eval_examples,
        eval_features,
        raw_results,
        n_best_size=20,
        max_answer_length=max_answer_length,
        do_lower_case=self.do_lower_case,
        version_2_with_negative=version_2_with_negative,
        null_score_diff_threshold=null_score_diff_threshold,
        verbose=verbose_logging,
    )
    predictions, nbest_json, scores_diff_json = postprocessed

    # Saving to disk is optional and skipped when no directory is given.
    if output_dir is not None:
        dump_to_files(
            predictions,
            nbest_json,
            scores_diff_json,
            version_2_with_negative,
            output_dir,
        )

    pred_dataset = file_util.load_json_file(predict_file)['data']

    if not version_2_with_negative:
        return squad_evaluate_v1_1.evaluate(pred_dataset, predictions)
    return squad_evaluate_v2_0.evaluate(
        pred_dataset, predictions, scores_diff_json
    )
def _load(tfrecord_file, meta_data_file, model_spec, is_training=None):
    """Loads a dataset from a tfrecord file and metadata from a JSON file.

    Args:
      tfrecord_file: Path of the tfrecord file to read the dataset from.
      meta_data_file: Path of the JSON file holding the metadata. The loaded
        metadata must contain a `size` entry, which is logged.
      model_spec: Object providing `get_name_to_features` and
        `select_data_from_record`.
      is_training: When not None, forwarded as the `is_training` keyword to
        `model_spec.get_name_to_features`; when None, that method is called
        without arguments.

    Returns:
      A `(dataset, meta_data)` tuple.
    """
    # Only pass `is_training` along when the caller supplied it explicitly.
    feature_kwargs = {} if is_training is None else {'is_training': is_training}
    feature_spec = model_spec.get_name_to_features(**feature_kwargs)

    raw_dataset = input_pipeline.single_file_dataset(tfrecord_file, feature_spec)
    dataset = raw_dataset.map(
        model_spec.select_data_from_record,
        num_parallel_calls=tf.data.AUTOTUNE,
    )

    meta_data = file_util.load_json_file(meta_data_file)
    logging.info(
        'Load preprocessed data and metadata from %s and %s '
        'with size: %d',
        tfrecord_file,
        meta_data_file,
        meta_data['size'],
    )
    return dataset, meta_data
def generate_movielens_dataset(
    cls,
    data_dir,
    generated_examples_dir=None,
    train_filename='train_movielens_1m.tfrecord',
    test_filename='test_movielens_1m.tfrecord',
    vocab_filename='movie_vocab.json',
    meta_filename='meta.json',
    min_timeline_length=3,
    max_context_length=10,
    max_context_movie_genre_length=10,
    min_rating=None,
    train_data_fraction=0.9,
    build_vocabs=True,
):
    """Generate the movielens dataset and return a dict with its metadata.

    The generated files are only (re)built when at least one of the train,
    test or meta files is missing from the output directory; the resulting
    stats are then written to the meta file. The meta file is always read back
    from disk to produce the return value.

    Args:
      data_dir: str, path to dataset containing (unzipped) text data.
      generated_examples_dir: str, path to generate preprocessed examples.
        (default: same as data_dir)
      train_filename: str, generated file name for training data.
      test_filename: str, generated file name for test data.
      vocab_filename: str, generated file name for vocab data.
      meta_filename: str, generated file name for meta data.
      min_timeline_length: int, min timeline length to split train/eval set.
      max_context_length: int, max context length as one input.
      max_context_movie_genre_length: int, max context length of movie genre
        as one input.
      min_rating: int or None, include examples with min rating.
      train_data_fraction: float, percentage of training data [0.0, 1.0].
      build_vocabs: boolean, whether to build vocabs.

    Returns:
      Dict, metadata for the movielens dataset. Containing keys:
        `train_file`, `train_size`, `test_file`, `test_size`, `vocab_file`,
        `vocab_size`, etc.
    """
    # An unset (or otherwise falsy) output directory falls back to data_dir.
    generated_examples_dir = generated_examples_dir or data_dir

    expected_paths = [
        os.path.join(generated_examples_dir, name)
        for name in (train_filename, test_filename, meta_filename)
    ]
    meta_path = expected_paths[-1]

    # Regenerate everything as soon as a single expected file is absent.
    missing_paths = [
        path for path in expected_paths if not os.path.exists(path)
    ]
    if missing_paths:
        generation_stats = _gen.generate_datasets(
            data_dir,
            output_dir=generated_examples_dir,
            min_timeline_length=min_timeline_length,
            max_context_length=max_context_length,
            max_context_movie_genre_length=max_context_movie_genre_length,
            min_rating=min_rating,
            build_vocabs=build_vocabs,
            train_data_fraction=train_data_fraction,
            train_filename=train_filename,
            test_filename=test_filename,
            vocab_filename=vocab_filename,
        )
        file_util.write_json_file(meta_path, generation_stats)

    # Always return what is stored on disk, even right after writing it.
    return file_util.load_json_file(meta_path)