def _generate_movielens_examples(cls,
                                 data_dir,
                                 generated_examples_dir,
                                 train_filename,
                                 test_filename,
                                 vocab_filename,
                                 meta_filename,
                                 min_timeline_length=3,
                                 max_context_length=10):
  """Generates MovieLens examples and returns a dict with the metadata."""
  train_file = os.path.join(generated_examples_dir, train_filename)
  test_file = os.path.join(generated_examples_dir, test_filename)
  meta_file = os.path.join(generated_examples_dir, meta_filename)
  # Create the dataset and meta files only if they do not already exist.
  if not all([os.path.exists(f) for f in (train_file, test_file, meta_file)]):
    stats = _gen.generate_datasets(
        data_dir,
        output_dir=generated_examples_dir,
        min_timeline_length=min_timeline_length,
        max_context_length=max_context_length,
        build_movie_vocab=True,
        train_filename=train_filename,
        test_filename=test_filename,
        vocab_filename=vocab_filename,
    )
    file_util.write_json_file(meta_file, stats)
  meta = file_util.load_json_file(meta_file)
  return meta
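

# Usage sketch for the helper above; not part of the original module. It assumes
# the helper is installed as a classmethod on a loader class, hypothetically
# named `RecommendationDataLoader`, and that `data_dir` holds the unzipped
# MovieLens text files. Existing TFRecord/meta files are reused; otherwise they
# are generated into `examples_dir`.
def _example_generate_movielens_examples(data_dir, examples_dir):
  # `RecommendationDataLoader` is a hypothetical name for the enclosing class.
  meta = RecommendationDataLoader._generate_movielens_examples(
      data_dir=data_dir,
      generated_examples_dir=examples_dir,
      train_filename='train_movielens_1m.tfrecord',
      test_filename='test_movielens_1m.tfrecord',
      vocab_filename='movie_vocab.json',
      meta_filename='meta.json',
  )
  # `meta` is the dict loaded from meta.json (generated file paths, sizes, etc.).
  return meta

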
def generate_movielens_dataset(
    cls,
    data_dir,
    generated_examples_dir=None,
    train_filename='train_movielens_1m.tfrecord',
    test_filename='test_movielens_1m.tfrecord',
    vocab_filename='movie_vocab.json',
    meta_filename='meta.json',
    min_timeline_length=3,
    max_context_length=10,
    max_context_movie_genre_length=10,
    min_rating=None,
    train_data_fraction=0.9,
    build_vocabs=True,
):
  """Generates the MovieLens dataset and returns a dict with the metadata.

  Args:
    data_dir: str, path to the dataset containing the (unzipped) text data.
    generated_examples_dir: str, path to write the preprocessed examples to
      (default: same as data_dir).
    train_filename: str, generated file name for the training data.
    test_filename: str, generated file name for the test data.
    vocab_filename: str, generated file name for the vocab data.
    meta_filename: str, generated file name for the meta data.
    min_timeline_length: int, minimum timeline length to split the train/eval
      set.
    max_context_length: int, maximum context length as one input.
    max_context_movie_genre_length: int, maximum context length of movie
      genres as one input.
    min_rating: int or None, if set, only include examples with at least this
      rating.
    train_data_fraction: float, fraction of the data used for training, in
      [0.0, 1.0].
    build_vocabs: boolean, whether to build vocabs.

  Returns:
    Dict, metadata for the MovieLens dataset, containing keys such as
    `train_file`, `train_size`, `test_file`, `test_size`, `vocab_file`,
    `vocab_size`, etc.
  """
  if not generated_examples_dir:
    # By default, write the generated examples into data_dir.
    generated_examples_dir = data_dir
  train_file = os.path.join(generated_examples_dir, train_filename)
  test_file = os.path.join(generated_examples_dir, test_filename)
  meta_file = os.path.join(generated_examples_dir, meta_filename)
  # Create the dataset and meta files only if they do not already exist.
  if not all([os.path.exists(f) for f in (train_file, test_file, meta_file)]):
    stats = _gen.generate_datasets(
        data_dir,
        output_dir=generated_examples_dir,
        min_timeline_length=min_timeline_length,
        max_context_length=max_context_length,
        max_context_movie_genre_length=max_context_movie_genre_length,
        min_rating=min_rating,
        build_vocabs=build_vocabs,
        train_data_fraction=train_data_fraction,
        train_filename=train_filename,
        test_filename=test_filename,
        vocab_filename=vocab_filename,
    )
    file_util.write_json_file(meta_file, stats)
  meta = file_util.load_json_file(meta_file)
  return meta
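

# Usage sketch for generate_movielens_dataset above; not part of the original
# module. The class name `RecommendationDataLoader` and the example path are
# hypothetical; the metadata keys read below are the ones listed in the
# docstring's Returns section.
def _example_generate_movielens_dataset(data_dir='/tmp/movielens-1m'):
  # `RecommendationDataLoader` is a hypothetical name for the enclosing class.
  meta = RecommendationDataLoader.generate_movielens_dataset(
      data_dir=data_dir,
      min_rating=2,  # Hypothetical choice: only keep examples rated >= 2.
      train_data_fraction=0.9,  # 90% of the data goes to the train split.
      build_vocabs=True,
  )
  print('train examples:', meta['train_size'],
        'test examples:', meta['test_size'],
        'vocab size:', meta['vocab_size'])
  return meta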