Beispiel #1
0
# --t2t_usr_dir: string flag, empty by default. Per its help text it names a
# Python module to import so that the registrations made there (e.g.
# @registry.register_problem) become available to t2t-datagen.
flags.DEFINE_string(
    "t2t_usr_dir",
    "",
    "Path to a Python module that will be imported. "
    "The __init__.py file should include the necessary imports. "
    "The imported files should contain registrations, "
    "e.g. @registry.register_problem calls, "
    "that will then be available to t2t-datagen.",
)

# Mapping from problems that we can generate data for to their generators.
# pylint: disable=g-long-lambda
_SUPPORTED_PROBLEM_GENERATORS = {
    "algorithmic_algebra_inverse":
    (lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000),
     lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000),
     lambda: None),  # test set
    "parsing_english_ptb8k":
    (lambda: wsj_parsing.parsing_token_generator(FLAGS.data_dir, FLAGS.tmp_dir,
                                                 True, 2**13, 2**9),
     lambda: wsj_parsing.parsing_token_generator(FLAGS.data_dir, FLAGS.tmp_dir,
                                                 False, 2**13, 2**9),
     lambda: None),  # test set
    "parsing_english_ptb16k":
    (lambda: wsj_parsing.parsing_token_generator(FLAGS.data_dir, FLAGS.tmp_dir,
                                                 True, 2**14, 2**9),
     lambda: wsj_parsing.parsing_token_generator(FLAGS.data_dir, FLAGS.tmp_dir,
                                                 False, 2**14, 2**9),
     lambda: None),  # test set
    "inference_snli32k":
    (lambda: snli.snli_token_generator(FLAGS.tmp_dir, True, 2**15),
     lambda: snli.snli_token_generator(FLAGS.tmp_dir, False, 2**15),
     lambda: None),  # test set
    "audio_timit_characters_test":
    (lambda: audio.timit_generator(FLAGS.data_dir, FLAGS.tmp_dir, True, 1718),
Beispiel #2
0
    "Applies only to problems for which multiprocess_generate=True.")
# User-module hook: a string flag (default "") whose help text describes it as
# the path of a Python module to import, so that registrations in it (e.g.
# @registry.register_problem) are available to t2t-datagen.
flags.DEFINE_string(
    "t2t_usr_dir", "",
    "Path to a Python module that will be imported. The __init__.py file "
    "should include the necessary imports. The imported files should "
    "contain registrations, e.g. @registry.register_problem calls, that "
    "will then be available to t2t-datagen.")

# Mapping from problems that we can generate data for to their generators.
# pylint: disable=g-long-lambda
_SUPPORTED_PROBLEM_GENERATORS = {
    "algorithmic_algebra_inverse": (
        lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000),
        lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)),
    "parsing_english_ptb8k": (
        lambda: wsj_parsing.parsing_token_generator(
            FLAGS.data_dir, FLAGS.tmp_dir, True, 2**13, 2**9),
        lambda: wsj_parsing.parsing_token_generator(
            FLAGS.data_dir, FLAGS.tmp_dir, False, 2**13, 2**9)),
    "parsing_english_ptb16k": (
        lambda: wsj_parsing.parsing_token_generator(
            FLAGS.data_dir, FLAGS.tmp_dir, True, 2**14, 2**9),
        lambda: wsj_parsing.parsing_token_generator(
            FLAGS.data_dir, FLAGS.tmp_dir, False, 2**14, 2**9)),
    "inference_snli32k": (
        lambda: snli.snli_token_generator(FLAGS.tmp_dir, True, 2**15),
        lambda: snli.snli_token_generator(FLAGS.tmp_dir, False, 2**15),
    ),
    "audio_timit_characters_test": (
        lambda: audio.timit_generator(
            FLAGS.data_dir, FLAGS.tmp_dir, True, 1718),
        lambda: audio.timit_generator(