# NOTE(review): both lines of this chunk appear to have lost their original
# line breaks. As written, Python treats everything after the first `#` on
# each line as a comment, so the `_SUPPORTED_PROBLEM_GENERATORS` text below
# is not executed. Restore the newlines from the upstream file before
# relying on it -- TODO confirm.
#
# First line: registers the string flag "t2t_usr_dir" (second argument "").
# Per its help text, it names a Python module to import so that its
# registrations (e.g. @registry.register_problem) are available to
# t2t-datagen. The trailing text is a truncated mapping from problem-name
# strings to tuples of zero-argument lambdas that call generator functions;
# here each tuple ends with `lambda: None` marked "test set".
#
# Second line: opens with the closing string of a call that starts outside
# this chunk, repeats the same flag registration, and carries another
# truncated form of the mapping whose tuples hold two lambdas each.
flags.DEFINE_string( "t2t_usr_dir", "", "Path to a Python module that will be imported. The " "__init__.py file should include the necessary imports. " "The imported files should contain registrations, " "e.g. @registry.register_problem calls, that will then be " "available to t2t-datagen.") # Mapping from problems that we can generate data for to their generators. # pylint: disable=g-long-lambda _SUPPORTED_PROBLEM_GENERATORS = { "algorithmic_algebra_inverse": (lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000), lambda: None), # test set "parsing_english_ptb8k": (lambda: wsj_parsing.parsing_token_generator(FLAGS.data_dir, FLAGS.tmp_dir, True, 2**13, 2**9), lambda: wsj_parsing.parsing_token_generator(FLAGS.data_dir, FLAGS.tmp_dir, False, 2**13, 2**9), lambda: None), # test set "parsing_english_ptb16k": (lambda: wsj_parsing.parsing_token_generator(FLAGS.data_dir, FLAGS.tmp_dir, True, 2**14, 2**9), lambda: wsj_parsing.parsing_token_generator(FLAGS.data_dir, FLAGS.tmp_dir, False, 2**14, 2**9), lambda: None), # test set "inference_snli32k": (lambda: snli.snli_token_generator(FLAGS.tmp_dir, True, 2**15), lambda: snli.snli_token_generator(FLAGS.tmp_dir, False, 2**15), lambda: None), # test set "audio_timit_characters_test": (lambda: audio.timit_generator(FLAGS.data_dir, FLAGS.tmp_dir, True, 1718),
"Applies only to problems for which multiprocess_generate=True.") flags.DEFINE_string("t2t_usr_dir", "", "Path to a Python module that will be imported. The " "__init__.py file should include the necessary imports. " "The imported files should contain registrations, " "e.g. @registry.register_problem calls, that will then be " "available to t2t-datagen.") # Mapping from problems that we can generate data for to their generators. # pylint: disable=g-long-lambda _SUPPORTED_PROBLEM_GENERATORS = { "algorithmic_algebra_inverse": ( lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), "parsing_english_ptb8k": ( lambda: wsj_parsing.parsing_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, True, 2**13, 2**9), lambda: wsj_parsing.parsing_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, False, 2**13, 2**9)), "parsing_english_ptb16k": ( lambda: wsj_parsing.parsing_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, True, 2**14, 2**9), lambda: wsj_parsing.parsing_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, False, 2**14, 2**9)), "inference_snli32k": ( lambda: snli.snli_token_generator(FLAGS.tmp_dir, True, 2**15), lambda: snli.snli_token_generator(FLAGS.tmp_dir, False, 2**15), ), "audio_timit_characters_test": ( lambda: audio.timit_generator( FLAGS.data_dir, FLAGS.tmp_dir, True, 1718), lambda: audio.timit_generator(