Ejemplo n.º 1
0
def main(_):
    """Generate data for the problems selected through command-line flags.

    The candidate set is the union of the keys of
    ``_SUPPORTED_PROBLEM_GENERATORS`` and ``registry.list_problems()``. It is
    narrowed by ``--exclude_problems`` (comma-separated substrings) and then
    by ``--problem``, which may be a prefix ending in ``*``, a comma-separated
    list of exact names, or a single exact name.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If no problem remains after filtering.
    """
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

    # Calculate the list of problems to generate.
    all_problems = sorted(
        list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems())
    problems = all_problems
    for exclude in FLAGS.exclude_problems.split(","):
        if exclude:
            # Exclusion is a substring match, not an exact-name match.
            problems = [p for p in problems if exclude not in p]

    selector = FLAGS.problem
    if not selector:
        problems = []
    elif selector[-1] == "*":
        # Trailing "*" selects every problem sharing the prefix.
        prefix = selector[:-1]
        problems = [p for p in problems if p.startswith(prefix)]
    elif "," in selector:
        # Split once instead of once per candidate problem.
        wanted = set(selector.split(","))
        problems = [p for p in problems if p in wanted]
    else:
        problems = [p for p in problems if p == selector]

    # Remove TIMIT if paths are not given. The condition must be negated:
    # the problems are dropped when the flag is missing or empty.
    if not getattr(FLAGS, "timit_paths", None):
        problems = [p for p in problems if "timit" not in p]
    # Remove parsing if paths are not given.
    if not getattr(FLAGS, "parsing_path", None):
        problems = [p for p in problems if "parsing_english_ptb" not in p]

    if not problems:
        problems_str = "\n  * ".join(all_problems)
        error_msg = ("You must specify one of the supported problems to "
                     "generate data for:\n  * " + problems_str + "\n")
        error_msg += ("TIMIT and parsing need data_sets specified with "
                      "--timit_paths and --parsing_path.")
        raise ValueError(error_msg)

    if not FLAGS.data_dir:
        # Fall back to the system temporary directory, but warn about it.
        FLAGS.data_dir = tempfile.gettempdir()
        tf.logging.warning(
            "It is strongly recommended to specify --data_dir. "
            "Data will be written to default data_dir=%s.", FLAGS.data_dir)
    FLAGS.data_dir = os.path.expanduser(FLAGS.data_dir)
    tf.gfile.MakeDirs(FLAGS.data_dir)

    tf.logging.info(
        "Generating problems:\n%s",
        registry.display_list_by_prefix(problems, starting_spaces=4))
    if FLAGS.only_list:
        # Listing only: stop before generating any data.
        return
    for problem in problems:
        # The seed is reset before every problem.
        set_random_seed()

        if problem in _SUPPORTED_PROBLEM_GENERATORS:
            generate_data_for_problem(problem)
        else:
            generate_data_for_registered_problem(problem)
Ejemplo n.º 2
0
def main(_):
  """Build the list of requested problems and generate data for each one.

  Candidates are the keys of `_SUPPORTED_PROBLEM_GENERATORS` plus
  `registry.list_problems()`. `--exclude_problems` removes every candidate
  containing one of its comma-separated substrings. `--problem` then selects
  by prefix (value ending in "*"), by comma-separated exact names, or by a
  single exact name.

  Args:
    _: Unused positional argument.

  Raises:
    ValueError: If the filtered problem list is empty.
  """
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  # Calculate the list of problems to generate.
  all_problems = sorted(
      list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems())
  problems = all_problems
  for exclude in FLAGS.exclude_problems.split(","):
    if exclude:
      # Substring match: any problem containing the token is dropped.
      problems = [p for p in problems if exclude not in p]

  requested = FLAGS.problem
  if not requested:
    problems = []
  elif requested[-1] == "*":
    # A trailing "*" means "every problem starting with this prefix".
    prefix = requested[:-1]
    problems = [p for p in problems if p.startswith(prefix)]
  elif "," in requested:
    # Split the flag once rather than for every candidate.
    names = set(requested.split(","))
    problems = [p for p in problems if p in names]
  else:
    problems = [p for p in problems if p == requested]

  # Remove TIMIT if paths are not given. Note the negation: problems are
  # filtered out when the flag is absent or empty, not when it is set.
  if not getattr(FLAGS, "timit_paths", None):
    problems = [p for p in problems if "timit" not in p]
  # Remove parsing if paths are not given.
  if not getattr(FLAGS, "parsing_path", None):
    problems = [p for p in problems if "parsing_english_ptb" not in p]

  if not problems:
    problems_str = "\n  * ".join(all_problems)
    error_msg = ("You must specify one of the supported problems to "
                 "generate data for:\n  * " + problems_str + "\n")
    error_msg += ("TIMIT and parsing need data_sets specified with "
                  "--timit_paths and --parsing_path.")
    raise ValueError(error_msg)

  if not FLAGS.data_dir:
    # No --data_dir: use the system temporary directory and warn.
    FLAGS.data_dir = tempfile.gettempdir()
    tf.logging.warning("It is strongly recommended to specify --data_dir. "
                       "Data will be written to default data_dir=%s.",
                       FLAGS.data_dir)
  FLAGS.data_dir = os.path.expanduser(FLAGS.data_dir)
  tf.gfile.MakeDirs(FLAGS.data_dir)

  tf.logging.info("Generating problems:\n%s",
                  registry.display_list_by_prefix(problems,
                                                  starting_spaces=4))
  if FLAGS.only_list:
    # Only the listing was requested; skip generation.
    return
  for problem in problems:
    # Reset the seed before each problem.
    set_random_seed()

    if problem in _SUPPORTED_PROBLEM_GENERATORS:
      generate_data_for_problem(problem)
    else:
      generate_data_for_registered_problem(problem)
Ejemplo n.º 3
0
def main(_):
  """Select the problems named by the flags and generate their data.

  Candidates are the keys of `_SUPPORTED_PROBLEM_GENERATORS` together with
  `registry.list_problems()`. `--exclude_problems` drops candidates that
  contain any of its comma-separated substrings. `--problem` selects by
  prefix (value ending in "*"), by a comma-separated list of exact names, or
  by one exact name.

  Args:
    _: Unused positional argument.

  Raises:
    ValueError: If no problem is left after filtering.
  """
  # NOTE(review): presumably a no-op when --t2t_usr_dir is unset; confirm
  # against usr_dir.import_usr_dir.
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  # Calculate the list of problems to generate: the built-in generators and
  # the registered problems are merged into one sorted list.
  all_problems = sorted(
      list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems())
  problems = all_problems
  # Problems can be excluded through a flag (substring match).
  for exclude in FLAGS.exclude_problems.split(","):
    if exclude:
      problems = [p for p in problems if exclude not in p]

  requested = FLAGS.problem
  if not requested:
    # Nothing requested, so nothing is selected.
    problems = []
  elif requested[-1] == "*":
    # A trailing "*" selects every problem that starts with the prefix.
    prefix = requested[:-1]
    problems = [p for p in problems if p.startswith(prefix)]
  elif "," in requested:
    # Comma-separated list of exact problem names.
    names = set(requested.split(","))
    problems = [p for p in problems if p in names]
  else:
    # Without "*" or ",", only the exactly named problem is kept.
    problems = [p for p in problems if p == requested]

  # Remove TIMIT if paths are not given. These two problem families need
  # their own path flags; without them they are filtered out.
  if not FLAGS.timit_paths:
    problems = [p for p in problems if "timit" not in p]
  # Remove parsing if paths are not given.
  if not FLAGS.parsing_path:
    problems = [p for p in problems if "parsing_english_ptb" not in p]

  if not problems:
    # No problem left: report every supported name in the error.
    problems_str = "\n  * ".join(all_problems)
    error_msg = ("You must specify one of the supported problems to "
                 "generate data for:\n  * " + problems_str + "\n")
    error_msg += ("TIMIT and parsing need data_sets specified with "
                  "--timit_paths and --parsing_path.")
    raise ValueError(error_msg)

  if not FLAGS.data_dir:
    # No data directory given: warn and use the system temporary directory.
    FLAGS.data_dir = tempfile.gettempdir()
    tf.logging.warning("It is strongly recommended to specify --data_dir. "
                       "Data will be written to default data_dir=%s.",
                       FLAGS.data_dir)
  FLAGS.data_dir = os.path.expanduser(FLAGS.data_dir)  # Expand a leading "~".
  tf.gfile.MakeDirs(FLAGS.data_dir)  # Create the directory.

  tf.logging.info("Generating problems:\n%s",
                  registry.display_list_by_prefix(problems,
                                                  starting_spaces=4))
  if FLAGS.only_list:
    # Only show the selected problems; do not generate any data.
    return
  for problem in problems:
    set_random_seed()  # Reset the random seed before each problem.

    # Problems from the two sources are generated through different helpers.
    if problem in _SUPPORTED_PROBLEM_GENERATORS:
      generate_data_for_problem(problem)
    else:
      generate_data_for_registered_problem(problem)