def _main(_):
    """CLI entry: scores a hypothesis file against a reference file with a metric."""
    # Command-line flags only; this tool takes no YAML config file.
    parser = flags_core.define_flags(FLAG_LIST, with_config_file=False)
    parsed_args, leftover_argv = flags_core.intelligent_parse_flags(FLAG_LIST, parser)
    flags_core.verbose_flags(FLAG_LIST, parsed_args, leftover_argv)
    # Build the configured metric and run the evaluation over the two files.
    scorer = build_metric(parsed_args)
    evaluate(scorer, parsed_args["hypo_file"], parsed_args["ref_file"])
def _main(_):
    """CLI entry: averages the weights of several checkpoints into one output."""
    # Command-line flags only; this tool takes no YAML config file.
    parser = flags_core.define_flags(FLAG_LIST, with_config_file=False)
    parsed_args, leftover_argv = flags_core.intelligent_parse_flags(FLAG_LIST, parser)
    flags_core.verbose_flags(FLAG_LIST, parsed_args, leftover_argv)
    # The checkpoint list flag may be nested/comma-joined; flatten it first.
    ckpt_list = flatten_string_list(parsed_args["checkpoints"])
    average_checkpoints(checkpoints=ckpt_list,
                        output_path=parsed_args["output_path"])
def _main(_):
    """CLI entry: parses program flags and launches the selected experiment."""
    # define and parse program flags
    parser = flags_core.define_flags(FLAG_LIST)
    parsed_args, leftover_argv = flags_core.intelligent_parse_flags(
        FLAG_LIST, parser, _pre_load_args)
    # Pull in the additional flags declared by the chosen experiment class.
    parsed_args, leftover_argv = flags_core.extend_define_and_parse(
        BaseExperiment.REGISTRY_NAME, parsed_args, leftover_argv)
    # Guard clause: an experiment entry class is mandatory.
    if parsed_args["entry.class"] is None:
        raise ValueError("Must provide entry/entry.class.")
    run_experiment(parsed_args, leftover_argv)
def _main(_):
    """CLI entry: builds a vocabulary file from a text corpus."""
    # Command-line flags only; this tool takes no YAML config file.
    parser = flags_core.define_flags(FLAG_LIST, with_config_file=False)
    parsed_args, leftover_argv = flags_core.intelligent_parse_flags(FLAG_LIST, parser)
    flags_core.verbose_flags(FLAG_LIST, parsed_args, leftover_argv)
    # NOTE: `input`/`output` are generate_vocab's keyword names (callee API).
    generate_vocab(input=parsed_args["input"],
                   output=parsed_args["output"],
                   min_frequency=parsed_args["min_frequency"],
                   max_vocab_size=parsed_args["max_vocab_size"],
                   lowercase=parsed_args["lowercase"],
                   extra_slots=parsed_args["extra_slots"])
def _main(_):
    """CLI entry: runs `main` on a dataset with a configured feature extractor."""
    # define and parse program flags (a config file is also accepted)
    parser = flags_core.define_flags(FLAG_LIST, with_config_file=True)
    parsed_args, leftover_argv = flags_core.intelligent_parse_flags(FLAG_LIST, parser)
    flags_core.verbose_flags(FLAG_LIST, parsed_args, leftover_argv)
    # Keep build order: the extractor is constructed before the dataset check,
    # matching the original call sequence.
    ds = build_dataset(parsed_args)
    extractor = build_feature_extractor(parsed_args)
    if ds is None:
        raise ValueError("dataset must be provided.")
    main(ds, extractor)
def _main(_):
    """CLI entry: tokenizes a text file line by line into an output file."""
    # Command-line flags only; this tool takes no YAML config file.
    parser = flags_core.define_flags(FLAG_LIST, with_config_file=False)
    parsed_args, leftover_argv = flags_core.intelligent_parse_flags(FLAG_LIST, parser)
    flags_core.verbose_flags(FLAG_LIST, parsed_args, leftover_argv)
    tokenizer = build_tokenizer(parsed_args)
    # Stream: read -> normalize (case/punctuation) -> tokenize -> write.
    with tf.io.gfile.GFile(parsed_args["input"]) as reader, \
            tf.io.gfile.GFile(parsed_args["output"], "w") as writer:
        for raw_line in reader:
            normalized = lowercase_and_remove_punctuations(
                tokenizer.language, raw_line.strip(),
                parsed_args["lowercase"], parsed_args["remove_punctuation"])
            writer.write(tokenizer.tokenize(normalized, return_str=True) + "\n")
def _main(_):
    """CLI entry: writes a dataset's transcripts and translations to files."""
    # define and parse program flags (a config file is also accepted)
    parser = flags_core.define_flags(FLAG_LIST, with_config_file=True)
    parsed_args, leftover_argv = flags_core.intelligent_parse_flags(FLAG_LIST, parser)
    flags_core.verbose_flags(FLAG_LIST, parsed_args, leftover_argv)
    ds = build_dataset(parsed_args)
    if ds is None:
        raise ValueError("dataset must be provided.")
    main(dataset=ds,
         output_transcript_file=parsed_args["output_transcript_file"],
         output_translation_file=parsed_args["output_translation_file"])
def _main(_):
    """CLI entry: preprocesses/shards a dataset using the configured task."""
    # define and parse program flags (a config file is also accepted)
    parser = flags_core.define_flags(FLAG_LIST, with_config_file=True)
    parsed_args, leftover_argv = flags_core.intelligent_parse_flags(FLAG_LIST, parser)
    flags_core.verbose_flags(FLAG_LIST, parsed_args, leftover_argv)
    this_task = build_task(parsed_args)
    ds = build_dataset(parsed_args)
    if ds is None:
        raise ValueError("dataset must be provided.")
    main(processor_id=parsed_args["processor_id"],
         num_processors=parsed_args["num_processors"],
         num_output_shards=parsed_args["num_output_shards"],
         output_range_begin=parsed_args["output_range_begin"],
         output_range_end=parsed_args["output_range_end"],
         output_template=parsed_args["output_template"],
         progressbar=parsed_args["progressbar"],
         dataset=ds,
         task=this_task)
def _main(_):
    """CLI entry: converts a checkpoint between model formats."""
    # Command-line flags only; this tool takes no YAML config file.
    # NOTE: this tool uses plain parse_flags (no intelligent parsing).
    parser = flags_core.define_flags(FLAG_LIST, with_config_file=False)
    parsed_args, leftover_argv = flags_core.parse_flags(FLAG_LIST, parser)
    flags_core.verbose_flags(FLAG_LIST, parsed_args, leftover_argv)
    model_converter = build_converter(parsed_args["model_name"])
    convert(model_converter, parsed_args["from"], parsed_args["to"])
def _main(_):
    """CLI entry: runs a cascaded ASR -> MT pipeline over an audio dataset.

    Pipeline: (1) decode audio with the ASR model into `asr_output_file`,
    optionally scoring against dataset transcripts; (2) feed the ASR output
    as source text into the MT model, with the dataset targets as references.
    Intermediate artifacts may live in in-memory "ram://" files.
    """
    # define and parse program flags
    arg_parser = flags_core.define_flags(FLAG_LIST)
    args, remaining_argv = flags_core.parse_flags(FLAG_LIST, arg_parser)
    flags_core.verbose_flags(FLAG_LIST, args, remaining_argv)
    strategy = training_utils.handle_distribution_strategy(
        args["distribution_strategy"])
    training_utils.startup_env(
        dtype=args["dtype"], enable_xla=False,
        enable_check_numerics=args["enable_check_numerics"])
    # Two independent task/model pairs are restored from separate model dirs.
    asr_task, asr_model = _build_task_model(strategy, args["asr_model_dir"],
                                            batch_size=args["batch_size"])
    mt_task, mt_model = _build_task_model(strategy, args["mt_model_dir"],
                                          batch_size=args["batch_size"])
    audio_dataset = build_dataset(args)
    # ========= ASR ==========
    asr_output_file = args["asr_output_file"]
    if asr_output_file is None:
        # No path given: keep the intermediate hypotheses in an in-memory file.
        asr_output_file = "ram://asr_output_file"
    logging.info("Creating ASR generator.")
    with training_utils.get_strategy_scope(strategy):
        asr_generator = build_exp(
            {
                "class": SequenceGenerator,
                "params": {
                    "output_file": asr_output_file,
                    "search_method.class": args["asr_search_method.class"],
                    "search_method.params": args["asr_search_method.params"],
                }
            },
            strategy=strategy, model=asr_model, task=asr_task,
            model_dir=args["asr_model_dir"], custom_dataset=audio_dataset)
    # NOTE(review): reconstructed indentation places run() outside the
    # strategy scope (scope used for construction only) — confirm upstream.
    asr_generator.run()
    # Score ASR only when the dataset carries ground-truth transcripts.
    if hasattr(audio_dataset, "transcripts") and audio_dataset.transcripts is not None:
        asr_metric = asr_task.get_eval_metric(args, "asr_metric")
        with tf.io.gfile.GFile(asr_output_file, "r") as fp:
            metric_result = asr_metric([line.strip() for line in fp],
                                       audio_dataset.transcripts)
        logging.info("Evaluation Result of ASR:")
        for k, v in metric_result.items():
            logging.info("   %s=%.2f", k, v)
    # ========= MT ==========
    logging.info("Creating MT generator.")
    # Dump the dataset's target-side text to an in-memory reference file
    # so the MT stage can be scored against it.
    mt_reference_file = "ram://mt_reference_file"
    with tf.io.gfile.GFile(mt_reference_file, "w") as fw:
        for x in audio_dataset.targets:
            fw.write(x.strip() + "\n")
    with training_utils.get_strategy_scope(strategy):
        mt_generator = build_exp(
            {
                "class": SequenceGenerator,
                "params": {
                    "output_file": args["mt_output_file"],
                    "search_method.class": args["mt_search_method.class"],
                    "search_method.params": args["mt_search_method.params"],
                    "metric.class": args["mt_metric.class"],
                    "metric.params": args["mt_metric.params"]
                }
            },
            strategy=strategy, model=mt_model, task=mt_task,
            model_dir=args["mt_model_dir"],
            # Cascade: MT's source is the ASR hypotheses file; references are
            # the dumped dataset targets.
            custom_dataset=build_dataset({
                "class": ParallelTextDataset,
                "params": {
                    "src_file": asr_output_file,
                    "trg_file": mt_reference_file
                }
            }))
    mt_generator.run()