Example 1
def _main(_):
    arg_parser = flags_core.define_flags(FLAG_LIST, with_config_file=False)
    args, remaining_argv = flags_core.intelligent_parse_flags(
        FLAG_LIST, arg_parser)
    flags_core.verbose_flags(FLAG_LIST, args, remaining_argv)
    average_checkpoints(checkpoints=flatten_string_list(args["checkpoints"]),
                        output_path=args["output_path"])
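For context, a minimal sketch of what checkpoint averaging boils down to, assuming each checkpoint is exposed as a dict mapping variable names to NumPy arrays (the actual average_checkpoints implementation is not shown here):

import numpy as np

def average_weight_dicts(weight_dicts):
    # Element-wise average of variables across several checkpoints.
    # All dicts are assumed to share the same keys and shapes.
    averaged = {}
    for name in weight_dicts[0]:
        averaged[name] = np.mean(
            [weights[name] for weights in weight_dicts], axis=0)
    return averaged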
Example 2
def run_experiment(args, remaining_argv):
    strategy = training_utils.handle_distribution_strategy(
        args["distribution_strategy"])
    flags_core.verbose_flags(FLAG_LIST, args, remaining_argv)
    training_utils.startup_env(
        dtype=args["dtype"],
        enable_check_numerics=args["enable_check_numerics"],
        enable_xla=args["enable_xla"])

    # initialize parameters for quantization.
    if args.get("quant_params", None) is None:
        args["quant_params"] = {}
    QuantLayer.global_init(args["enable_quant"], **args["quant_params"])

    # create the experiment entry: trainer, evaluator, etc.
    with training_utils.get_strategy_scope(strategy):
        task = build_task(args)
        custom_dataset = build_dataset(args)
        try:
            model = task.build_model(args)
            training_utils.validate_unique_varname(model.weights)
        except AttributeError:
            model = None
        entry = build_exp(args,
                          strategy=strategy,
                          model=model,
                          task=task,
                          model_dir=args["model_dir"],
                          custom_dataset=custom_dataset)
    entry.run()
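The pattern above (building the task, dataset and model inside the strategy scope) mirrors standard tf.distribute usage; a minimal sketch with a toy Keras model standing in for task.build_model:

import tensorflow as tf

# MirroredStrategy replicates variables across local GPUs and falls
# back to a single device when none are available.
strategy = tf.distribute.MirroredStrategy()

# Variables must be created inside the scope so they are mirrored
# across replicas, which is why the model is built here.
with strategy.scope():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(8, activation="relu"),
        tf.keras.layers.Dense(1),
    ])
    model.compile(optimizer="adam", loss="mse")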
Example 3
def _main(_):
    arg_parser = flags_core.define_flags(FLAG_LIST, with_config_file=False)
    args, remaining_argv = flags_core.intelligent_parse_flags(
        FLAG_LIST, arg_parser)
    flags_core.verbose_flags(FLAG_LIST, args, remaining_argv)
    metric = build_metric(args)
    evaluate(metric, args["hypo_file"], args["ref_file"])
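The evaluate helper itself is not shown; a plausible sketch, assuming the metric built by build_metric is a callable that takes hypothesis and reference lists and returns a dict of scores (as the metric objects in Example 11 do):

import tensorflow as tf

def evaluate(metric, hypo_file, ref_file):
    # Read hypotheses and references line by line, then report scores.
    with tf.io.gfile.GFile(hypo_file) as fp:
        hypos = [line.strip() for line in fp]
    with tf.io.gfile.GFile(ref_file) as fp:
        refs = [line.strip() for line in fp]
    for name, value in metric(hypos, refs).items():
        print("%s=%.2f" % (name, value))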
Example 4
def _main(_):
    arg_parser = flags_core.define_flags(FLAG_LIST, with_config_file=False)
    args, remaining_argv = flags_core.intelligent_parse_flags(
        FLAG_LIST, arg_parser)
    flags_core.verbose_flags(FLAG_LIST, args, remaining_argv)
    generate_vocab(input=args["input"],
                   output=args["output"],
                   min_frequency=args["min_frequency"],
                   max_vocab_size=args["max_vocab_size"],
                   lowercase=args["lowercase"],
                   extra_slots=args["extra_slots"])
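A minimal sketch of the frequency-cutoff logic that min_frequency and max_vocab_size suggest (hypothetical; the real generate_vocab is not shown):

from collections import Counter

def build_vocab(lines, min_frequency=0, max_vocab_size=None,
                lowercase=False):
    # Count whitespace-separated tokens, drop rare ones, keep the top-k.
    counter = Counter()
    for line in lines:
        if lowercase:
            line = line.lower()
        counter.update(line.split())
    return [token for token, count in counter.most_common(max_vocab_size)
            if count >= min_frequency]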
Example 5
def _main(_):
    # define and parse program flags
    arg_parser = flags_core.define_flags(FLAG_LIST, with_config_file=True)
    args, remaining_argv = flags_core.intelligent_parse_flags(
        FLAG_LIST, arg_parser)
    flags_core.verbose_flags(FLAG_LIST, args, remaining_argv)
    dataset = build_dataset(args)
    feature_extractor = build_feature_extractor(args)
    if dataset is None:
        raise ValueError("dataset must be provided.")
    main(dataset, feature_extractor)
Example 6
def _main(_):
    arg_parser = flags_core.define_flags(FLAG_LIST, with_config_file=False)
    args, remaining_argv = flags_core.intelligent_parse_flags(
        FLAG_LIST, arg_parser)
    flags_core.verbose_flags(FLAG_LIST, args, remaining_argv)

    tokenizer = build_tokenizer(args)
    with tf.io.gfile.GFile(args["input"]) as fp:
        with tf.io.gfile.GFile(args["output"], "w") as fw:
            for line in fp:
                line = lowercase_and_remove_punctuations(tokenizer.language, line.strip(),
                                                         args["lowercase"], args["remove_punctuation"])
                fw.write(tokenizer.tokenize(line, return_str=True) + "\n")
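lowercase_and_remove_punctuations is not shown; a self-contained sketch of the two toggles it exposes (the real helper also receives the tokenizer language, which is ignored here):

import string

def normalize(line, lowercase=False, remove_punctuation=False):
    # Optionally lowercase the line and strip ASCII punctuation.
    if lowercase:
        line = line.lower()
    if remove_punctuation:
        line = line.translate(str.maketrans("", "", string.punctuation))
    return line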
Example 7
def _main(_):
    # define and parse program flags
    arg_parser = flags_core.define_flags(FLAG_LIST, with_config_file=True)
    args, remaining_argv = flags_core.intelligent_parse_flags(
        FLAG_LIST, arg_parser)
    flags_core.verbose_flags(FLAG_LIST, args, remaining_argv)
    dataset = build_dataset(args)
    if dataset is None:
        raise ValueError("dataset must be provided.")
    main(dataset=dataset,
         output_transcript_file=args["output_transcript_file"],
         output_translation_file=args["output_translation_file"])
Example 8
def _main(_):
    # define and parse program flags
    arg_parser = flags_core.define_flags(FLAG_LIST, with_config_file=True)
    args, remaining_argv = flags_core.intelligent_parse_flags(
        FLAG_LIST, arg_parser)
    flags_core.verbose_flags(FLAG_LIST, args, remaining_argv)
    task = build_task(args)
    dataset = build_dataset(args)
    if dataset is None:
        raise ValueError("dataset must be provided.")
    main(processor_id=args["processor_id"],
         num_processors=args["num_processors"],
         num_output_shards=args["num_output_shards"],
         output_range_begin=args["output_range_begin"],
         output_range_end=args["output_range_end"],
         output_template=args["output_template"],
         progressbar=args["progressbar"],
         dataset=dataset,
         task=task)
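The processor_id/num_processors pair suggests the usual round-robin partitioning of records across parallel workers; a minimal sketch of that pattern (hypothetical, not the actual implementation):

def shard(items, processor_id, num_processors):
    # Yield only the items this processor is responsible for.
    for index, item in enumerate(items):
        if index % num_processors == processor_id:
            yield item

# e.g. processor 1 of 4 handles items 1, 5, 9, ...
assert list(shard(range(10), 1, 4)) == [1, 5, 9]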
Example 9
def run_experiment(args, remaining_argv):
    strategy = training_utils.handle_distribution_strategy(
        args["distribution_strategy"])
    flags_core.verbose_flags(FLAG_LIST, args, remaining_argv)
    training_utils.startup_env(
        dtype=args["dtype"],
        enable_check_numerics=args["enable_check_numerics"],
        enable_xla=args["enable_xla"])

    # create the experiment entry: trainer, evaluator, etc.
    with training_utils.get_strategy_scope(strategy):
        task = build_task(args)
        custom_dataset = build_dataset(args)
        try:
            model = task.build_model(args)
        except AttributeError:
            model = None
        entry = build_exp(args,
                          strategy=strategy,
                          model=model,
                          task=task,
                          model_dir=args["model_dir"],
                          custom_dataset=custom_dataset)
    entry.run()
Example 10
def _main(_):
    arg_parser = flags_core.define_flags(FLAG_LIST, with_config_file=False)
    args, remaining_argv = flags_core.parse_flags(FLAG_LIST, arg_parser)
    flags_core.verbose_flags(FLAG_LIST, args, remaining_argv)
    converter = build_converter(args["model_name"])
    convert(converter, args["from"], args["to"])
Example 11
def _main(_):
    # define and parse program flags
    arg_parser = flags_core.define_flags(FLAG_LIST)
    args, remaining_argv = flags_core.parse_flags(FLAG_LIST, arg_parser)
    flags_core.verbose_flags(FLAG_LIST, args, remaining_argv)
    strategy = training_utils.handle_distribution_strategy(
        args["distribution_strategy"])
    training_utils.startup_env(
        dtype=args["dtype"],
        enable_xla=False,
        enable_check_numerics=args["enable_check_numerics"])

    asr_task, asr_model = _build_task_model(strategy,
                                            args["asr_model_dir"],
                                            batch_size=args["batch_size"])
    mt_task, mt_model = _build_task_model(strategy,
                                          args["mt_model_dir"],
                                          batch_size=args["batch_size"])
    audio_dataset = build_dataset(args)
    # ========= ASR ==========
    asr_output_file = args["asr_output_file"]
    if asr_output_file is None:
        asr_output_file = "ram://asr_output_file"
    logging.info("Creating ASR generator.")
    with training_utils.get_strategy_scope(strategy):
        asr_generator = build_exp(
            {
                "class": SequenceGenerator,
                "params": {
                    "output_file": asr_output_file,
                    "search_method.class": args["asr_search_method.class"],
                    "search_method.params": args["asr_search_method.params"],
                }
            },
            strategy=strategy,
            model=asr_model,
            task=asr_task,
            model_dir=args["asr_model_dir"],
            custom_dataset=audio_dataset)
    asr_generator.run()
    if getattr(audio_dataset, "transcripts", None) is not None:
        asr_metric = asr_task.get_eval_metric(args, "asr_metric")
        with tf.io.gfile.GFile(asr_output_file, "r") as fp:
            metric_result = asr_metric([line.strip() for line in fp],
                                       audio_dataset.transcripts)
        logging.info("Evaluation Result of ASR:")
        for k, v in metric_result.items():
            logging.info("   %s=%.2f", k, v)

    logging.info("Creating MT generator.")
    mt_reference_file = "ram://mt_reference_file"
    with tf.io.gfile.GFile(mt_reference_file, "w") as fw:
        for x in audio_dataset.targets:
            fw.write(x.strip() + "\n")

    with training_utils.get_strategy_scope(strategy):
        mt_generator = build_exp(
            {
                "class": SequenceGenerator,
                "params": {
                    "output_file": args["mt_output_file"],
                    "search_method.class": args["mt_search_method.class"],
                    "search_method.params": args["mt_search_method.params"],
                    "metric.class": args["mt_metric.class"],
                    "metric.params": args["mt_metric.params"]
                }
            },
            strategy=strategy,
            model=mt_model,
            task=mt_task,
            model_dir=args["mt_model_dir"],
            custom_dataset=build_dataset({
                "class": ParallelTextDataset,
                "params": {
                    "src_file": asr_output_file,
                    "trg_file": mt_reference_file
                }
            }))
    mt_generator.run()
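The ram:// prefix used for asr_output_file and mt_reference_file refers to TensorFlow's in-memory filesystem, which lets the ASR output feed the MT stage without touching disk; a minimal round-trip:

import tensorflow as tf

# Files under ram:// live only in process memory.
with tf.io.gfile.GFile("ram://demo.txt", "w") as fw:
    fw.write("hello\n")
with tf.io.gfile.GFile("ram://demo.txt") as fp:
    assert fp.read() == "hello\n"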