def testConfigOverride(self):
        """Check that a second configuration file overrides values of the first.

        Two YAML files are written to the test's temporary directory and
        loaded together with ``config.load_config``. The expected result
        takes ``model_dir`` and ``train.batch_size`` from the second file
        and keeps ``train.steps`` from the first one.
        """
        base = {"model_dir": "foo", "train": {"batch_size": 32, "steps": 42}}
        override = {"model_dir": "bar", "train": {"batch_size": 64}}

        paths = []
        for filename, content in (("config1.yml", base),
                                  ("config2.yml", override)):
            path = os.path.join(self.get_temp_dir(), filename)
            # The file is opened in binary mode, hence the bytes conversion.
            with open(path, mode="wb") as stream:
                stream.write(tf.compat.as_bytes(yaml.dump(content)))
            paths.append(path)

        merged = config.load_config(paths)

        expected = {"model_dir": "bar", "train": {"batch_size": 64, "steps": 42}}
        self.assertDictEqual(expected, merged)
def main():
    """Merge the configuration files given on the command line and print them.

    The positional ``config`` argument takes one or more file paths; they are
    passed to ``load_config`` and the result is printed as block-style YAML.
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    arg_parser.add_argument("config", nargs="+", help="Configuration files.")
    cli_args = arg_parser.parse_args()

    merged = load_config(cli_args.config)
    rendered = yaml.dump(merged, default_flow_style=False)
    print(rendered)
Exemple #3
0
    def testConfigOverride(self):
        """Check that a second configuration file overrides values of the first.

        Two YAML files are written to a temporary directory and loaded
        together with ``config.load_config``. The expected result takes
        ``model_dir`` and ``train.batch_size`` from the second file and keeps
        ``train.steps`` from the first one.
        """
        import os
        import shutil
        import tempfile

        config1 = {
            "model_dir": "foo",
            "train": {
                "batch_size": 32,
                "steps": 42
            }
        }
        config2 = {"model_dir": "bar", "train": {"batch_size": 64}}

        # The file paths were never defined in this method, so they are
        # created here in a private directory that is removed after the test.
        temp_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
        config_file_1 = os.path.join(temp_dir, "config1.yml")
        config_file_2 = os.path.join(temp_dir, "config2.yml")

        with open(config_file_1, "w") as config_file:
            config_file.write(yaml.dump(config1))
        with open(config_file_2, "w") as config_file:
            config_file.write(yaml.dump(config2))

        loaded_config = config.load_config([config_file_1, config_file_2])

        self.assertDictEqual(
            {
                "model_dir": "bar",
                "train": {
                    "batch_size": 64,
                    "steps": 42
                }
            }, loaded_config)
Exemple #4
0
    def testConfigOverride(self):
        """Check that a second configuration file overrides values of the first.

        Two YAML files are written as UTF-8 text to the test's temporary
        directory and loaded together with ``config.load_config``. The
        expected result takes ``model_dir`` and ``train.batch_size`` from the
        second file and keeps ``train.steps`` from the first one.
        """
        base = {"model_dir": "foo", "train": {"batch_size": 32, "steps": 42}}
        override = {"model_dir": "bar", "train": {"batch_size": 64}}

        paths = []
        for filename, content in (("config1.yml", base),
                                  ("config2.yml", override)):
            path = os.path.join(self.get_temp_dir(), filename)
            paths.append(path)
            with io.open(path, encoding="utf-8", mode="w") as stream:
                try:
                    stream.write(yaml.dump(content))
                except TypeError:
                    # Fallback that converts the dump with ``unicode`` first;
                    # presumably for Python 2, where that builtin exists.
                    stream.write(unicode(yaml.dump(content)))

        merged = config.load_config(paths)

        expected = {"model_dir": "bar", "train": {"batch_size": 64, "steps": 42}}
        self.assertDictEqual(expected, merged)
Exemple #5
0
  def testConfigOverride(self):
    """Check that a second configuration file overrides values of the first.

    Two YAML files are written to a temporary directory and loaded together
    with ``config.load_config``. The expected result takes ``model_dir`` and
    ``train.batch_size`` from the second file and keeps ``train.steps`` from
    the first one.
    """
    import os
    import shutil
    import tempfile

    config1 = {"model_dir": "foo", "train": {"batch_size": 32, "steps": 42}}
    config2 = {"model_dir": "bar", "train": {"batch_size": 64}}

    # The file paths were never defined in this method, so they are created
    # here in a private directory that is removed after the test.
    temp_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
    config_file_1 = os.path.join(temp_dir, "config1.yml")
    config_file_2 = os.path.join(temp_dir, "config2.yml")

    with open(config_file_1, "w") as config_file:
      config_file.write(yaml.dump(config1))
    with open(config_file_2, "w") as config_file:
      config_file.write(yaml.dump(config2))

    loaded_config = config.load_config([config_file_1, config_file_2])

    self.assertDictEqual(
        {"model_dir": "bar", "train": {"batch_size": 64, "steps": 42}},
        loaded_config)
Exemple #6
0
def run_model(config, save_dir, evaluate_only=False):
    """Train and evaluate, or only evaluate, a custom NMT model.

    Args:
      config: Path of a configuration file; it is passed to ``load_config``
        as a one-item list.
      save_dir: Directory handed to ``load_custom_model``.
      evaluate_only: When true, call ``Runner.evaluate`` instead of
        ``Runner.train_and_evaluate``.
    """
    tf.logging.set_verbosity('INFO')

    run_config = load_config([config])
    custom_model = load_custom_model(save_dir, NMTCustom())
    runner = Runner(
        custom_model,
        run_config,
        seed=None,
        num_devices=1,
        gpu_allow_growth=False,
        session_config=tf.ConfigProto(intra_op_parallelism_threads=0,
                                      inter_op_parallelism_threads=0))

    action = runner.evaluate if evaluate_only else runner.train_and_evaluate
    action()
Exemple #7
0
def main():
    """Command-line entry point for training, inference and export.

    Parses the command line, optionally publishes a ``TF_CONFIG`` cluster
    description for distributed training, merges the configuration files,
    builds a ``tf.estimator.Estimator`` around the loaded model and finally
    dispatches to ``train``, ``infer`` or ``export`` depending on the ``run``
    positional argument.
    """

    def _str_to_bool(value):
        """Convert a command-line string to a boolean.

        ``type=bool`` turns every non-empty string (including "False") into
        ``True``, so the textual value is interpreted explicitly instead.
        """
        normalized = value.strip().lower()
        if normalized in ("true", "t", "yes", "y", "1"):
            return True
        if normalized in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got %r" % value)

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("run",
                        choices=["train", "infer", "export"],
                        help="Run type.")
    parser.add_argument("--config",
                        required=True,
                        nargs="+",
                        help="List of configuration files.")
    parser.add_argument("--model",
                        default="",
                        help="Model configuration file.")
    parser.add_argument(
        "--run_dir",
        default="",
        help="If set, model_dir will be created relative to this location.")
    parser.add_argument(
        "--data_dir",
        default="",
        help="If set, data files are expected to be relative to this location."
    )
    parser.add_argument("--features_file",
                        default=[],
                        nargs="+",
                        help="Run inference on this file.")
    parser.add_argument(
        "--predictions_file",
        default="",
        help=
        ("File used to save predictions. If not set, predictions are printed "
         "on the standard output."))
    parser.add_argument(
        "--checkpoint_path",
        default=None,
        help="Checkpoint to use for inference or export (latest by default).")
    parser.add_argument(
        "--chief_host",
        default="",
        help="hostname:port of the chief worker (for distributed training).")
    parser.add_argument(
        "--worker_hosts",
        default="",
        help=("Comma-separated list of hostname:port of workers "
              "(for distributed training)."))
    parser.add_argument(
        "--ps_hosts",
        default="",
        help=("Comma-separated list of hostname:port of parameter servers "
              "(for distributed training)."))
    parser.add_argument(
        "--task_type",
        default="chief",
        choices=["chief", "worker", "ps", "evaluator"],
        help="Type of the task to run (for distributed training).")
    parser.add_argument("--task_index",
                        type=int,
                        default=0,
                        help="ID of the task (for distributed training).")
    parser.add_argument("--log_level",
                        default="INFO",
                        choices=["DEBUG", "ERROR", "FATAL", "INFO", "WARN"],
                        help="Logs verbosity.")
    # The option still takes an explicit value (e.g. "True" / "False"), but
    # the value is now parsed so that "False" really disables the feature.
    parser.add_argument("--gpu_allow_growth",
                        type=_str_to_bool,
                        default=False,
                        help="Allocate GPU memory dynamically.")
    args = parser.parse_args()

    tf.logging.set_verbosity(getattr(tf.logging, args.log_level))

    # Setup cluster if defined.
    if args.chief_host:
        os.environ["TF_CONFIG"] = json.dumps({
            "cluster": {
                "chief": [args.chief_host],
                "worker": args.worker_hosts.split(","),
                "ps": args.ps_hosts.split(",")
            },
            "task": {
                "type": args.task_type,
                "index": args.task_index
            }
        })

    # Load and merge run configurations.
    config = load_config(args.config)

    if args.run_dir:
        config["model_dir"] = _prefix_path(args.run_dir, config["model_dir"])
    if not os.path.isdir(config["model_dir"]):
        tf.logging.info("Creating model directory %s", config["model_dir"])
        os.makedirs(config["model_dir"])

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = args.gpu_allow_growth

    run_config = tf.estimator.RunConfig(model_dir=config["model_dir"],
                                        session_config=session_config)

    # Optional training settings override the RunConfig defaults.
    if "train" in config:
        if "save_summary_steps" in config["train"]:
            run_config = run_config.replace(
                save_summary_steps=config["train"]["save_summary_steps"],
                log_step_count_steps=config["train"]["save_summary_steps"])
        if "save_checkpoints_steps" in config["train"]:
            run_config = run_config.replace(
                save_checkpoints_secs=None,
                save_checkpoints_steps=config["train"]
                ["save_checkpoints_steps"])
        if "keep_checkpoint_max" in config["train"]:
            run_config = run_config.replace(
                keep_checkpoint_max=config["train"]["keep_checkpoint_max"])

    model = load_model(config["model_dir"], model_file=args.model)

    estimator = tf.estimator.Estimator(model,
                                       config=run_config,
                                       params=config["params"])

    if args.run == "train":
        if args.data_dir:
            config["data"] = _prefix_path(args.data_dir, config["data"])
        train(estimator, model, config)
    elif args.run == "infer":
        if not args.features_file:
            parser.error("--features_file is required for inference.")
        elif len(args.features_file) == 1:
            # A single features file is passed as a plain string.
            args.features_file = args.features_file[0]
        infer(args.features_file,
              estimator,
              model,
              config,
              checkpoint_path=args.checkpoint_path,
              predictions_file=args.predictions_file)
    elif args.run == "export":
        export(estimator, model, config, checkpoint_path=args.checkpoint_path)
Exemple #8
0
def main():
    """Command-line entry point built around sub-commands.

    Global options are followed by a required run type (``train``, ``eval``,
    ``infer``, ``export``, ``score``, ``average_checkpoints`` or
    ``update_vocab``), each with its own options. After parsing, the function
    configures logging and TensorFlow threading/devices, merges the
    configuration files with ``load_config``, loads the model with
    ``load_model``, builds a ``Runner`` and calls the method matching the
    run type.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-v",
                        "--version",
                        action="version",
                        version="OpenNMT-tf %s" % __version__)
    parser.add_argument("--config",
                        required=True,
                        nargs="+",
                        help="List of configuration files.")
    parser.add_argument(
        "--auto_config",
        default=False,
        action="store_true",
        help="Enable automatic configuration values.",
    )
    parser.add_argument(
        "--model_type",
        default="",
        choices=list(sorted(catalog.list_model_names_from_catalog())),
        help="Model type from the catalog.",
    )
    parser.add_argument("--model",
                        default="",
                        help="Custom model configuration file.")
    parser.add_argument(
        "--run_dir",
        default="",
        help="If set, model_dir will be created relative to this location.",
    )
    parser.add_argument(
        "--data_dir",
        default="",
        help="If set, data files are expected to be relative to this location.",
    )
    parser.add_argument(
        "--checkpoint_path",
        default=None,
        help=("Specific checkpoint or model directory to load "
              "(when a directory is set, the latest checkpoint is used)."),
    )
    parser.add_argument(
        "--log_level",
        default="INFO",
        choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"],
        help="Logs verbosity.",
    )
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument(
        "--gpu_allow_growth",
        default=False,
        action="store_true",
        help="Allocate GPU memory dynamically.",
    )
    parser.add_argument(
        "--intra_op_parallelism_threads",
        type=int,
        default=0,
        help=("Number of intra op threads (0 means the system picks "
              "an appropriate number)."),
    )
    parser.add_argument(
        "--inter_op_parallelism_threads",
        type=int,
        default=0,
        help=("Number of inter op threads (0 means the system picks "
              "an appropriate number)."),
    )
    parser.add_argument(
        "--mixed_precision",
        default=False,
        action="store_true",
        help="Enable mixed precision.",
    )
    parser.add_argument(
        "--eager_execution",
        default=False,
        action="store_true",
        help="Enable TensorFlow eager execution.",
    )

    # One sub-parser per run type; the selected name is stored in
    # ``args.run_type`` and a run type is mandatory.
    subparsers = parser.add_subparsers(help="Run type.", dest="run_type")
    subparsers.required = True
    parser_train = subparsers.add_parser("train", help="Training.")
    parser_train.add_argument(
        "--with_eval",
        default=False,
        action="store_true",
        help="Enable automatic evaluation.",
    )
    parser_train.add_argument(
        "--num_gpus",
        type=int,
        default=1,
        help="Number of GPUs to use for in-graph replication.",
    )
    parser_train.add_argument(
        "--horovod",
        default=False,
        action="store_true",
        help="Enable Horovod training mode.",
    )

    parser_eval = subparsers.add_parser("eval", help="Evaluation.")
    parser_eval.add_argument("--features_file",
                             nargs="+",
                             default=None,
                             help="Input features files.")
    parser_eval.add_argument("--labels_file",
                             default=None,
                             help="Output labels files.")

    parser_infer = subparsers.add_parser("infer", help="Inference.")
    parser_infer.add_argument("--features_file",
                              nargs="+",
                              required=True,
                              help="Run inference on this file.")
    parser_infer.add_argument(
        "--predictions_file",
        default="",
        help=
        ("File used to save predictions. If not set, predictions are printed "
         "on the standard output."),
    )
    parser_infer.add_argument(
        "--log_prediction_time",
        default=False,
        action="store_true",
        help="Logs some prediction time metrics.",
    )

    parser_export = subparsers.add_parser("export", help="Model export.")
    parser_export.add_argument(
        "--output_dir",
        "--export_dir",
        required=True,
        help="The directory of the exported model.",
    )
    parser_export.add_argument(
        "--format",
        "--export_format",
        choices=exporters.list_exporters(),
        default="saved_model",
        help="Format of the exported model.",
    )

    parser_score = subparsers.add_parser("score", help="Scoring.")
    parser_score.add_argument("--features_file",
                              nargs="+",
                              required=True,
                              help="Features file.")
    parser_score.add_argument("--predictions_file",
                              default=None,
                              help="Predictions to score.")

    parser_average_checkpoints = subparsers.add_parser(
        "average_checkpoints", help="Checkpoint averaging.")
    parser_average_checkpoints.add_argument(
        "--output_dir",
        required=True,
        help="The output directory for the averaged checkpoint.",
    )
    parser_average_checkpoints.add_argument(
        "--max_count",
        type=int,
        default=8,
        help="The maximal number of checkpoints to average.",
    )

    parser_update_vocab = subparsers.add_parser(
        "update_vocab", help="Update model vocabularies in checkpoint.")
    parser_update_vocab.add_argument(
        "--output_dir",
        required=True,
        help="The output directory for the updated checkpoint.",
    )
    parser_update_vocab.add_argument("--src_vocab",
                                     default=None,
                                     help="Path to the new source vocabulary.")
    parser_update_vocab.add_argument("--tgt_vocab",
                                     default=None,
                                     help="Path to the new target vocabulary.")

    # When using an option that takes multiple values just before the run type,
    # the run type is treated as a value of this option. To fix this issue, we
    # inject a placeholder option just before the run type to clearly separate it.
    parser.add_argument("--placeholder",
                        action="store_true",
                        help=argparse.SUPPRESS)
    run_types = set(subparsers.choices.keys())
    args = sys.argv[1:]
    # Only the first argument equal to a run type name gets the placeholder;
    # the loop stops right after the insertion.
    for i, arg in enumerate(args):
        if arg in run_types:
            args.insert(i, "--placeholder")
            break

    args = parser.parse_args(args)
    # A single features file is passed on as a plain string instead of a
    # one-item list (only some sub-commands define ``features_file``).
    if (hasattr(args, "features_file") and args.features_file
            and len(args.features_file) == 1):
        args.features_file = args.features_file[0]

    _initialize_logging(getattr(logging, args.log_level))
    tf.config.threading.set_intra_op_parallelism_threads(
        args.intra_op_parallelism_threads)
    tf.config.threading.set_inter_op_parallelism_threads(
        args.inter_op_parallelism_threads)

    if args.eager_execution:
        tf.config.run_functions_eagerly(True)

    gpus = tf.config.list_physical_devices(device_type="GPU")
    # ``horovod`` is only defined by the "train" sub-command, hence hasattr.
    if hasattr(args, "horovod") and args.horovod:
        # Horovod is imported only on this branch.
        import horovod.tensorflow as hvd

        hvd.init()
        is_master = hvd.rank() == 0
        if gpus:
            # Make only the GPU indexed by the local rank visible.
            local_gpu = gpus[hvd.local_rank()]
            tf.config.set_visible_devices(local_gpu, device_type="GPU")
            gpus = [local_gpu]
    else:
        hvd = None
        is_master = True

    if args.gpu_allow_growth:
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, enable=True)

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])

    # Only the master process creates the model directory.
    if is_master and not tf.io.gfile.exists(config["model_dir"]):
        tf.get_logger().info("Creating model directory %s",
                             config["model_dir"])
        tf.io.gfile.makedirs(config["model_dir"])

    model = load_model(
        config["model_dir"],
        model_file=args.model,
        model_name=args.model_type,
        serialize_model=is_master,
        as_builder=True,
    )
    runner = Runner(
        model,
        config,
        auto_config=args.auto_config,
        mixed_precision=args.mixed_precision,
        seed=args.seed,
    )

    # Dispatch to the Runner method matching the selected run type.
    if args.run_type == "train":
        runner.train(
            num_devices=args.num_gpus,
            with_eval=args.with_eval,
            checkpoint_path=args.checkpoint_path,
            hvd=hvd,
        )
    elif args.run_type == "eval":
        metrics = runner.evaluate(
            checkpoint_path=args.checkpoint_path,
            features_file=args.features_file,
            labels_file=args.labels_file,
        )
        print(metrics)
    elif args.run_type == "infer":
        runner.infer(
            args.features_file,
            predictions_file=args.predictions_file,
            checkpoint_path=args.checkpoint_path,
            log_time=args.log_prediction_time,
        )
    elif args.run_type == "export":
        runner.export(
            args.output_dir,
            checkpoint_path=args.checkpoint_path,
            exporter=exporters.make_exporter(args.format),
        )
    elif args.run_type == "score":
        runner.score(
            args.features_file,
            args.predictions_file,
            checkpoint_path=args.checkpoint_path,
        )
    elif args.run_type == "average_checkpoints":
        runner.average_checkpoints(args.output_dir, max_count=args.max_count)
    elif args.run_type == "update_vocab":
        runner.update_vocab(args.output_dir,
                            src_vocab=args.src_vocab,
                            tgt_vocab=args.tgt_vocab)
Exemple #9
0
def main():
    """Command-line entry point that drives a ``Runner``.

    Parses the command line, optionally publishes a ``TF_CONFIG`` cluster
    description, merges the configuration files, makes sure the model
    directory exists, then builds a ``Runner`` and calls the method matching
    the ``run`` positional argument.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument(
        "run",
        choices=["train_and_eval", "train", "eval", "infer", "export"],
        help="Run type.")
    cli.add_argument(
        "--config", required=True, nargs="+",
        help="List of configuration files.")
    cli.add_argument(
        "--model", default="",
        help="Model configuration file.")
    cli.add_argument(
        "--run_dir", default="",
        help="If set, model_dir will be created relative to this location.")
    cli.add_argument(
        "--data_dir", default="",
        help="If set, data files are expected to be relative to this location.")
    cli.add_argument(
        "--features_file", default=[], nargs="+",
        help="Run inference on this file.")
    cli.add_argument(
        "--predictions_file", default="",
        help=("File used to save predictions. If not set, predictions are printed "
              "on the standard output."))
    cli.add_argument(
        "--checkpoint_path", default=None,
        help=("Checkpoint or directory to use for inference or export "
              "(when a directory is set, the latest checkpoint is used)."))
    cli.add_argument(
        "--num_gpus", type=int, default=1,
        help="Number of GPUs to use for in-graph replication.")
    cli.add_argument(
        "--chief_host", default="",
        help="hostname:port of the chief worker (for distributed training).")
    cli.add_argument(
        "--worker_hosts", default="",
        help=("Comma-separated list of hostname:port of workers "
              "(for distributed training)."))
    cli.add_argument(
        "--ps_hosts", default="",
        help=("Comma-separated list of hostname:port of parameter servers "
              "(for distributed training)."))
    cli.add_argument(
        "--task_type", default="chief",
        choices=["chief", "worker", "ps", "evaluator"],
        help="Type of the task to run (for distributed training).")
    cli.add_argument(
        "--task_index", type=int, default=0,
        help="ID of the task (for distributed training).")
    cli.add_argument(
        "--log_level", default="INFO",
        choices=["DEBUG", "ERROR", "FATAL", "INFO", "WARN"],
        help="Logs verbosity.")
    cli.add_argument("--seed", type=int, default=None, help="Random seed.")
    cli.add_argument(
        "--gpu_allow_growth", default=False, action="store_true",
        help="Allocate GPU memory dynamically.")
    options = cli.parse_args()

    tf.logging.set_verbosity(getattr(tf.logging, options.log_level))

    # Publish the cluster layout through TF_CONFIG when a chief host is given.
    if options.chief_host:
        cluster = {
            "chief": [options.chief_host],
            "worker": options.worker_hosts.split(","),
            "ps": options.ps_hosts.split(",")
        }
        task = {"type": options.task_type, "index": options.task_index}
        os.environ["TF_CONFIG"] = json.dumps({"cluster": cluster, "task": task})

    # Merge the configuration files, then apply the directory prefixes.
    run_config = load_config(options.config)
    if options.run_dir:
        run_config["model_dir"] = os.path.join(options.run_dir,
                                               run_config["model_dir"])
    if options.data_dir:
        run_config["data"] = _prefix_paths(options.data_dir, run_config["data"])

    model_dir = run_config["model_dir"]
    if not os.path.isdir(model_dir):
        tf.logging.info("Creating model directory %s", model_dir)
        os.makedirs(model_dir)

    runner = Runner(
        load_model(model_dir, model_file=options.model),
        run_config,
        seed=options.seed,
        num_devices=options.num_gpus,
        gpu_allow_growth=options.gpu_allow_growth)

    run_type = options.run
    if run_type == "train_and_eval":
        runner.train_and_evaluate()
    elif run_type == "train":
        runner.train()
    elif run_type == "eval":
        runner.evaluate(checkpoint_path=options.checkpoint_path)
    elif run_type == "infer":
        features = options.features_file
        if not features:
            cli.error("--features_file is required for inference.")
        elif len(features) == 1:
            # A single features file is passed as a plain string.
            features = features[0]
        runner.infer(features,
                     predictions_file=options.predictions_file,
                     checkpoint_path=options.checkpoint_path)
    elif run_type == "export":
        runner.export(checkpoint_path=options.checkpoint_path)
Exemple #10
0
def main():
  """Command-line entry point for training, inference and export.

  Parses the command line, optionally publishes a ``TF_CONFIG`` cluster
  description for distributed training, merges the configuration files,
  builds a ``tf.estimator.Estimator`` from the loaded model and finally
  dispatches to ``train``, ``infer`` or ``export`` depending on the ``run``
  positional argument.
  """

  def _str_to_bool(value):
    """Convert a command-line string to a boolean.

    ``type=bool`` turns every non-empty string (including "False") into
    ``True``, so the textual value is interpreted explicitly instead.
    """
    normalized = value.strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
      return True
    if normalized in ("false", "f", "no", "n", "0", ""):
      return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got %r" % value)

  parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument("run", choices=["train", "infer", "export"],
                      help="Run type.")
  parser.add_argument("--config", required=True, nargs="+",
                      help="List of configuration files.")
  parser.add_argument("--model", default="",
                      help="Model configuration file.")
  parser.add_argument("--run_dir", default="",
                      help="If set, model_dir will be created relative to this location.")
  parser.add_argument("--data_dir", default="",
                      help="If set, data files are expected to be relative to this location.")
  parser.add_argument("--features_file", default=[], nargs="+",
                      help="Run inference on this file.")
  parser.add_argument("--predictions_file", default="",
                      help=("File used to save predictions. If not set, predictions are printed "
                            "on the standard output."))
  parser.add_argument("--checkpoint_path", default=None,
                      help=("Checkpoint or directory to use for inference or export "
                            "(when a directory is set, the latest checkpoint is used)."))
  parser.add_argument("--num_gpus", type=int, default=1,
                      help="Number of GPUs to use for in-graph replication.")
  parser.add_argument("--chief_host", default="",
                      help="hostname:port of the chief worker (for distributed training).")
  parser.add_argument("--worker_hosts", default="",
                      help=("Comma-separated list of hostname:port of workers "
                            "(for distributed training)."))
  parser.add_argument("--ps_hosts", default="",
                      help=("Comma-separated list of hostname:port of parameter servers "
                            "(for distributed training)."))
  parser.add_argument("--task_type", default="chief",
                      choices=["chief", "worker", "ps", "evaluator"],
                      help="Type of the task to run (for distributed training).")
  parser.add_argument("--task_index", type=int, default=0,
                      help="ID of the task (for distributed training).")
  parser.add_argument("--log_level", default="INFO",
                      choices=["DEBUG", "ERROR", "FATAL", "INFO", "WARN"],
                      help="Logs verbosity.")
  # The option still takes an explicit value (e.g. "True" / "False"), but the
  # value is now parsed so that "False" really disables the feature.
  parser.add_argument("--gpu_allow_growth", type=_str_to_bool, default=False,
                      help="Allocate GPU memory dynamically.")
  args = parser.parse_args()

  tf.logging.set_verbosity(getattr(tf.logging, args.log_level))

  # Setup cluster if defined.
  if args.chief_host:
    os.environ["TF_CONFIG"] = json.dumps({
        "cluster": {
            "chief": [args.chief_host],
            "worker": args.worker_hosts.split(","),
            "ps": args.ps_hosts.split(",")
        },
        "task": {
            "type": args.task_type,
            "index": args.task_index
        }
    })

  # Load and merge run configurations.
  config = load_config(args.config)

  if args.run_dir:
    config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
  if not os.path.isdir(config["model_dir"]):
    tf.logging.info("Creating model directory %s", config["model_dir"])
    os.makedirs(config["model_dir"])

  session_config = tf.ConfigProto(
      allow_soft_placement=True,
      log_device_placement=False,
      gpu_options=tf.GPUOptions(
          allow_growth=args.gpu_allow_growth))

  run_config = tf.estimator.RunConfig(
      model_dir=config["model_dir"],
      session_config=session_config)

  # Optional training settings override the RunConfig defaults.
  if "train" in config:
    if "save_summary_steps" in config["train"]:
      run_config = run_config.replace(
          save_summary_steps=config["train"]["save_summary_steps"],
          log_step_count_steps=config["train"]["save_summary_steps"])
    if "save_checkpoints_steps" in config["train"]:
      run_config = run_config.replace(
          save_checkpoints_secs=None,
          save_checkpoints_steps=config["train"]["save_checkpoints_steps"])
    if "keep_checkpoint_max" in config["train"]:
      run_config = run_config.replace(
          keep_checkpoint_max=config["train"]["keep_checkpoint_max"])

  model = load_model(config["model_dir"], model_file=args.model)

  estimator = tf.estimator.Estimator(
      model.model_fn(num_devices=args.num_gpus),
      config=run_config,
      params=config["params"])

  # A directory given as checkpoint path resolves to its latest checkpoint.
  checkpoint_path = args.checkpoint_path
  if checkpoint_path is not None and os.path.isdir(checkpoint_path):
    checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)

  if args.run == "train":
    if args.data_dir:
      config["data"] = _prefix_paths(args.data_dir, config["data"])
    train(estimator, model, config, num_devices=args.num_gpus)
  elif args.run == "infer":
    if not args.features_file:
      parser.error("--features_file is required for inference.")
    elif len(args.features_file) == 1:
      # A single features file is passed as a plain string.
      args.features_file = args.features_file[0]
    infer(
        args.features_file,
        estimator,
        model,
        config,
        checkpoint_path=checkpoint_path,
        predictions_file=args.predictions_file)
  elif args.run == "export":
    export(estimator, model, config, checkpoint_path=checkpoint_path)
Exemple #11
0
def main():
    """Command-line entry point: parse arguments, build a Runner and dispatch.

    Global options (configuration files, model selection, logging, threading,
    GPU memory growth, mixed precision) are followed by a mandatory
    sub-command selecting the run type: ``train``, ``eval``, ``infer``,
    ``export``, ``score``, ``average_checkpoints`` or ``update_vocab``.

    Side effects visible in this function: the TensorFlow threading and GPU
    memory-growth settings are applied, the model directory is created when it
    does not exist yet, and for ``eval`` the returned metrics are printed.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-v",
                        "--version",
                        action="version",
                        version="OpenNMT-tf %s" % __version__)
    parser.add_argument("--config",
                        required=True,
                        nargs="+",
                        help="List of configuration files.")
    parser.add_argument("--auto_config",
                        default=False,
                        action="store_true",
                        help="Enable automatic configuration values.")
    parser.add_argument("--model_type",
                        default="",
                        choices=list(
                            classes_in_module(catalog, public_only=True)),
                        help="Model type from the catalog.")
    parser.add_argument("--model",
                        default="",
                        help="Custom model configuration file.")
    parser.add_argument(
        "--run_dir",
        default="",
        help="If set, model_dir will be created relative to this location.")
    parser.add_argument(
        "--data_dir",
        default="",
        help="If set, data files are expected to be relative to this location."
    )
    parser.add_argument(
        "--checkpoint_path",
        default=None,
        help=("Specific checkpoint or model directory to load "
              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument(
        "--log_level",
        default="INFO",
        choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"],
        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument("--gpu_allow_growth",
                        default=False,
                        action="store_true",
                        help="Allocate GPU memory dynamically.")
    parser.add_argument(
        "--intra_op_parallelism_threads",
        type=int,
        default=0,
        help=("Number of intra op threads (0 means the system picks "
              "an appropriate number)."))
    parser.add_argument(
        "--inter_op_parallelism_threads",
        type=int,
        default=0,
        help=("Number of inter op threads (0 means the system picks "
              "an appropriate number)."))
    parser.add_argument("--mixed_precision",
                        default=False,
                        action="store_true",
                        help="Enable mixed precision.")

    subparsers = parser.add_subparsers(help="Run type.", dest="run")
    # Sub-commands are optional by default on Python 3. Without this, a
    # command line lacking a run type would load the configuration, create the
    # model directory and build the Runner, then exit without doing anything.
    subparsers.required = True
    parser_train = subparsers.add_parser("train", help="Training.")
    parser_train.add_argument("--with_eval",
                              default=False,
                              action="store_true",
                              help="Enable automatic evaluation.")
    parser_train.add_argument(
        "--num_gpus",
        type=int,
        default=1,
        help="Number of GPUs to use for in-graph replication.")

    parser_eval = subparsers.add_parser("eval", help="Evaluation.")
    parser_eval.add_argument("--features_file",
                             nargs="+",
                             default=None,
                             help="Input features files.")
    parser_eval.add_argument("--labels_file",
                             default=None,
                             help="Output labels files.")

    parser_infer = subparsers.add_parser("infer", help="Inference.")
    parser_infer.add_argument("--features_file",
                              nargs="+",
                              required=True,
                              help="Run inference on this file.")
    parser_infer.add_argument(
        "--predictions_file",
        default="",
        help=
        ("File used to save predictions. If not set, predictions are printed "
         "on the standard output."))
    parser_infer.add_argument("--log_prediction_time",
                              default=False,
                              action="store_true",
                              help="Logs some prediction time metrics.")

    parser_export = subparsers.add_parser("export", help="Model export.")
    parser_export.add_argument("--export_dir",
                               required=True,
                               help="The directory of the exported model.")

    parser_score = subparsers.add_parser("score", help="Scoring.")
    parser_score.add_argument("--features_file",
                              nargs="+",
                              required=True,
                              help="Features file.")
    parser_score.add_argument("--predictions_file",
                              default=None,
                              help="Predictions to score.")

    parser_average_checkpoints = subparsers.add_parser(
        "average_checkpoints", help="Checkpoint averaging.")
    parser_average_checkpoints.add_argument(
        "--output_dir",
        required=True,
        help="The output directory for the averaged checkpoint.")
    parser_average_checkpoints.add_argument(
        "--max_count",
        type=int,
        default=8,
        help="The maximal number of checkpoints to average.")

    parser_update_vocab = subparsers.add_parser(
        "update_vocab", help="Update model vocabularies in checkpoint.")
    parser_update_vocab.add_argument(
        "--output_dir",
        required=True,
        help="The output directory for the updated checkpoint.")
    parser_update_vocab.add_argument("--src_vocab",
                                     default=None,
                                     help="Path to the new source vocabulary.")
    parser_update_vocab.add_argument("--tgt_vocab",
                                     default=None,
                                     help="Path to the new target vocabulary.")

    args = parser.parse_args()
    # Only some sub-commands define --features_file (hence the hasattr check).
    # A single file is passed on as a plain string, several files as a list.
    if hasattr(args, "features_file") and args.features_file and len(
            args.features_file) == 1:
        args.features_file = args.features_file[0]

    _set_log_level(getattr(logging, args.log_level))
    tf.config.threading.set_intra_op_parallelism_threads(
        args.intra_op_parallelism_threads)
    tf.config.threading.set_inter_op_parallelism_threads(
        args.inter_op_parallelism_threads)
    if args.gpu_allow_growth:
        for device in tf.config.experimental.list_physical_devices(
                device_type="GPU"):
            tf.config.experimental.set_memory_growth(device, enable=True)

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])

    if not tf.io.gfile.exists(config["model_dir"]):
        tf.get_logger().info("Creating model directory %s",
                             config["model_dir"])
        tf.io.gfile.makedirs(config["model_dir"])

    model = load_model(config["model_dir"],
                       model_file=args.model,
                       model_name=args.model_type)
    runner = Runner(model,
                    config,
                    auto_config=args.auto_config,
                    mixed_precision=args.mixed_precision,
                    seed=args.seed)

    # Dispatch on the selected sub-command; each branch only reads the options
    # declared by its own sub-parser.
    if args.run == "train":
        runner.train(num_devices=args.num_gpus,
                     with_eval=args.with_eval,
                     checkpoint_path=args.checkpoint_path)
    elif args.run == "eval":
        metrics = runner.evaluate(checkpoint_path=args.checkpoint_path,
                                  features_file=args.features_file,
                                  labels_file=args.labels_file)
        print(metrics)
    elif args.run == "infer":
        runner.infer(args.features_file,
                     predictions_file=args.predictions_file,
                     checkpoint_path=args.checkpoint_path,
                     log_time=args.log_prediction_time)
    elif args.run == "export":
        runner.export(args.export_dir, checkpoint_path=args.checkpoint_path)
    elif args.run == "score":
        runner.score(args.features_file,
                     args.predictions_file,
                     checkpoint_path=args.checkpoint_path)
    elif args.run == "average_checkpoints":
        runner.average_checkpoints(args.output_dir, max_count=args.max_count)
    elif args.run == "update_vocab":
        runner.update_vocab(args.output_dir,
                            src_vocab=args.src_vocab,
                            tgt_vocab=args.tgt_vocab)
Exemple #12
0
def main():
    """Command-line entry point: parse arguments, build a Runner and dispatch.

    The positional ``run`` argument selects one of ``train_and_eval``,
    ``train``, ``eval``, ``infer``, ``export`` or ``score``. When
    ``--chief_host`` is given, the cluster description is exported through the
    ``TF_CONFIG`` environment variable before the Runner is created.

    Side effects visible in this function: ``TF_CONFIG`` may be set, and the
    model directory is created by the chief task when it does not exist yet.
    Missing ``--features_file`` / ``--predictions_file`` for the run types
    that need them are reported through ``parser.error``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("run",
                        choices=[
                            "train_and_eval", "train", "eval", "infer",
                            "export", "score"
                        ],
                        help="Run type.")
    parser.add_argument("--config",
                        required=True,
                        nargs="+",
                        help="List of configuration files.")
    parser.add_argument("--model_type",
                        default="",
                        choices=list(classes_in_module(catalog)),
                        help="Model type from the catalog.")
    parser.add_argument("--model",
                        default="",
                        help="Custom model configuration file.")
    parser.add_argument(
        "--run_dir",
        default="",
        help="If set, model_dir will be created relative to this location.")
    parser.add_argument(
        "--data_dir",
        default="",
        help="If set, data files are expected to be relative to this location."
    )
    parser.add_argument("--features_file",
                        default=[],
                        nargs="+",
                        help="Run inference on this file.")
    parser.add_argument(
        "--predictions_file",
        default="",
        help=
        ("File used to save predictions. If not set, predictions are printed "
         "on the standard output."))
    parser.add_argument("--log_prediction_time",
                        default=False,
                        action="store_true",
                        help="Logs some prediction time metrics.")
    parser.add_argument(
        "--checkpoint_path",
        default=None,
        help=("Checkpoint or directory to use for inference or export "
              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument("--num_gpus",
                        type=int,
                        default=1,
                        help="Number of GPUs to use for in-graph replication.")
    parser.add_argument(
        "--chief_host",
        default="",
        help="hostname:port of the chief worker (for distributed training).")
    parser.add_argument(
        "--worker_hosts",
        default="",
        help=("Comma-separated list of hostname:port of workers "
              "(for distributed training)."))
    parser.add_argument(
        "--ps_hosts",
        default="",
        help=("Comma-separated list of hostname:port of parameter servers "
              "(for distributed training)."))
    parser.add_argument(
        "--task_type",
        default="chief",
        choices=["chief", "worker", "ps", "evaluator"],
        help="Type of the task to run (for distributed training).")
    parser.add_argument("--task_index",
                        type=int,
                        default=0,
                        help="ID of the task (for distributed training).")
    parser.add_argument("--log_level",
                        default="INFO",
                        choices=["DEBUG", "ERROR", "FATAL", "INFO", "WARN"],
                        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument("--gpu_allow_growth",
                        default=False,
                        action="store_true",
                        help="Allocate GPU memory dynamically.")
    parser.add_argument(
        "--intra_op_parallelism_threads",
        type=int,
        default=0,
        help=("Number of intra op threads (0 means the system picks "
              "an appropriate number)."))
    parser.add_argument(
        "--inter_op_parallelism_threads",
        type=int,
        default=0,
        help=("Number of inter op threads (0 means the system picks "
              "an appropriate number)."))
    parser.add_argument(
        "--session_config",
        default=None,
        help=(
            "Path to a file containing a tf.ConfigProto message in text format "
            "and used to create the TensorFlow sessions."))
    args = parser.parse_args()

    tf.logging.set_verbosity(getattr(tf.logging, args.log_level))

    # Setup cluster if defined.
    if args.chief_host:
        cluster = {"chief": [args.chief_host]}
        # "".split(",") returns [""], so an unset --worker_hosts/--ps_hosts
        # would otherwise declare a phantom task with an empty address. Keep
        # only real host entries and leave out jobs that end up empty.
        worker_hosts = [host for host in args.worker_hosts.split(",") if host]
        if worker_hosts:
            cluster["worker"] = worker_hosts
        ps_hosts = [host for host in args.ps_hosts.split(",") if host]
        if ps_hosts:
            cluster["ps"] = ps_hosts
        os.environ["TF_CONFIG"] = json.dumps({
            "cluster": cluster,
            "task": {
                "type": args.task_type,
                "index": args.task_index
            }
        })

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])

    # Only the chief task creates the model directory and serializes the
    # model description.
    is_chief = args.task_type == "chief"
    if is_chief and not tf.gfile.Exists(config["model_dir"]):
        tf.logging.info("Creating model directory %s", config["model_dir"])
        tf.gfile.MakeDirs(config["model_dir"])

    model = load_model(config["model_dir"],
                       model_file=args.model,
                       model_name=args.model_type,
                       serialize_model=is_chief)
    session_config = tf.ConfigProto(
        intra_op_parallelism_threads=args.intra_op_parallelism_threads,
        inter_op_parallelism_threads=args.inter_op_parallelism_threads)
    if args.session_config is not None:
        # Fields found in the user-provided text proto are merged into the
        # session configuration built from the command-line flags.
        with open(args.session_config, "rb") as session_config_file:
            text_format.Merge(session_config_file.read(), session_config)
    runner = Runner(model,
                    config,
                    seed=args.seed,
                    num_devices=args.num_gpus,
                    gpu_allow_growth=args.gpu_allow_growth,
                    session_config=session_config)

    if args.run == "train_and_eval":
        runner.train_and_evaluate()
    elif args.run == "train":
        runner.train()
    elif args.run == "eval":
        runner.evaluate(checkpoint_path=args.checkpoint_path)
    elif args.run == "infer":
        if not args.features_file:
            parser.error("--features_file is required for inference.")
        elif len(args.features_file) == 1:
            # A single file is passed on as a plain string, not a list.
            args.features_file = args.features_file[0]
        runner.infer(args.features_file,
                     predictions_file=args.predictions_file,
                     checkpoint_path=args.checkpoint_path,
                     log_time=args.log_prediction_time)
    elif args.run == "export":
        runner.export(checkpoint_path=args.checkpoint_path)
    elif args.run == "score":
        if not args.features_file:
            parser.error("--features_file is required for scoring.")
        if not args.predictions_file:
            parser.error("--predictions_file is required for scoring.")
        runner.score(args.features_file,
                     args.predictions_file,
                     checkpoint_path=args.checkpoint_path)
Exemple #13
0
# GA_TRACKING_ID = os.environ['GA_TRACKING_ID']

# Extension objects bound to the application object `app`, which is created
# outside this excerpt.
db = SQLAlchemy(app)
bcrypt = Bcrypt(app)
login_manager = LoginManager(app)
# NOTE(review): presumably the Flask-Login settings for the view that handles
# logins and the flash category of its message — confirm against the
# LoginManager in use.
login_manager.login_view = 'login'
login_manager.login_message_category = 'info'

# Arabic variety identification artefacts (a ".vec" and a ".mdl" file), read
# with `load` at import time. Paths are relative, so they resolve against the
# current working directory of the process.
avi_vectorizer = load(
    os.path.join('website', 'static', 'arabic_variety_identification',
                 'dialect_identification.vec'))
avi_model = load(
    os.path.join('website', 'static', 'arabic_variety_identification',
                 'dialect_identification.mdl'))

# "ca" diacritizer: configuration loaded from the shared defaults file plus
# the "diacritizer_ca" file, then a model and a Runner built from it at import
# time.
# NOTE(review): "ca" presumably stands for Classical Arabic — confirm.
ca_config = load_config(
    ["diacritizer/opennmt-defaults.yml", "diacritizer_ca/toy-ende.yml"])
# Empty model_file/model_name: no custom model file or catalog name is given.
ca_model = load_model(ca_config["model_dir"],
                      model_file="",
                      model_name="",
                      serialize_model=False)
ca_runner = Runner(ca_model,
                   ca_config,
                   seed=None,
                   num_devices=1,
                   gpu_allow_growth=False)

msa_config = load_config(
    ["diacritizer/opennmt-defaults.yml", "diacritizer_msa/toy-ende.yml"])
msa_model = load_model(msa_config["model_dir"],
                       model_file="",
                       model_name="",
Exemple #14
0
def create_description(path):
    """Run inference for the JSON input and return it with a description.

    Options are read from the process command line (``sys.argv``). The file
    given by ``--json`` is loaded and passed to ``savedata`` together with
    ``path``. Inference is then run on ``--features_file`` and the predictions
    file is handed to ``normalize.normalize`` to obtain the description.

    Args:
        path: Prefix prepended, by plain string concatenation, to the default
            features file (``test.csv``) and predictions file
            (``output.out``). It therefore has to end with a path separator
            when it names a directory.

    Returns:
        The loaded JSON object, with an added ``"description"`` entry,
        serialized as a JSON string (non-ASCII characters are kept as is).

    Raises:
        ValueError: If ``--chief_host`` is set; this entry point only runs
            inference, and distributed execution is restricted to the
            ``train_and_eval`` run type.
        SystemExit: With status 1 when the JSON input cannot be loaded or
            saved.
    """
    import sys  # Local import: only needed for the error exit below.

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # There is deliberately no "run" argument: this entry point always infers.
    parser.add_argument("--config", nargs="+", default=['config/opennmt-defaults.yml', 'config/default.yml'],
                        help="List of configuration files.")
    parser.add_argument("--auto_config", default=False, action="store_true",
                        help="Enable automatic configuration values.")
    parser.add_argument("--model_type", default="",
                        choices=list(classes_in_module(catalog, public_only=True)),
                        help="Model type from the catalog.")
    parser.add_argument("--model", default="",
                        help="Custom model configuration file.")
    parser.add_argument("--run_dir", default="",
                        help="If set, model_dir will be created relative to this location.")
    parser.add_argument("--data_dir", default="",
                        help="If set, data files are expected to be relative to this location.")
    parser.add_argument("--features_file", default=[path+'test.csv'], nargs="+",
                        help="Run inference on this file.")
    parser.add_argument("--predictions_file", default=path+"output.out",
                        help=("File used to save predictions. If not set, predictions are printed "
                              "on the standard output."))
    parser.add_argument("--log_prediction_time", default=False, action="store_true",
                        help="Logs some prediction time metrics.")
    parser.add_argument("--checkpoint_path", default='model/model.ckpt-600000',
                        help=("Checkpoint or directory to use for inference or export "
                              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument("--export_dir_base", default=None,
                        help="The base directory of the exported model.")
    parser.add_argument("--num_gpus", type=int, default=1,
                        help="Number of GPUs to use for in-graph replication.")
    parser.add_argument("--chief_host", default="",
                        help="hostname:port of the chief worker (for distributed training).")
    parser.add_argument("--worker_hosts", default="",
                        help=("Comma-separated list of hostname:port of workers "
                              "(for distributed training)."))
    parser.add_argument("--ps_hosts", default="",
                        help=("Comma-separated list of hostname:port of parameter servers "
                              "(for distributed training)."))
    parser.add_argument("--task_type", default="chief",
                        choices=["chief", "worker", "ps", "evaluator"],
                        help="Type of the task to run (for distributed training).")
    parser.add_argument("--task_index", type=int, default=0,
                        help="ID of the task (for distributed training).")
    parser.add_argument("--horovod", default=False, action="store_true",
                        help="Enable Horovod support for this run.")
    parser.add_argument("--log_level", default="INFO",
                        choices=["DEBUG", "ERROR", "FATAL", "INFO", "WARN"],
                        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None,
                        help="Random seed.")
    parser.add_argument("--gpu_allow_growth", default=False, action="store_true",
                        help="Allocate GPU memory dynamically.")
    parser.add_argument("--intra_op_parallelism_threads", type=int, default=0,
                        help=("Number of intra op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--inter_op_parallelism_threads", type=int, default=0,
                        help=("Number of inter op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--session_config", default=None,
                        help=("Path to a file containing a tf.ConfigProto message in text format "
                              "and used to create the TensorFlow sessions."))
    # The value is opened as a file below, so the help text describes a path.
    parser.add_argument("--json", default=None, required=True,
                        help="Path to a file containing the input data as JSON.")
    args = parser.parse_args()
    print(args)

    tf.compat.v1.logging.set_verbosity(getattr(tf.compat.v1.logging, args.log_level))

    # Setup cluster if defined.
    if args.chief_host:
        # No "run" argument is declared, so read it defensively: a plain
        # `args.run` would raise AttributeError instead of the ValueError
        # below. The TF_CONFIG export only becomes reachable if a "run"
        # argument is declared again.
        if getattr(args, "run", "infer") != "train_and_eval":
            raise ValueError("Distributed training is only supported with the train_and_eval run type")
        os.environ["TF_CONFIG"] = json.dumps({
            "cluster": {
                "chief": [args.chief_host],
                "worker": args.worker_hosts.split(","),
                "ps": args.ps_hosts.split(",")
            },
            "task": {
                "type": args.task_type,
                "index": args.task_index
            }
        })

    # Initialize Horovod if defined.
    if args.horovod:
        import horovod.tensorflow as hvd
        hvd.init()
        is_chief = hvd.rank() == 0
    else:
        hvd = None
        is_chief = args.task_type == "chief"

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])

    if is_chief and not tf.io.gfile.exists(config["model_dir"]):
        # Use the same tf.compat.v1 / tf.io.gfile APIs as the rest of this
        # function; the bare tf.logging / tf.gfile aliases do not exist in
        # TensorFlow 2.
        tf.compat.v1.logging.info("Creating model directory %s", config["model_dir"])
        tf.io.gfile.makedirs(config["model_dir"])
    model = load_model(
        config["model_dir"],
        model_file=args.model,
        model_name=args.model_type,
        serialize_model=is_chief)
    session_config = tf.compat.v1.ConfigProto(
        intra_op_parallelism_threads=args.intra_op_parallelism_threads,
        inter_op_parallelism_threads=args.inter_op_parallelism_threads)
    if args.session_config is not None:
        with open(args.session_config, "rb") as session_config_file:
            text_format.Merge(session_config_file.read(), session_config)

    try:
        with open(args.json, "r") as load_f:
            data = json.load(load_f)
        # NOTE(review): presumably writes the features file consumed by the
        # inference below — confirm against savedata.
        savedata(data, path)
    except Exception as err:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate. The cause is logged so it is not lost.
        tf.compat.v1.logging.error("Unable to load or save JSON input %s: %s", args.json, err)
        print("json is incorrected")
        sys.exit(1)

    runner = Runner(
        model,
        config,
        seed=args.seed,
        session_config=session_config,
        auto_config=args.auto_config,
        hvd=hvd)

    if not args.features_file:
        parser.error("--features_file is required for inference.")
    elif len(args.features_file) == 1:
        # A single file is passed on as a plain string, not a list.
        args.features_file = args.features_file[0]
    print("begin to run....")
    runner.infer(
        args.features_file,
        predictions_file=args.predictions_file,
        checkpoint_path=args.checkpoint_path,
        log_time=args.log_prediction_time)
    # Read the predictions from where inference wrote them. The default is
    # still path + "output.out", but --predictions_file overrides are honored.
    description = normalize.normalize(args.predictions_file, data)
    data["description"] = description
    return json.dumps(data, ensure_ascii=False)
Exemple #15
0
def main():
  """Load the configuration files given on the command line and print them.

  The paths are passed to ``load_config`` and the resulting configuration is
  written to standard output as block-style YAML.
  """
  cli = argparse.ArgumentParser(
      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  cli.add_argument("config", nargs="+", help="Configuration files.")
  config_paths = cli.parse_args().config
  merged = load_config(config_paths)
  rendered = yaml.dump(merged, default_flow_style=False)
  print(rendered)