def testConfigOverride(self): config1 = { "model_dir": "foo", "train": { "batch_size": 32, "steps": 42 } } config2 = {"model_dir": "bar", "train": {"batch_size": 64}} config_file_1 = os.path.join(self.get_temp_dir(), "config1.yml") config_file_2 = os.path.join(self.get_temp_dir(), "config2.yml") with open(config_file_1, mode="wb") as config_file: config_file.write(tf.compat.as_bytes(yaml.dump(config1))) with open(config_file_2, mode="wb") as config_file: config_file.write(tf.compat.as_bytes(yaml.dump(config2))) loaded_config = config.load_config([config_file_1, config_file_2]) self.assertDictEqual( { "model_dir": "bar", "train": { "batch_size": 64, "steps": 42 } }, loaded_config)
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("config", nargs="+", help="Configuration files.")
    args = parser.parse_args()
    config = load_config(args.config)
    print(yaml.dump(config, default_flow_style=False))
def testConfigOverride(self): config1 = { "model_dir": "foo", "train": { "batch_size": 32, "steps": 42 } } config2 = {"model_dir": "bar", "train": {"batch_size": 64}} with open(config_file_1, "w") as config_file: config_file.write(yaml.dump(config1)) with open(config_file_2, "w") as config_file: config_file.write(yaml.dump(config2)) loaded_config = config.load_config([config_file_1, config_file_2]) self.assertDictEqual( { "model_dir": "bar", "train": { "batch_size": 64, "steps": 42 } }, loaded_config)
def testConfigOverride(self): config1 = { "model_dir": "foo", "train": { "batch_size": 32, "steps": 42 } } config2 = {"model_dir": "bar", "train": {"batch_size": 64}} config_file_1 = os.path.join(self.get_temp_dir(), "config1.yml") config_file_2 = os.path.join(self.get_temp_dir(), "config2.yml") with io.open(config_file_1, encoding="utf-8", mode="w") as config_file: try: config_file.write(yaml.dump(config1)) except TypeError: config_file.write(unicode(yaml.dump(config1))) with io.open(config_file_2, encoding="utf-8", mode="w") as config_file: try: config_file.write(yaml.dump(config2)) except TypeError: config_file.write(unicode(yaml.dump(config2))) loaded_config = config.load_config([config_file_1, config_file_2]) self.assertDictEqual( { "model_dir": "bar", "train": { "batch_size": 64, "steps": 42 } }, loaded_config)
def testConfigOverride(self): config1 = {"model_dir": "foo", "train": {"batch_size": 32, "steps": 42}} config2 = {"model_dir": "bar", "train": {"batch_size": 64}} with open(config_file_1, "w") as config_file: config_file.write(yaml.dump(config1)) with open(config_file_2, "w") as config_file: config_file.write(yaml.dump(config2)) loaded_config = config.load_config([config_file_1, config_file_2]) self.assertDictEqual( {"model_dir": "bar", "train": {"batch_size": 64, "steps": 42}}, loaded_config)
def run_model(config, save_dir, evaluate_only=False):
    tf.logging.set_verbosity('INFO')
    config = load_config([config])
    model = load_custom_model(save_dir, NMTCustom())
    session_config = tf.ConfigProto(
        intra_op_parallelism_threads=0,
        inter_op_parallelism_threads=0)
    runner = Runner(
        model,
        config,
        seed=None,
        num_devices=1,
        gpu_allow_growth=False,
        session_config=session_config)
    if evaluate_only:
        runner.evaluate()
    else:
        runner.train_and_evaluate()
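# Hypothetical invocation of run_model above; note that it takes a single
# configuration file path (wrapped into a list for load_config internally).
# Both paths are placeholders.
run_model("config/my_run.yml", "runs/baseline", evaluate_only=True)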
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("run", choices=["train", "infer", "export"],
                        help="Run type.")
    parser.add_argument("--config", required=True, nargs="+",
                        help="List of configuration files.")
    parser.add_argument("--model", default="",
                        help="Model configuration file.")
    parser.add_argument("--run_dir", default="",
                        help="If set, model_dir will be created relative to this location.")
    parser.add_argument("--data_dir", default="",
                        help="If set, data files are expected to be relative to this location.")
    parser.add_argument("--features_file", default=[], nargs="+",
                        help="Run inference on this file.")
    parser.add_argument("--predictions_file", default="",
                        help=("File used to save predictions. If not set, predictions are printed "
                              "on the standard output."))
    parser.add_argument("--checkpoint_path", default=None,
                        help="Checkpoint to use for inference or export (latest by default).")
    parser.add_argument("--chief_host", default="",
                        help="hostname:port of the chief worker (for distributed training).")
    parser.add_argument("--worker_hosts", default="",
                        help=("Comma-separated list of hostname:port of workers "
                              "(for distributed training)."))
    parser.add_argument("--ps_hosts", default="",
                        help=("Comma-separated list of hostname:port of parameter servers "
                              "(for distributed training)."))
    parser.add_argument("--task_type", default="chief",
                        choices=["chief", "worker", "ps", "evaluator"],
                        help="Type of the task to run (for distributed training).")
    parser.add_argument("--task_index", type=int, default=0,
                        help="ID of the task (for distributed training).")
    parser.add_argument("--log_level", default="INFO",
                        choices=["DEBUG", "ERROR", "FATAL", "INFO", "WARN"],
                        help="Logs verbosity.")
    # action="store_true" instead of the original type=bool: argparse passes
    # the raw string through bool(), so any non-empty value (even "False")
    # parsed as True.
    parser.add_argument("--gpu_allow_growth", default=False, action="store_true",
                        help="Allocate GPU memory dynamically.")
    args = parser.parse_args()

    tf.logging.set_verbosity(getattr(tf.logging, args.log_level))

    # Setup cluster if defined.
    if args.chief_host:
        os.environ["TF_CONFIG"] = json.dumps({
            "cluster": {
                "chief": [args.chief_host],
                "worker": args.worker_hosts.split(","),
                "ps": args.ps_hosts.split(",")
            },
            "task": {
                "type": args.task_type,
                "index": args.task_index
            }
        })

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = _prefix_path(args.run_dir, config["model_dir"])
    if not os.path.isdir(config["model_dir"]):
        tf.logging.info("Creating model directory %s", config["model_dir"])
        os.makedirs(config["model_dir"])

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = args.gpu_allow_growth
    run_config = tf.estimator.RunConfig(
        model_dir=config["model_dir"], session_config=session_config)
    if "train" in config:
        if "save_summary_steps" in config["train"]:
            run_config = run_config.replace(
                save_summary_steps=config["train"]["save_summary_steps"],
                log_step_count_steps=config["train"]["save_summary_steps"])
        if "save_checkpoints_steps" in config["train"]:
            run_config = run_config.replace(
                save_checkpoints_secs=None,
                save_checkpoints_steps=config["train"]["save_checkpoints_steps"])
        if "keep_checkpoint_max" in config["train"]:
            run_config = run_config.replace(
                keep_checkpoint_max=config["train"]["keep_checkpoint_max"])

    model = load_model(config["model_dir"], model_file=args.model)
    estimator = tf.estimator.Estimator(
        model, config=run_config, params=config["params"])

    if args.run == "train":
        if args.data_dir:
            config["data"] = _prefix_path(args.data_dir, config["data"])
        train(estimator, model, config)
    elif args.run == "infer":
        if not args.features_file:
            parser.error("--features_file is required for inference.")
        elif len(args.features_file) == 1:
            args.features_file = args.features_file[0]
        infer(args.features_file, estimator, model, config,
              checkpoint_path=args.checkpoint_path,
              predictions_file=args.predictions_file)
    elif args.run == "export":
        export(estimator, model, config, checkpoint_path=args.checkpoint_path)
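# Standalone demonstration of the argparse pitfall fixed above: with type=bool,
# the raw command-line string is passed through bool(), and any non-empty
# string (including "False") is truthy.
import argparse

_parser = argparse.ArgumentParser()
_parser.add_argument("--flag", type=bool, default=False)
assert _parser.parse_args(["--flag", "False"]).flag is True  # bool("False") is True
assert _parser.parse_args([]).flag is False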
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-v", "--version", action="version",
                        version="OpenNMT-tf %s" % __version__)
    parser.add_argument("--config", required=True, nargs="+",
                        help="List of configuration files.")
    parser.add_argument("--auto_config", default=False, action="store_true",
                        help="Enable automatic configuration values.")
    parser.add_argument("--model_type", default="",
                        choices=list(sorted(catalog.list_model_names_from_catalog())),
                        help="Model type from the catalog.")
    parser.add_argument("--model", default="",
                        help="Custom model configuration file.")
    parser.add_argument("--run_dir", default="",
                        help="If set, model_dir will be created relative to this location.")
    parser.add_argument("--data_dir", default="",
                        help="If set, data files are expected to be relative to this location.")
    parser.add_argument("--checkpoint_path", default=None,
                        help=("Specific checkpoint or model directory to load "
                              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument("--log_level", default="INFO",
                        choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"],
                        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument("--gpu_allow_growth", default=False, action="store_true",
                        help="Allocate GPU memory dynamically.")
    parser.add_argument("--intra_op_parallelism_threads", type=int, default=0,
                        help=("Number of intra op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--inter_op_parallelism_threads", type=int, default=0,
                        help=("Number of inter op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--mixed_precision", default=False, action="store_true",
                        help="Enable mixed precision.")
    parser.add_argument("--eager_execution", default=False, action="store_true",
                        help="Enable TensorFlow eager execution.")

    subparsers = parser.add_subparsers(help="Run type.", dest="run_type")
    subparsers.required = True
    parser_train = subparsers.add_parser("train", help="Training.")
    parser_train.add_argument("--with_eval", default=False, action="store_true",
                              help="Enable automatic evaluation.")
    parser_train.add_argument("--num_gpus", type=int, default=1,
                              help="Number of GPUs to use for in-graph replication.")
    parser_train.add_argument("--horovod", default=False, action="store_true",
                              help="Enable Horovod training mode.")
    parser_eval = subparsers.add_parser("eval", help="Evaluation.")
    parser_eval.add_argument("--features_file", nargs="+", default=None,
                             help="Input features files.")
    parser_eval.add_argument("--labels_file", default=None,
                             help="Output labels files.")
    parser_infer = subparsers.add_parser("infer", help="Inference.")
    parser_infer.add_argument("--features_file", nargs="+", required=True,
                              help="Run inference on this file.")
    parser_infer.add_argument("--predictions_file", default="",
                              help=("File used to save predictions. If not set, predictions "
                                    "are printed on the standard output."))
    parser_infer.add_argument("--log_prediction_time", default=False, action="store_true",
                              help="Logs some prediction time metrics.")
    parser_export = subparsers.add_parser("export", help="Model export.")
    parser_export.add_argument("--output_dir", "--export_dir", required=True,
                               help="The directory of the exported model.")
    parser_export.add_argument("--format", "--export_format",
                               choices=exporters.list_exporters(), default="saved_model",
                               help="Format of the exported model.")
    parser_score = subparsers.add_parser("score", help="Scoring.")
    parser_score.add_argument("--features_file", nargs="+", required=True,
                              help="Features file.")
    parser_score.add_argument("--predictions_file", default=None,
                              help="Predictions to score.")
    parser_average_checkpoints = subparsers.add_parser(
        "average_checkpoints", help="Checkpoint averaging.")
    parser_average_checkpoints.add_argument(
        "--output_dir", required=True,
        help="The output directory for the averaged checkpoint.")
    parser_average_checkpoints.add_argument(
        "--max_count", type=int, default=8,
        help="The maximal number of checkpoints to average.")
    parser_update_vocab = subparsers.add_parser(
        "update_vocab", help="Update model vocabularies in checkpoint.")
    parser_update_vocab.add_argument(
        "--output_dir", required=True,
        help="The output directory for the updated checkpoint.")
    parser_update_vocab.add_argument("--src_vocab", default=None,
                                     help="Path to the new source vocabulary.")
    parser_update_vocab.add_argument("--tgt_vocab", default=None,
                                     help="Path to the new target vocabulary.")

    # When using an option that takes multiple values just before the run type,
    # the run type is treated as a value of this option. To fix this issue, we
    # inject a placeholder option just before the run type to clearly separate it.
    parser.add_argument("--placeholder", action="store_true", help=argparse.SUPPRESS)
    run_types = set(subparsers.choices.keys())
    args = sys.argv[1:]
    for i, arg in enumerate(args):
        if arg in run_types:
            args.insert(i, "--placeholder")
            break
    args = parser.parse_args(args)

    if (hasattr(args, "features_file") and args.features_file
            and len(args.features_file) == 1):
        args.features_file = args.features_file[0]

    _initialize_logging(getattr(logging, args.log_level))
    tf.config.threading.set_intra_op_parallelism_threads(
        args.intra_op_parallelism_threads)
    tf.config.threading.set_inter_op_parallelism_threads(
        args.inter_op_parallelism_threads)
    if args.eager_execution:
        tf.config.run_functions_eagerly(True)

    gpus = tf.config.list_physical_devices(device_type="GPU")
    if hasattr(args, "horovod") and args.horovod:
        import horovod.tensorflow as hvd
        hvd.init()
        is_master = hvd.rank() == 0
        if gpus:
            local_gpu = gpus[hvd.local_rank()]
            tf.config.set_visible_devices(local_gpu, device_type="GPU")
            gpus = [local_gpu]
    else:
        hvd = None
        is_master = True
    if args.gpu_allow_growth:
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, enable=True)

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])

    if is_master and not tf.io.gfile.exists(config["model_dir"]):
        tf.get_logger().info("Creating model directory %s", config["model_dir"])
        tf.io.gfile.makedirs(config["model_dir"])

    model = load_model(
        config["model_dir"],
        model_file=args.model,
        model_name=args.model_type,
        serialize_model=is_master,
        as_builder=True)
    runner = Runner(
        model,
        config,
        auto_config=args.auto_config,
        mixed_precision=args.mixed_precision,
        seed=args.seed)

    if args.run_type == "train":
        runner.train(
            num_devices=args.num_gpus,
            with_eval=args.with_eval,
            checkpoint_path=args.checkpoint_path,
            hvd=hvd)
    elif args.run_type == "eval":
        metrics = runner.evaluate(
            checkpoint_path=args.checkpoint_path,
            features_file=args.features_file,
            labels_file=args.labels_file)
        print(metrics)
    elif args.run_type == "infer":
        runner.infer(
            args.features_file,
            predictions_file=args.predictions_file,
            checkpoint_path=args.checkpoint_path,
            log_time=args.log_prediction_time)
    elif args.run_type == "export":
        runner.export(
            args.output_dir,
            checkpoint_path=args.checkpoint_path,
            exporter=exporters.make_exporter(args.format))
    elif args.run_type == "score":
        runner.score(
            args.features_file,
            args.predictions_file,
            checkpoint_path=args.checkpoint_path)
    elif args.run_type == "average_checkpoints":
        runner.average_checkpoints(args.output_dir, max_count=args.max_count)
    elif args.run_type == "update_vocab":
        runner.update_vocab(
            args.output_dir,
            src_vocab=args.src_vocab,
            tgt_vocab=args.tgt_vocab)
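# Standalone sketch of the nargs="+" ambiguity that the --placeholder injection
# above works around (--marker is a hypothetical stand-in): without a
# zero-argument option in between, argparse lets --config consume the run type.
import argparse

_parser = argparse.ArgumentParser()
_parser.add_argument("--config", nargs="+")
_parser.add_argument("--marker", action="store_true")
_sub = _parser.add_subparsers(dest="run_type")
_sub.add_parser("train")

_args = _parser.parse_args(["--config", "a.yml", "train"])
assert _args.run_type is None and _args.config == ["a.yml", "train"]
_args = _parser.parse_args(["--config", "a.yml", "--marker", "train"])
assert _args.run_type == "train" and _args.config == ["a.yml"]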
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("run",
                        choices=["train_and_eval", "train", "eval", "infer", "export"],
                        help="Run type.")
    parser.add_argument("--config", required=True, nargs="+",
                        help="List of configuration files.")
    parser.add_argument("--model", default="", help="Model configuration file.")
    parser.add_argument("--run_dir", default="",
                        help="If set, model_dir will be created relative to this location.")
    parser.add_argument("--data_dir", default="",
                        help="If set, data files are expected to be relative to this location.")
    parser.add_argument("--features_file", default=[], nargs="+",
                        help="Run inference on this file.")
    parser.add_argument("--predictions_file", default="",
                        help=("File used to save predictions. If not set, predictions are printed "
                              "on the standard output."))
    parser.add_argument("--checkpoint_path", default=None,
                        help=("Checkpoint or directory to use for inference or export "
                              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument("--num_gpus", type=int, default=1,
                        help="Number of GPUs to use for in-graph replication.")
    parser.add_argument("--chief_host", default="",
                        help="hostname:port of the chief worker (for distributed training).")
    parser.add_argument("--worker_hosts", default="",
                        help=("Comma-separated list of hostname:port of workers "
                              "(for distributed training)."))
    parser.add_argument("--ps_hosts", default="",
                        help=("Comma-separated list of hostname:port of parameter servers "
                              "(for distributed training)."))
    parser.add_argument("--task_type", default="chief",
                        choices=["chief", "worker", "ps", "evaluator"],
                        help="Type of the task to run (for distributed training).")
    parser.add_argument("--task_index", type=int, default=0,
                        help="ID of the task (for distributed training).")
    parser.add_argument("--log_level", default="INFO",
                        choices=["DEBUG", "ERROR", "FATAL", "INFO", "WARN"],
                        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument("--gpu_allow_growth", default=False, action="store_true",
                        help="Allocate GPU memory dynamically.")
    args = parser.parse_args()

    tf.logging.set_verbosity(getattr(tf.logging, args.log_level))

    # Setup cluster if defined.
    if args.chief_host:
        os.environ["TF_CONFIG"] = json.dumps({
            "cluster": {
                "chief": [args.chief_host],
                "worker": args.worker_hosts.split(","),
                "ps": args.ps_hosts.split(",")
            },
            "task": {
                "type": args.task_type,
                "index": args.task_index
            }
        })

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])

    if not os.path.isdir(config["model_dir"]):
        tf.logging.info("Creating model directory %s", config["model_dir"])
        os.makedirs(config["model_dir"])

    model = load_model(config["model_dir"], model_file=args.model)
    runner = Runner(
        model,
        config,
        seed=args.seed,
        num_devices=args.num_gpus,
        gpu_allow_growth=args.gpu_allow_growth)

    if args.run == "train_and_eval":
        runner.train_and_evaluate()
    elif args.run == "train":
        runner.train()
    elif args.run == "eval":
        runner.evaluate(checkpoint_path=args.checkpoint_path)
    elif args.run == "infer":
        if not args.features_file:
            parser.error("--features_file is required for inference.")
        elif len(args.features_file) == 1:
            args.features_file = args.features_file[0]
        runner.infer(
            args.features_file,
            predictions_file=args.predictions_file,
            checkpoint_path=args.checkpoint_path)
    elif args.run == "export":
        runner.export(checkpoint_path=args.checkpoint_path)
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("run", choices=["train", "infer", "export"],
                        help="Run type.")
    parser.add_argument("--config", required=True, nargs="+",
                        help="List of configuration files.")
    parser.add_argument("--model", default="", help="Model configuration file.")
    parser.add_argument("--run_dir", default="",
                        help="If set, model_dir will be created relative to this location.")
    parser.add_argument("--data_dir", default="",
                        help="If set, data files are expected to be relative to this location.")
    parser.add_argument("--features_file", default=[], nargs="+",
                        help="Run inference on this file.")
    parser.add_argument("--predictions_file", default="",
                        help=("File used to save predictions. If not set, predictions are printed "
                              "on the standard output."))
    parser.add_argument("--checkpoint_path", default=None,
                        help=("Checkpoint or directory to use for inference or export "
                              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument("--num_gpus", type=int, default=1,
                        help="Number of GPUs to use for in-graph replication.")
    parser.add_argument("--chief_host", default="",
                        help="hostname:port of the chief worker (for distributed training).")
    parser.add_argument("--worker_hosts", default="",
                        help=("Comma-separated list of hostname:port of workers "
                              "(for distributed training)."))
    parser.add_argument("--ps_hosts", default="",
                        help=("Comma-separated list of hostname:port of parameter servers "
                              "(for distributed training)."))
    parser.add_argument("--task_type", default="chief",
                        choices=["chief", "worker", "ps", "evaluator"],
                        help="Type of the task to run (for distributed training).")
    parser.add_argument("--task_index", type=int, default=0,
                        help="ID of the task (for distributed training).")
    parser.add_argument("--log_level", default="INFO",
                        choices=["DEBUG", "ERROR", "FATAL", "INFO", "WARN"],
                        help="Logs verbosity.")
    # action="store_true" instead of type=bool, which treats any non-empty
    # string (including "False") as True.
    parser.add_argument("--gpu_allow_growth", default=False, action="store_true",
                        help="Allocate GPU memory dynamically.")
    args = parser.parse_args()

    tf.logging.set_verbosity(getattr(tf.logging, args.log_level))

    # Setup cluster if defined.
    if args.chief_host:
        os.environ["TF_CONFIG"] = json.dumps({
            "cluster": {
                "chief": [args.chief_host],
                "worker": args.worker_hosts.split(","),
                "ps": args.ps_hosts.split(",")
            },
            "task": {
                "type": args.task_type,
                "index": args.task_index
            }
        })

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if not os.path.isdir(config["model_dir"]):
        tf.logging.info("Creating model directory %s", config["model_dir"])
        os.makedirs(config["model_dir"])

    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        gpu_options=tf.GPUOptions(allow_growth=args.gpu_allow_growth))
    run_config = tf.estimator.RunConfig(
        model_dir=config["model_dir"], session_config=session_config)
    if "train" in config:
        if "save_summary_steps" in config["train"]:
            run_config = run_config.replace(
                save_summary_steps=config["train"]["save_summary_steps"],
                log_step_count_steps=config["train"]["save_summary_steps"])
        if "save_checkpoints_steps" in config["train"]:
            run_config = run_config.replace(
                save_checkpoints_secs=None,
                save_checkpoints_steps=config["train"]["save_checkpoints_steps"])
        if "keep_checkpoint_max" in config["train"]:
            run_config = run_config.replace(
                keep_checkpoint_max=config["train"]["keep_checkpoint_max"])

    model = load_model(config["model_dir"], model_file=args.model)
    estimator = tf.estimator.Estimator(
        model.model_fn(num_devices=args.num_gpus),
        config=run_config,
        params=config["params"])

    checkpoint_path = args.checkpoint_path
    if checkpoint_path is not None and os.path.isdir(checkpoint_path):
        checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)

    if args.run == "train":
        if args.data_dir:
            config["data"] = _prefix_paths(args.data_dir, config["data"])
        train(estimator, model, config, num_devices=args.num_gpus)
    elif args.run == "infer":
        if not args.features_file:
            parser.error("--features_file is required for inference.")
        elif len(args.features_file) == 1:
            args.features_file = args.features_file[0]
        infer(
            args.features_file,
            estimator,
            model,
            config,
            checkpoint_path=checkpoint_path,
            predictions_file=args.predictions_file)
    elif args.run == "export":
        export(estimator, model, config, checkpoint_path=checkpoint_path)
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-v", "--version", action="version",
                        version="OpenNMT-tf %s" % __version__)
    parser.add_argument("--config", required=True, nargs="+",
                        help="List of configuration files.")
    parser.add_argument("--auto_config", default=False, action="store_true",
                        help="Enable automatic configuration values.")
    parser.add_argument("--model_type", default="",
                        choices=list(classes_in_module(catalog, public_only=True)),
                        help="Model type from the catalog.")
    parser.add_argument("--model", default="",
                        help="Custom model configuration file.")
    parser.add_argument("--run_dir", default="",
                        help="If set, model_dir will be created relative to this location.")
    parser.add_argument("--data_dir", default="",
                        help="If set, data files are expected to be relative to this location.")
    parser.add_argument("--checkpoint_path", default=None,
                        help=("Specific checkpoint or model directory to load "
                              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument("--log_level", default="INFO",
                        choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"],
                        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument("--gpu_allow_growth", default=False, action="store_true",
                        help="Allocate GPU memory dynamically.")
    parser.add_argument("--intra_op_parallelism_threads", type=int, default=0,
                        help=("Number of intra op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--inter_op_parallelism_threads", type=int, default=0,
                        help=("Number of inter op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--mixed_precision", default=False, action="store_true",
                        help="Enable mixed precision.")

    subparsers = parser.add_subparsers(help="Run type.", dest="run")
    parser_train = subparsers.add_parser("train", help="Training.")
    parser_train.add_argument("--with_eval", default=False, action="store_true",
                              help="Enable automatic evaluation.")
    parser_train.add_argument("--num_gpus", type=int, default=1,
                              help="Number of GPUs to use for in-graph replication.")
    parser_eval = subparsers.add_parser("eval", help="Evaluation.")
    parser_eval.add_argument("--features_file", nargs="+", default=None,
                             help="Input features files.")
    parser_eval.add_argument("--labels_file", default=None,
                             help="Output labels files.")
    parser_infer = subparsers.add_parser("infer", help="Inference.")
    parser_infer.add_argument("--features_file", nargs="+", required=True,
                              help="Run inference on this file.")
    parser_infer.add_argument("--predictions_file", default="",
                              help=("File used to save predictions. If not set, predictions "
                                    "are printed on the standard output."))
    parser_infer.add_argument("--log_prediction_time", default=False, action="store_true",
                              help="Logs some prediction time metrics.")
    parser_export = subparsers.add_parser("export", help="Model export.")
    parser_export.add_argument("--export_dir", required=True,
                               help="The directory of the exported model.")
    parser_score = subparsers.add_parser("score", help="Scoring.")
    parser_score.add_argument("--features_file", nargs="+", required=True,
                              help="Features file.")
    parser_score.add_argument("--predictions_file", default=None,
                              help="Predictions to score.")
    parser_average_checkpoints = subparsers.add_parser(
        "average_checkpoints", help="Checkpoint averaging.")
    parser_average_checkpoints.add_argument(
        "--output_dir", required=True,
        help="The output directory for the averaged checkpoint.")
    parser_average_checkpoints.add_argument(
        "--max_count", type=int, default=8,
        help="The maximal number of checkpoints to average.")
    parser_update_vocab = subparsers.add_parser(
        "update_vocab", help="Update model vocabularies in checkpoint.")
    parser_update_vocab.add_argument(
        "--output_dir", required=True,
        help="The output directory for the updated checkpoint.")
    parser_update_vocab.add_argument("--src_vocab", default=None,
                                     help="Path to the new source vocabulary.")
    parser_update_vocab.add_argument("--tgt_vocab", default=None,
                                     help="Path to the new target vocabulary.")
    args = parser.parse_args()

    if (hasattr(args, "features_file") and args.features_file
            and len(args.features_file) == 1):
        args.features_file = args.features_file[0]

    _set_log_level(getattr(logging, args.log_level))
    tf.config.threading.set_intra_op_parallelism_threads(
        args.intra_op_parallelism_threads)
    tf.config.threading.set_inter_op_parallelism_threads(
        args.inter_op_parallelism_threads)
    if args.gpu_allow_growth:
        for device in tf.config.experimental.list_physical_devices(device_type="GPU"):
            tf.config.experimental.set_memory_growth(device, enable=True)

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])

    if not tf.io.gfile.exists(config["model_dir"]):
        tf.get_logger().info("Creating model directory %s", config["model_dir"])
        tf.io.gfile.makedirs(config["model_dir"])

    model = load_model(
        config["model_dir"],
        model_file=args.model,
        model_name=args.model_type)
    runner = Runner(
        model,
        config,
        auto_config=args.auto_config,
        mixed_precision=args.mixed_precision,
        seed=args.seed)

    if args.run == "train":
        runner.train(
            num_devices=args.num_gpus,
            with_eval=args.with_eval,
            checkpoint_path=args.checkpoint_path)
    elif args.run == "eval":
        metrics = runner.evaluate(
            checkpoint_path=args.checkpoint_path,
            features_file=args.features_file,
            labels_file=args.labels_file)
        print(metrics)
    elif args.run == "infer":
        runner.infer(
            args.features_file,
            predictions_file=args.predictions_file,
            checkpoint_path=args.checkpoint_path,
            log_time=args.log_prediction_time)
    elif args.run == "export":
        runner.export(args.export_dir, checkpoint_path=args.checkpoint_path)
    elif args.run == "score":
        runner.score(
            args.features_file,
            args.predictions_file,
            checkpoint_path=args.checkpoint_path)
    elif args.run == "average_checkpoints":
        runner.average_checkpoints(args.output_dir, max_count=args.max_count)
    elif args.run == "update_vocab":
        runner.update_vocab(
            args.output_dir,
            src_vocab=args.src_vocab,
            tgt_vocab=args.tgt_vocab)
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("run",
                        choices=["train_and_eval", "train", "eval", "infer", "export", "score"],
                        help="Run type.")
    parser.add_argument("--config", required=True, nargs="+",
                        help="List of configuration files.")
    parser.add_argument("--model_type", default="",
                        choices=list(classes_in_module(catalog)),
                        help="Model type from the catalog.")
    parser.add_argument("--model", default="",
                        help="Custom model configuration file.")
    parser.add_argument("--run_dir", default="",
                        help="If set, model_dir will be created relative to this location.")
    parser.add_argument("--data_dir", default="",
                        help="If set, data files are expected to be relative to this location.")
    parser.add_argument("--features_file", default=[], nargs="+",
                        help="Run inference on this file.")
    parser.add_argument("--predictions_file", default="",
                        help=("File used to save predictions. If not set, predictions are printed "
                              "on the standard output."))
    parser.add_argument("--log_prediction_time", default=False, action="store_true",
                        help="Logs some prediction time metrics.")
    parser.add_argument("--checkpoint_path", default=None,
                        help=("Checkpoint or directory to use for inference or export "
                              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument("--num_gpus", type=int, default=1,
                        help="Number of GPUs to use for in-graph replication.")
    parser.add_argument("--chief_host", default="",
                        help="hostname:port of the chief worker (for distributed training).")
    parser.add_argument("--worker_hosts", default="",
                        help=("Comma-separated list of hostname:port of workers "
                              "(for distributed training)."))
    parser.add_argument("--ps_hosts", default="",
                        help=("Comma-separated list of hostname:port of parameter servers "
                              "(for distributed training)."))
    parser.add_argument("--task_type", default="chief",
                        choices=["chief", "worker", "ps", "evaluator"],
                        help="Type of the task to run (for distributed training).")
    parser.add_argument("--task_index", type=int, default=0,
                        help="ID of the task (for distributed training).")
    parser.add_argument("--log_level", default="INFO",
                        choices=["DEBUG", "ERROR", "FATAL", "INFO", "WARN"],
                        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument("--gpu_allow_growth", default=False, action="store_true",
                        help="Allocate GPU memory dynamically.")
    parser.add_argument("--intra_op_parallelism_threads", type=int, default=0,
                        help=("Number of intra op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--inter_op_parallelism_threads", type=int, default=0,
                        help=("Number of inter op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--session_config", default=None,
                        help=("Path to a file containing a tf.ConfigProto message in text format "
                              "and used to create the TensorFlow sessions."))
    args = parser.parse_args()

    tf.logging.set_verbosity(getattr(tf.logging, args.log_level))

    # Setup cluster if defined.
    if args.chief_host:
        os.environ["TF_CONFIG"] = json.dumps({
            "cluster": {
                "chief": [args.chief_host],
                "worker": args.worker_hosts.split(","),
                "ps": args.ps_hosts.split(",")
            },
            "task": {
                "type": args.task_type,
                "index": args.task_index
            }
        })

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])

    is_chief = args.task_type == "chief"
    if is_chief and not tf.gfile.Exists(config["model_dir"]):
        tf.logging.info("Creating model directory %s", config["model_dir"])
        tf.gfile.MakeDirs(config["model_dir"])

    model = load_model(
        config["model_dir"],
        model_file=args.model,
        model_name=args.model_type,
        serialize_model=is_chief)

    session_config = tf.ConfigProto(
        intra_op_parallelism_threads=args.intra_op_parallelism_threads,
        inter_op_parallelism_threads=args.inter_op_parallelism_threads)
    if args.session_config is not None:
        with open(args.session_config, "rb") as session_config_file:
            text_format.Merge(session_config_file.read(), session_config)

    runner = Runner(
        model,
        config,
        seed=args.seed,
        num_devices=args.num_gpus,
        gpu_allow_growth=args.gpu_allow_growth,
        session_config=session_config)

    if args.run == "train_and_eval":
        runner.train_and_evaluate()
    elif args.run == "train":
        runner.train()
    elif args.run == "eval":
        runner.evaluate(checkpoint_path=args.checkpoint_path)
    elif args.run == "infer":
        if not args.features_file:
            parser.error("--features_file is required for inference.")
        elif len(args.features_file) == 1:
            args.features_file = args.features_file[0]
        runner.infer(
            args.features_file,
            predictions_file=args.predictions_file,
            checkpoint_path=args.checkpoint_path,
            log_time=args.log_prediction_time)
    elif args.run == "export":
        runner.export(checkpoint_path=args.checkpoint_path)
    elif args.run == "score":
        if not args.features_file:
            parser.error("--features_file is required for scoring.")
        if not args.predictions_file:
            parser.error("--predictions_file is required for scoring.")
        runner.score(
            args.features_file,
            args.predictions_file,
            checkpoint_path=args.checkpoint_path)
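# A hypothetical file for the --session_config option above, in the
# tf.ConfigProto text format that text_format.Merge parses (the field names
# are real ConfigProto fields; the values are illustrative only):
#
#   allow_soft_placement: true
#   log_device_placement: false
#   gpu_options {
#     allow_growth: true
#   }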
# GA_TRACKING_ID = os.environ['GA_TRACKING_ID']
db = SQLAlchemy(app)
bcrypt = Bcrypt(app)
login_manager = LoginManager(app)
login_manager.login_view = 'login'
login_manager.login_message_category = 'info'

avi_vectorizer = load(
    os.path.join('website', 'static', 'arabic_variety_identification',
                 'dialect_identification.vec'))
avi_model = load(
    os.path.join('website', 'static', 'arabic_variety_identification',
                 'dialect_identification.mdl'))

ca_config = load_config(
    ["diacritizer/opennmt-defaults.yml", "diacritizer_ca/toy-ende.yml"])
ca_model = load_model(
    ca_config["model_dir"],
    model_file="",
    model_name="",
    serialize_model=False)
ca_runner = Runner(
    ca_model,
    ca_config,
    seed=None,
    num_devices=1,
    gpu_allow_growth=False)

msa_config = load_config(
    ["diacritizer/opennmt-defaults.yml", "diacritizer_msa/toy-ende.yml"])
msa_model = load_model(
    msa_config["model_dir"],
    model_file="",
    model_name="",
    serialize_model=False)  # assumed: the original snippet ended mid-call; completed to mirror the ca_model call above
def create_description(path):
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # parser.add_argument("run", default="infer", help="Run type.")
    parser.add_argument("--config", nargs="+",
                        default=['config/opennmt-defaults.yml', 'config/default.yml'],
                        help="List of configuration files.")
    parser.add_argument("--auto_config", default=False, action="store_true",
                        help="Enable automatic configuration values.")
    parser.add_argument("--model_type", default="",
                        choices=list(classes_in_module(catalog, public_only=True)),
                        help="Model type from the catalog.")
    parser.add_argument("--model", default="",
                        help="Custom model configuration file.")
    parser.add_argument("--run_dir", default="",
                        help="If set, model_dir will be created relative to this location.")
    parser.add_argument("--data_dir", default="",
                        help="If set, data files are expected to be relative to this location.")
    parser.add_argument("--features_file", default=[path + 'test.csv'], nargs="+",
                        help="Run inference on this file.")
    parser.add_argument("--predictions_file", default=path + "output.out",
                        help=("File used to save predictions. If not set, predictions are printed "
                              "on the standard output."))
    parser.add_argument("--log_prediction_time", default=False, action="store_true",
                        help="Logs some prediction time metrics.")
    parser.add_argument("--checkpoint_path", default='model/model.ckpt-600000',
                        help=("Checkpoint or directory to use for inference or export "
                              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument("--export_dir_base", default=None,
                        help="The base directory of the exported model.")
    parser.add_argument("--num_gpus", type=int, default=1,
                        help="Number of GPUs to use for in-graph replication.")
    parser.add_argument("--chief_host", default="",
                        help="hostname:port of the chief worker (for distributed training).")
    parser.add_argument("--worker_hosts", default="",
                        help=("Comma-separated list of hostname:port of workers "
                              "(for distributed training)."))
    parser.add_argument("--ps_hosts", default="",
                        help=("Comma-separated list of hostname:port of parameter servers "
                              "(for distributed training)."))
    parser.add_argument("--task_type", default="chief",
                        choices=["chief", "worker", "ps", "evaluator"],
                        help="Type of the task to run (for distributed training).")
    parser.add_argument("--task_index", type=int, default=0,
                        help="ID of the task (for distributed training).")
    parser.add_argument("--horovod", default=False, action="store_true",
                        help="Enable Horovod support for this run.")
    parser.add_argument("--log_level", default="INFO",
                        choices=["DEBUG", "ERROR", "FATAL", "INFO", "WARN"],
                        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument("--gpu_allow_growth", default=False, action="store_true",
                        help="Allocate GPU memory dynamically.")
    parser.add_argument("--intra_op_parallelism_threads", type=int, default=0,
                        help=("Number of intra op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--inter_op_parallelism_threads", type=int, default=0,
                        help=("Number of inter op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--session_config", default=None,
                        help=("Path to a file containing a tf.ConfigProto message in text format "
                              "and used to create the TensorFlow sessions."))
    parser.add_argument("--json", default=None, required=True,
                        help="Input data as a JSON file.")
    args = parser.parse_args()
    # inp = args.inputstring
    # print(inp)
    print(args)

    tf.compat.v1.logging.set_verbosity(
        getattr(tf.compat.v1.logging, args.log_level))

    # Setup cluster if defined.
    if args.chief_host:
        # Note: the "run" positional is commented out above, so args.run does
        # not exist and this check would raise AttributeError if chief_host is set.
        if args.run != "train_and_eval":
            raise ValueError(
                "Distributed training is only supported with the train_and_eval run type")
        os.environ["TF_CONFIG"] = json.dumps({
            "cluster": {
                "chief": [args.chief_host],
                "worker": args.worker_hosts.split(","),
                "ps": args.ps_hosts.split(",")
            },
            "task": {
                "type": args.task_type,
                "index": args.task_index
            }
        })

    # Initialize Horovod if defined.
    if args.horovod:
        import horovod.tensorflow as hvd
        hvd.init()
        is_chief = hvd.rank() == 0
    else:
        hvd = None
        is_chief = args.task_type == "chief"

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])

    if is_chief and not tf.io.gfile.exists(config["model_dir"]):
        tf.logging.info("Creating model directory %s", config["model_dir"])
        tf.gfile.MakeDirs(config["model_dir"])

    model = load_model(
        config["model_dir"],
        model_file=args.model,
        model_name=args.model_type,
        serialize_model=is_chief)
    session_config = tf.compat.v1.ConfigProto(
        intra_op_parallelism_threads=args.intra_op_parallelism_threads,
        inter_op_parallelism_threads=args.inter_op_parallelism_threads)
    # gpu_options=tf.GPUOptions(allow_growth=args.gpu_allow_growth))
    if args.session_config is not None:
        with open(args.session_config, "rb") as session_config_file:
            text_format.Merge(session_config_file.read(), session_config)

    # 11/25: --json now takes a file path instead of a JSON string.
    try:
        with open(args.json, "r") as load_f:
            data = json.load(load_f)
        # print(data["category1"])
        savedata(data, path)
    except (OSError, ValueError):
        print("Invalid JSON input")
        exit(1)

    runner = Runner(
        model,
        config,
        seed=args.seed,
        # num_devices=args.num_gpus,
        session_config=session_config,
        auto_config=args.auto_config,
        hvd=hvd)

    if not args.features_file:
        parser.error("--features_file is required for inference.")
    elif len(args.features_file) == 1:
        args.features_file = args.features_file[0]
    print("begin to run....")
    runner.infer(
        args.features_file,
        predictions_file=args.predictions_file,
        checkpoint_path=args.checkpoint_path,
        log_time=args.log_prediction_time)

    description = normalize.normalize(path + "output.out", data)
    data["description"] = description
    json_str = json.dumps(data, ensure_ascii=False)
    return json_str