def testLoadModel(self): model_name = "NMTSmall" model_dir = self.get_temp_dir() model = config.load_model(model_dir, model_name=model_name) self.assertIsInstance(model, Model) model = config.load_model(model_dir) self.assertIsInstance(model, Model)
def testLoadModel(self): model_name = "Transformer" model_dir = self.get_temp_dir() model = config.load_model(model_dir, model_name=model_name) self.assertTrue(os.path.exists(os.path.join(model_dir, "model_description.py"))) self.assertIsInstance(model, Model) model = config.load_model(model_dir) self.assertIsInstance(model, Model)
def testLoadModelFileOverride(self):
    model_dir = self.get_temp_dir()
    saved_description_path = os.path.join(model_dir, "model_description.py")
    model_file = self._writeCustomModel(filename="test_model1.py", return_value=1)
    config.load_model(model_dir, model_file=model_file)
    self.assertTrue(filecmp.cmp(model_file, saved_description_path))
    model_file = self._writeCustomModel(filename="test_model2.py", return_value=2)
    config.load_model(model_dir, model_file=model_file)
    self.assertTrue(filecmp.cmp(model_file, saved_description_path))
def testLoadModelFile(self):
    model_file = self._writeCustomModel()
    model_dir = self.get_temp_dir()
    model = config.load_model(model_dir, model_file=model_file)
    saved_description_path = os.path.join(model_dir, "model_description.py")
    self.assertTrue(os.path.exists(saved_description_path))
    self.assertTrue(filecmp.cmp(model_file, saved_description_path))
    self.assertEqual(model, 42)
    model = config.load_model(model_dir)
    self.assertEqual(model, 42)
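# Not shown above: the _writeCustomModel helper used by these tests. A rough
# sketch of what it could look like, assuming the custom model file only needs
# to expose a top-level `model` callable whose result config.load_model()
# returns (the helper in the actual test suite may differ):
def _writeCustomModel(self, filename="test_model.py", return_value=42):
    model_file = os.path.join(self.get_temp_dir(), filename)
    with open(model_file, "w") as f:
        # load_model() loads this file and resolves its `model` attribute.
        f.write("model = lambda: %d\n" % return_value)
    return model_file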
def testLoadModel(self, model_name, as_builder):
    def _check_model(model):
        if as_builder:
            # assertTrue(model, callable) only checked truthiness (`callable`
            # was passed as the failure message); check callability explicitly.
            self.assertTrue(callable(model))
            model = model()
        self.assertIsInstance(model, Model)

    model_dir = self.get_temp_dir()
    _check_model(
        config.load_model(model_dir, model_name=model_name, as_builder=as_builder)
    )
    self.assertTrue(os.path.exists(os.path.join(model_dir, "model_description.py")))
    _check_model(config.load_model(model_dir, as_builder=as_builder))
def get_runners():
    config_one = load_config('avg_data.yml')
    model_one = load_model(
        config_one["model_dir"], model_file='', model_name='', serialize_model=True)
    session_config_one = tf.ConfigProto(
        intra_op_parallelism_threads=0,
        inter_op_parallelism_threads=0,
        gpu_options=tf.GPUOptions(allow_growth=False))
    runner_one = Runner(
        model_one, config_one, seed=None, num_devices=1,
        session_config=session_config_one, auto_config=True)

    config_two = load_config('avg_data_rev.yml')
    model_two = load_model(
        config_two["model_dir"], model_file='', model_name='', serialize_model=True)
    session_config_two = tf.ConfigProto(
        intra_op_parallelism_threads=0,
        inter_op_parallelism_threads=0,
        gpu_options=tf.GPUOptions(allow_growth=False))
    runner_two = Runner(
        model_two, config_two, seed=None, num_devices=1,
        session_config=session_config_two, auto_config=True)

    return runner_one, runner_two
def testLoadModel(self, model_name, as_builder):
    def _check_model(model):
        if as_builder:
            # Check callability explicitly; assertTrue(model, callable) treated
            # `callable` as the assertion message rather than a check.
            self.assertTrue(callable(model))
            model = model()
        self.assertIsInstance(model, Model)

    model_dir = self.get_temp_dir()
    _check_model(
        config.load_model(model_dir, model_name=model_name, as_builder=as_builder))
    self.assertTrue(
        os.path.exists(os.path.join(model_dir, config.MODEL_DESCRIPTION_FILENAME)))
    _check_model(config.load_model(model_dir, as_builder=as_builder))
def testLoadModelDescriptionCompat(self):
    model_dir = self.get_temp_dir()
    description = os.path.join(model_dir, "model_description.py")
    with open(description, "w") as description_file:
        description_file.write("from opennmt.models import catalog\n")
        description_file.write("model = catalog.Transformer\n")
    model = config.load_model(model_dir)
    self.assertIsInstance(model, Model)
def __init__(self, model_config=None):
    if not model_config:
        raise ValueError("distill model must include model_config.")
    os.makedirs(model_config["teacher"]["model_dir"], exist_ok=True)
    os.makedirs(model_config["student"]["model_dir"], exist_ok=True)
    teacher_model = load_model(
        model_config["teacher"]["model_dir"],
        model_file=model_config["teacher"].get("model", None),
        model_name=model_config["teacher"].get("model_type", None))
    student_model = load_model(
        model_config["student"]["model_dir"],
        model_file=model_config["student"].get("model", None),
        model_name=model_config["student"].get("model_type", None))
    super(BaseDistill, self).__init__(
        teacher_model=teacher_model,
        student_model=student_model,
        distill_loss_rate=model_config.get("distill_loss_rate", 0.75),
        student_loss_rate=model_config.get("student_loss_rate", 0.25),
        distill_temperature=model_config.get("distill_temperature", 2.0))
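# Illustration only: the keys read by the constructor above imply a
# model_config shaped roughly like the following. The paths and model types
# are placeholders, not values taken from any real configuration.
example_model_config = {
    "teacher": {"model_dir": "runs/teacher", "model_type": "Transformer"},
    "student": {"model_dir": "runs/student", "model_type": "Transformer"},
    "distill_loss_rate": 0.75,    # weight of the distillation loss (default above)
    "student_loss_rate": 0.25,    # weight of the student loss (default above)
    "distill_temperature": 2.0,   # softmax temperature for distillation (default above)
}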
def _getTransliterationRunner(self, base_config=None, model_version="v2"):
    model_dir = os.path.join(self.get_temp_dir(), "model")
    shutil.copytree(
        os.path.join(test_data, "transliteration-aren-v2", model_version), model_dir)
    config = {}
    config["model_dir"] = model_dir
    config["data"] = {
        "source_vocabulary": os.path.join(model_dir, "ar.vocab"),
        "target_vocabulary": os.path.join(model_dir, "en.vocab"),
    }
    if base_config is not None:
        config = misc.merge_dict(config, base_config)
    model = load_model(model_dir)
    runner = Runner(model, config)
    return runner
def _load_model(self, model_type=None, model_file=None, model_path=None):
    """Returns the model directory and the model instance.

    If model_path is not None, the model files are copied into the current
    working directory ${WORKSPACE_DIR}/output/model/.
    """
    model_dir = os.path.join(self._output_dir, "model")
    if os.path.exists(model_dir):
        shutil.rmtree(model_dir)
    os.makedirs(model_dir)
    if model_path is not None:
        for filename in os.listdir(model_path):
            path = os.path.join(model_path, filename)
            if os.path.isfile(path):
                shutil.copy(path, model_dir)
    model = load_model(model_dir, model_file=model_file, model_name=model_type)
    return model_dir, model
def __init__(self, config):
    """Builds the model runner from a configuration.

    Args:
      config: Dict with the following keys:
        config_path: A list of paths to YAML configuration files.
        model_type: The model type (a name from the OpenNMT-tf catalog).
        checkpoint_path: Path to the checkpoint to load.
        auto_config: Whether to enable automatic configuration values.
    """
    self.__config = {}
    for config_path in config['config_path']:
        with open(config_path, 'r') as f:
            self.__config.update(yaml.load(f.read()))
    self.__config['model_type'] = config['model_type']
    self.__config['checkpoint_path'] = config['checkpoint_path']
    model = load_model(self.__config['model_dir'], model_name=self.__config['model_type'])
    self.model = Runner(model, self.__config, auto_config=config['auto_config'])
def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-v", "--version", action="version", version="OpenNMT-tf %s" % __version__)
    parser.add_argument("--config", required=True, nargs="+", help="List of configuration files.")
    parser.add_argument("--auto_config", default=False, action="store_true",
                        help="Enable automatic configuration values.")
    parser.add_argument("--model_type", default="",
                        choices=list(sorted(catalog.list_model_names_from_catalog())),
                        help="Model type from the catalog.")
    parser.add_argument("--model", default="", help="Custom model configuration file.")
    parser.add_argument("--run_dir", default="",
                        help="If set, model_dir will be created relative to this location.")
    parser.add_argument("--data_dir", default="",
                        help="If set, data files are expected to be relative to this location.")
    parser.add_argument("--checkpoint_path", default=None,
                        help=("Specific checkpoint or model directory to load "
                              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument("--log_level", default="INFO",
                        choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"],
                        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument("--gpu_allow_growth", default=False, action="store_true",
                        help="Allocate GPU memory dynamically.")
    parser.add_argument("--intra_op_parallelism_threads", type=int, default=0,
                        help=("Number of intra op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--inter_op_parallelism_threads", type=int, default=0,
                        help=("Number of inter op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--mixed_precision", default=False, action="store_true",
                        help="Enable mixed precision.")
    parser.add_argument("--eager_execution", default=False, action="store_true",
                        help="Enable TensorFlow eager execution.")

    subparsers = parser.add_subparsers(help="Run type.", dest="run_type")
    subparsers.required = True
    parser_train = subparsers.add_parser("train", help="Training.")
    parser_train.add_argument("--with_eval", default=False, action="store_true",
                              help="Enable automatic evaluation.")
    parser_train.add_argument("--num_gpus", type=int, default=1,
                              help="Number of GPUs to use for in-graph replication.")
    parser_train.add_argument("--horovod", default=False, action="store_true",
                              help="Enable Horovod training mode.")
    parser_eval = subparsers.add_parser("eval", help="Evaluation.")
    parser_eval.add_argument("--features_file", nargs="+", default=None, help="Input features files.")
    parser_eval.add_argument("--labels_file", default=None, help="Output labels files.")
    parser_infer = subparsers.add_parser("infer", help="Inference.")
    parser_infer.add_argument("--features_file", nargs="+", required=True,
                              help="Run inference on this file.")
    parser_infer.add_argument("--predictions_file", default="",
                              help=("File used to save predictions. If not set, predictions are printed "
                                    "on the standard output."))
    parser_infer.add_argument("--log_prediction_time", default=False, action="store_true",
                              help="Logs some prediction time metrics.")
    parser_export = subparsers.add_parser("export", help="Model export.")
    parser_export.add_argument("--output_dir", "--export_dir", required=True,
                               help="The directory of the exported model.")
    parser_export.add_argument("--format", "--export_format", choices=exporters.list_exporters(),
                               default="saved_model", help="Format of the exported model.")
    parser_score = subparsers.add_parser("score", help="Scoring.")
    parser_score.add_argument("--features_file", nargs="+", required=True, help="Features file.")
    parser_score.add_argument("--predictions_file", default=None, help="Predictions to score.")
    parser_average_checkpoints = subparsers.add_parser("average_checkpoints", help="Checkpoint averaging.")
    parser_average_checkpoints.add_argument("--output_dir", required=True,
                                            help="The output directory for the averaged checkpoint.")
    parser_average_checkpoints.add_argument("--max_count", type=int, default=8,
                                            help="The maximal number of checkpoints to average.")
    parser_update_vocab = subparsers.add_parser("update_vocab", help="Update model vocabularies in checkpoint.")
    parser_update_vocab.add_argument("--output_dir", required=True,
                                     help="The output directory for the updated checkpoint.")
    parser_update_vocab.add_argument("--src_vocab", default=None, help="Path to the new source vocabulary.")
    parser_update_vocab.add_argument("--tgt_vocab", default=None, help="Path to the new target vocabulary.")

    # When using an option that takes multiple values just before the run type,
    # the run type is treated as a value of this option. To fix this issue, we
    # inject a placeholder option just before the run type to clearly separate it.
    parser.add_argument("--placeholder", action="store_true", help=argparse.SUPPRESS)
    run_types = set(subparsers.choices.keys())
    args = sys.argv[1:]
    for i, arg in enumerate(args):
        if arg in run_types:
            args.insert(i, "--placeholder")
            break
    args = parser.parse_args(args)
    if hasattr(args, "features_file") and args.features_file and len(args.features_file) == 1:
        args.features_file = args.features_file[0]

    _initialize_logging(getattr(logging, args.log_level))
    tf.config.threading.set_intra_op_parallelism_threads(args.intra_op_parallelism_threads)
    tf.config.threading.set_inter_op_parallelism_threads(args.inter_op_parallelism_threads)
    if args.eager_execution:
        tf.config.run_functions_eagerly(True)
    gpus = tf.config.list_physical_devices(device_type="GPU")
    if hasattr(args, "horovod") and args.horovod:
        import horovod.tensorflow as hvd

        hvd.init()
        is_master = hvd.rank() == 0
        if gpus:
            local_gpu = gpus[hvd.local_rank()]
            tf.config.set_visible_devices(local_gpu, device_type="GPU")
            gpus = [local_gpu]
    else:
        hvd = None
        is_master = True
    if args.gpu_allow_growth:
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, enable=True)

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])
    if is_master and not tf.io.gfile.exists(config["model_dir"]):
        tf.get_logger().info("Creating model directory %s", config["model_dir"])
        tf.io.gfile.makedirs(config["model_dir"])

    model = load_model(
        config["model_dir"],
        model_file=args.model,
        model_name=args.model_type,
        serialize_model=is_master,
        as_builder=True,
    )
    runner = Runner(
        model,
        config,
        auto_config=args.auto_config,
        mixed_precision=args.mixed_precision,
        seed=args.seed,
    )

    if args.run_type == "train":
        runner.train(
            num_devices=args.num_gpus,
            with_eval=args.with_eval,
            checkpoint_path=args.checkpoint_path,
            hvd=hvd,
        )
    elif args.run_type == "eval":
        metrics = runner.evaluate(
            checkpoint_path=args.checkpoint_path,
            features_file=args.features_file,
            labels_file=args.labels_file,
        )
        print(metrics)
    elif args.run_type == "infer":
        runner.infer(
            args.features_file,
            predictions_file=args.predictions_file,
            checkpoint_path=args.checkpoint_path,
            log_time=args.log_prediction_time,
        )
    elif args.run_type == "export":
        runner.export(
            args.output_dir,
            checkpoint_path=args.checkpoint_path,
            exporter=exporters.make_exporter(args.format),
        )
    elif args.run_type == "score":
        runner.score(
            args.features_file,
            args.predictions_file,
            checkpoint_path=args.checkpoint_path,
        )
    elif args.run_type == "average_checkpoints":
        runner.average_checkpoints(args.output_dir, max_count=args.max_count)
    elif args.run_type == "update_vocab":
        runner.update_vocab(args.output_dir, src_vocab=args.src_vocab, tgt_vocab=args.tgt_vocab)
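# For reference: this entry point corresponds to the `onmt-main` console script
# in OpenNMT-tf 2.x, so a typical invocation of the code path above would look
# roughly like the following (file names are placeholders):
#
#   onmt-main --config data.yml --auto_config train --with_eval
#   onmt-main --config data.yml --auto_config infer \
#       --features_file src-test.txt --predictions_file predictions.txt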
def testLoadModelInvalidArguments(self):
    with self.assertRaises(ValueError):
        config.load_model(self.get_temp_dir(), model_file="a", model_name="b")
def create_description(path):
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # parser.add_argument("run", default="infer", help="Run type.")
    parser.add_argument("--config", nargs="+",
                        default=['config/opennmt-defaults.yml', 'config/default.yml'],
                        help="List of configuration files.")
    parser.add_argument("--auto_config", default=False, action="store_true",
                        help="Enable automatic configuration values.")
    parser.add_argument("--model_type", default="",
                        choices=list(classes_in_module(catalog, public_only=True)),
                        help="Model type from the catalog.")
    parser.add_argument("--model", default="", help="Custom model configuration file.")
    parser.add_argument("--run_dir", default="",
                        help="If set, model_dir will be created relative to this location.")
    parser.add_argument("--data_dir", default="",
                        help="If set, data files are expected to be relative to this location.")
    parser.add_argument("--features_file", default=[path + 'test.csv'], nargs="+",
                        help="Run inference on this file.")
    parser.add_argument("--predictions_file", default=path + "output.out",
                        help=("File used to save predictions. If not set, predictions are printed "
                              "on the standard output."))
    parser.add_argument("--log_prediction_time", default=False, action="store_true",
                        help="Logs some prediction time metrics.")
    parser.add_argument("--checkpoint_path", default='model/model.ckpt-600000',
                        help=("Checkpoint or directory to use for inference or export "
                              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument("--export_dir_base", default=None,
                        help="The base directory of the exported model.")
    parser.add_argument("--num_gpus", type=int, default=1,
                        help="Number of GPUs to use for in-graph replication.")
    parser.add_argument("--chief_host", default="",
                        help="hostname:port of the chief worker (for distributed training).")
    parser.add_argument("--worker_hosts", default="",
                        help=("Comma-separated list of hostname:port of workers "
                              "(for distributed training)."))
    parser.add_argument("--ps_hosts", default="",
                        help=("Comma-separated list of hostname:port of parameter servers "
                              "(for distributed training)."))
    parser.add_argument("--task_type", default="chief",
                        choices=["chief", "worker", "ps", "evaluator"],
                        help="Type of the task to run (for distributed training).")
    parser.add_argument("--task_index", type=int, default=0,
                        help="ID of the task (for distributed training).")
    parser.add_argument("--horovod", default=False, action="store_true",
                        help="Enable Horovod support for this run.")
    parser.add_argument("--log_level", default="INFO",
                        choices=["DEBUG", "ERROR", "FATAL", "INFO", "WARN"],
                        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument("--gpu_allow_growth", default=False, action="store_true",
                        help="Allocate GPU memory dynamically.")
    parser.add_argument("--intra_op_parallelism_threads", type=int, default=0,
                        help=("Number of intra op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--inter_op_parallelism_threads", type=int, default=0,
                        help=("Number of inter op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--session_config", default=None,
                        help=("Path to a file containing a tf.ConfigProto message in text format "
                              "and used to create the TensorFlow sessions."))
    parser.add_argument("--json", default=None, required=True, help="Input data as a JSON string.")
    args = parser.parse_args()
    # inp = args.inputstring
    # print(inp)
    print(args)

    tf.compat.v1.logging.set_verbosity(getattr(tf.compat.v1.logging, args.log_level))

    # Setup cluster if defined.
    # NOTE: args.run is not defined because the positional "run" argument is
    # commented out above; this branch is only reached when --chief_host is set.
    if args.chief_host:
        if args.run != "train_and_eval":
            raise ValueError("Distributed training is only supported with the train_and_eval run type")
        os.environ["TF_CONFIG"] = json.dumps({
            "cluster": {
                "chief": [args.chief_host],
                "worker": args.worker_hosts.split(","),
                "ps": args.ps_hosts.split(",")
            },
            "task": {
                "type": args.task_type,
                "index": args.task_index
            }
        })

    # Initialize Horovod if defined.
    if args.horovod:
        import horovod.tensorflow as hvd
        hvd.init()
        is_chief = hvd.rank() == 0
    else:
        hvd = None
        is_chief = args.task_type == "chief"

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])
    if is_chief and not tf.io.gfile.exists(config["model_dir"]):
        tf.logging.info("Creating model directory %s", config["model_dir"])
        tf.gfile.MakeDirs(config["model_dir"])

    model = load_model(
        config["model_dir"],
        model_file=args.model,
        model_name=args.model_type,
        serialize_model=is_chief)
    session_config = tf.compat.v1.ConfigProto(
        intra_op_parallelism_threads=args.intra_op_parallelism_threads,
        inter_op_parallelism_threads=args.inter_op_parallelism_threads)
    # gpu_options=tf.GPUOptions(allow_growth=args.gpu_allow_growth))
    if args.session_config is not None:
        with open(args.session_config, "rb") as session_config_file:
            text_format.Merge(session_config_file.read(), session_config)

    try:
        # Added 11/25: --json now points to a file instead of an inline JSON string.
        with open(args.json, "r") as load_f:
            # data = json.loads(args.json)
            data = json.load(load_f)
            # print(data["category1"])
            savedata(data, path)
    except Exception:
        print("JSON input is invalid")
        exit(1)

    runner = Runner(
        model,
        config,
        seed=args.seed,
        # num_devices=args.num_gpus,
        session_config=session_config,
        auto_config=args.auto_config,
        hvd=hvd)
    if not args.features_file:
        parser.error("--features_file is required for inference.")
    elif len(args.features_file) == 1:
        args.features_file = args.features_file[0]
    print("begin to run....")
    runner.infer(
        args.features_file,
        predictions_file=args.predictions_file,
        checkpoint_path=args.checkpoint_path,
        log_time=args.log_prediction_time)

    description = normalize.normalize(path + "output.out", data)
    data["description"] = description
    json_str = json.dumps(data, ensure_ascii=False)
    return json_str
bcrypt = Bcrypt(app)
login_manager = LoginManager(app)
login_manager.login_view = 'login'
login_manager.login_message_category = 'info'

avi_vectorizer = load(
    os.path.join('website', 'static', 'arabic_variety_identification',
                 'dialect_identification.vec'))
avi_model = load(
    os.path.join('website', 'static', 'arabic_variety_identification',
                 'dialect_identification.mdl'))

ca_config = load_config(
    ["diacritizer/opennmt-defaults.yml", "diacritizer_ca/toy-ende.yml"])
ca_model = load_model(
    ca_config["model_dir"], model_file="", model_name="", serialize_model=False)
ca_runner = Runner(
    ca_model, ca_config, seed=None, num_devices=1, gpu_allow_growth=False)

msa_config = load_config(
    ["diacritizer/opennmt-defaults.yml", "diacritizer_msa/toy-ende.yml"])
msa_model = load_model(
    msa_config["model_dir"], model_file="", model_name="", serialize_model=False)
msa_runner = Runner(msa_model, msa_config,
def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("run",
                        choices=["train_and_eval", "train", "eval", "infer", "export", "score"],
                        help="Run type.")
    parser.add_argument("--config", required=True, nargs="+", help="List of configuration files.")
    parser.add_argument("--model_type", default="", choices=list(classes_in_module(catalog)),
                        help="Model type from the catalog.")
    parser.add_argument("--model", default="", help="Custom model configuration file.")
    parser.add_argument("--run_dir", default="",
                        help="If set, model_dir will be created relative to this location.")
    parser.add_argument("--data_dir", default="",
                        help="If set, data files are expected to be relative to this location.")
    parser.add_argument("--features_file", default=[], nargs="+", help="Run inference on this file.")
    parser.add_argument("--predictions_file", default="",
                        help=("File used to save predictions. If not set, predictions are printed "
                              "on the standard output."))
    parser.add_argument("--log_prediction_time", default=False, action="store_true",
                        help="Logs some prediction time metrics.")
    parser.add_argument("--checkpoint_path", default=None,
                        help=("Checkpoint or directory to use for inference or export "
                              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument("--num_gpus", type=int, default=1,
                        help="Number of GPUs to use for in-graph replication.")
    parser.add_argument("--chief_host", default="",
                        help="hostname:port of the chief worker (for distributed training).")
    parser.add_argument("--worker_hosts", default="",
                        help=("Comma-separated list of hostname:port of workers "
                              "(for distributed training)."))
    parser.add_argument("--ps_hosts", default="",
                        help=("Comma-separated list of hostname:port of parameter servers "
                              "(for distributed training)."))
    parser.add_argument("--task_type", default="chief",
                        choices=["chief", "worker", "ps", "evaluator"],
                        help="Type of the task to run (for distributed training).")
    parser.add_argument("--task_index", type=int, default=0,
                        help="ID of the task (for distributed training).")
    parser.add_argument("--log_level", default="INFO",
                        choices=["DEBUG", "ERROR", "FATAL", "INFO", "WARN"],
                        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument("--gpu_allow_growth", default=False, action="store_true",
                        help="Allocate GPU memory dynamically.")
    parser.add_argument("--intra_op_parallelism_threads", type=int, default=0,
                        help=("Number of intra op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--inter_op_parallelism_threads", type=int, default=0,
                        help=("Number of inter op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--session_config", default=None,
                        help=("Path to a file containing a tf.ConfigProto message in text format "
                              "and used to create the TensorFlow sessions."))
    args = parser.parse_args()

    tf.logging.set_verbosity(getattr(tf.logging, args.log_level))

    # Setup cluster if defined.
    if args.chief_host:
        os.environ["TF_CONFIG"] = json.dumps({
            "cluster": {
                "chief": [args.chief_host],
                "worker": args.worker_hosts.split(","),
                "ps": args.ps_hosts.split(",")
            },
            "task": {
                "type": args.task_type,
                "index": args.task_index
            }
        })

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])

    is_chief = args.task_type == "chief"
    if is_chief and not tf.gfile.Exists(config["model_dir"]):
        tf.logging.info("Creating model directory %s", config["model_dir"])
        tf.gfile.MakeDirs(config["model_dir"])

    model = load_model(
        config["model_dir"],
        model_file=args.model,
        model_name=args.model_type,
        serialize_model=is_chief)
    session_config = tf.ConfigProto(
        intra_op_parallelism_threads=args.intra_op_parallelism_threads,
        inter_op_parallelism_threads=args.inter_op_parallelism_threads)
    if args.session_config is not None:
        with open(args.session_config, "rb") as session_config_file:
            text_format.Merge(session_config_file.read(), session_config)

    runner = Runner(
        model,
        config,
        seed=args.seed,
        num_devices=args.num_gpus,
        gpu_allow_growth=args.gpu_allow_growth,
        session_config=session_config)

    if args.run == "train_and_eval":
        runner.train_and_evaluate()
    elif args.run == "train":
        runner.train()
    elif args.run == "eval":
        runner.evaluate(checkpoint_path=args.checkpoint_path)
    elif args.run == "infer":
        if not args.features_file:
            parser.error("--features_file is required for inference.")
        elif len(args.features_file) == 1:
            args.features_file = args.features_file[0]
        runner.infer(
            args.features_file,
            predictions_file=args.predictions_file,
            checkpoint_path=args.checkpoint_path,
            log_time=args.log_prediction_time)
    elif args.run == "export":
        runner.export(checkpoint_path=args.checkpoint_path)
    elif args.run == "score":
        if not args.features_file:
            parser.error("--features_file is required for scoring.")
        if not args.predictions_file:
            parser.error("--predictions_file is required for scoring.")
        runner.score(
            args.features_file,
            args.predictions_file,
            checkpoint_path=args.checkpoint_path)
def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-v", "--version", action="version", version="OpenNMT-tf %s" % __version__)
    parser.add_argument("--config", required=True, nargs="+", help="List of configuration files.")
    parser.add_argument("--auto_config", default=False, action="store_true",
                        help="Enable automatic configuration values.")
    parser.add_argument("--model_type", default="",
                        choices=list(classes_in_module(catalog, public_only=True)),
                        help="Model type from the catalog.")
    parser.add_argument("--model", default="", help="Custom model configuration file.")
    parser.add_argument("--run_dir", default="",
                        help="If set, model_dir will be created relative to this location.")
    parser.add_argument("--data_dir", default="",
                        help="If set, data files are expected to be relative to this location.")
    parser.add_argument("--checkpoint_path", default=None,
                        help=("Specific checkpoint or model directory to load "
                              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument("--log_level", default="INFO",
                        choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"],
                        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument("--gpu_allow_growth", default=False, action="store_true",
                        help="Allocate GPU memory dynamically.")
    parser.add_argument("--intra_op_parallelism_threads", type=int, default=0,
                        help=("Number of intra op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--inter_op_parallelism_threads", type=int, default=0,
                        help=("Number of inter op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--mixed_precision", default=False, action="store_true",
                        help="Enable mixed precision.")

    subparsers = parser.add_subparsers(help="Run type.", dest="run")
    parser_train = subparsers.add_parser("train", help="Training.")
    parser_train.add_argument("--with_eval", default=False, action="store_true",
                              help="Enable automatic evaluation.")
    parser_train.add_argument("--num_gpus", type=int, default=1,
                              help="Number of GPUs to use for in-graph replication.")
    parser_eval = subparsers.add_parser("eval", help="Evaluation.")
    parser_eval.add_argument("--features_file", nargs="+", default=None, help="Input features files.")
    parser_eval.add_argument("--labels_file", default=None, help="Output labels files.")
    parser_infer = subparsers.add_parser("infer", help="Inference.")
    parser_infer.add_argument("--features_file", nargs="+", required=True,
                              help="Run inference on this file.")
    parser_infer.add_argument("--predictions_file", default="",
                              help=("File used to save predictions. If not set, predictions are printed "
                                    "on the standard output."))
    parser_infer.add_argument("--log_prediction_time", default=False, action="store_true",
                              help="Logs some prediction time metrics.")
    parser_export = subparsers.add_parser("export", help="Model export.")
    parser_export.add_argument("--export_dir", required=True, help="The directory of the exported model.")
    parser_score = subparsers.add_parser("score", help="Scoring.")
    parser_score.add_argument("--features_file", nargs="+", required=True, help="Features file.")
    parser_score.add_argument("--predictions_file", default=None, help="Predictions to score.")
    parser_average_checkpoints = subparsers.add_parser("average_checkpoints", help="Checkpoint averaging.")
    parser_average_checkpoints.add_argument("--output_dir", required=True,
                                            help="The output directory for the averaged checkpoint.")
    parser_average_checkpoints.add_argument("--max_count", type=int, default=8,
                                            help="The maximal number of checkpoints to average.")
    parser_update_vocab = subparsers.add_parser("update_vocab", help="Update model vocabularies in checkpoint.")
    parser_update_vocab.add_argument("--output_dir", required=True,
                                     help="The output directory for the updated checkpoint.")
    parser_update_vocab.add_argument("--src_vocab", default=None, help="Path to the new source vocabulary.")
    parser_update_vocab.add_argument("--tgt_vocab", default=None, help="Path to the new target vocabulary.")

    args = parser.parse_args()
    if hasattr(args, "features_file") and args.features_file and len(args.features_file) == 1:
        args.features_file = args.features_file[0]

    _set_log_level(getattr(logging, args.log_level))
    tf.config.threading.set_intra_op_parallelism_threads(args.intra_op_parallelism_threads)
    tf.config.threading.set_inter_op_parallelism_threads(args.inter_op_parallelism_threads)
    if args.gpu_allow_growth:
        for device in tf.config.experimental.list_physical_devices(device_type="GPU"):
            tf.config.experimental.set_memory_growth(device, enable=True)

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])
    if not tf.io.gfile.exists(config["model_dir"]):
        tf.get_logger().info("Creating model directory %s", config["model_dir"])
        tf.io.gfile.makedirs(config["model_dir"])

    model = load_model(config["model_dir"], model_file=args.model, model_name=args.model_type)
    runner = Runner(
        model,
        config,
        auto_config=args.auto_config,
        mixed_precision=args.mixed_precision,
        seed=args.seed)

    if args.run == "train":
        runner.train(
            num_devices=args.num_gpus,
            with_eval=args.with_eval,
            checkpoint_path=args.checkpoint_path)
    elif args.run == "eval":
        metrics = runner.evaluate(
            checkpoint_path=args.checkpoint_path,
            features_file=args.features_file,
            labels_file=args.labels_file)
        print(metrics)
    elif args.run == "infer":
        runner.infer(
            args.features_file,
            predictions_file=args.predictions_file,
            checkpoint_path=args.checkpoint_path,
            log_time=args.log_prediction_time)
    elif args.run == "export":
        runner.export(args.export_dir, checkpoint_path=args.checkpoint_path)
    elif args.run == "score":
        runner.score(
            args.features_file,
            args.predictions_file,
            checkpoint_path=args.checkpoint_path)
    elif args.run == "average_checkpoints":
        runner.average_checkpoints(args.output_dir, max_count=args.max_count)
    elif args.run == "update_vocab":
        runner.update_vocab(args.output_dir, src_vocab=args.src_vocab, tgt_vocab=args.tgt_vocab)
def testLoadModelMissingModel(self):
    with self.assertRaises(RuntimeError):
        config.load_model(self.get_temp_dir())
def testLoadModelInvalidInvalidFile(self):
    with self.assertRaisesRegex(ValueError, "not found"):
        config.load_model(self.get_temp_dir(), model_file="a")
def testLoadModelInvalidInvalidName(self):
    with self.assertRaisesRegex(ValueError, "does not exist"):
        config.load_model(self.get_temp_dir(), model_name="b")
def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("run",
                        choices=["train_and_eval", "train", "eval", "infer", "export"],
                        help="Run type.")
    parser.add_argument("--config", required=True, nargs="+", help="List of configuration files.")
    parser.add_argument("--model", default="", help="Model configuration file.")
    parser.add_argument("--run_dir", default="",
                        help="If set, model_dir will be created relative to this location.")
    parser.add_argument("--data_dir", default="",
                        help="If set, data files are expected to be relative to this location.")
    parser.add_argument("--features_file", default=[], nargs="+", help="Run inference on this file.")
    parser.add_argument("--predictions_file", default="",
                        help=("File used to save predictions. If not set, predictions are printed "
                              "on the standard output."))
    parser.add_argument("--checkpoint_path", default=None,
                        help=("Checkpoint or directory to use for inference or export "
                              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument("--num_gpus", type=int, default=1,
                        help="Number of GPUs to use for in-graph replication.")
    parser.add_argument("--chief_host", default="",
                        help="hostname:port of the chief worker (for distributed training).")
    parser.add_argument("--worker_hosts", default="",
                        help=("Comma-separated list of hostname:port of workers "
                              "(for distributed training)."))
    parser.add_argument("--ps_hosts", default="",
                        help=("Comma-separated list of hostname:port of parameter servers "
                              "(for distributed training)."))
    parser.add_argument("--task_type", default="chief",
                        choices=["chief", "worker", "ps", "evaluator"],
                        help="Type of the task to run (for distributed training).")
    parser.add_argument("--task_index", type=int, default=0,
                        help="ID of the task (for distributed training).")
    parser.add_argument("--log_level", default="INFO",
                        choices=["DEBUG", "ERROR", "FATAL", "INFO", "WARN"],
                        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument("--gpu_allow_growth", default=False, action="store_true",
                        help="Allocate GPU memory dynamically.")
    args = parser.parse_args()

    tf.logging.set_verbosity(getattr(tf.logging, args.log_level))

    # Setup cluster if defined.
    if args.chief_host:
        os.environ["TF_CONFIG"] = json.dumps({
            "cluster": {
                "chief": [args.chief_host],
                "worker": args.worker_hosts.split(","),
                "ps": args.ps_hosts.split(",")
            },
            "task": {
                "type": args.task_type,
                "index": args.task_index
            }
        })

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])
    if not os.path.isdir(config["model_dir"]):
        tf.logging.info("Creating model directory %s", config["model_dir"])
        os.makedirs(config["model_dir"])

    model = load_model(config["model_dir"], model_file=args.model)
    runner = Runner(
        model,
        config,
        seed=args.seed,
        num_devices=args.num_gpus,
        gpu_allow_growth=args.gpu_allow_growth)

    if args.run == "train_and_eval":
        runner.train_and_evaluate()
    elif args.run == "train":
        runner.train()
    elif args.run == "eval":
        runner.evaluate(checkpoint_path=args.checkpoint_path)
    elif args.run == "infer":
        if not args.features_file:
            parser.error("--features_file is required for inference.")
        elif len(args.features_file) == 1:
            args.features_file = args.features_file[0]
        runner.infer(
            args.features_file,
            predictions_file=args.predictions_file,
            checkpoint_path=args.checkpoint_path)
    elif args.run == "export":
        runner.export(checkpoint_path=args.checkpoint_path)
from os import makedirs, path

import yaml

from opennmt.models import SequenceToSequence
from opennmt.config import load_model


def load_config(config_path: str):
    """Loads an OpenNMT config file.

    Arguments:
        config_path: The path to the config file.

    Returns:
        A dict containing the config data.
    """
    with open(config_path, encoding='utf-8') as f:
        return yaml.load(f)


__dir = path.dirname(__file__)
config_path = path.join(__dir, 'config.yml')
model_file = path.join(__dir, 'nmt_small.py')

config = load_config(config_path)
model_dir = config['model_dir']
if not path.isdir(model_dir):
    makedirs(model_dir)
model = load_model(model_dir, model_file=model_file)
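# A possible next step, not part of the original snippet: wrap the loaded model
# in a Runner for inference, mirroring the other usages in this listing. The
# input/output file names are illustrative placeholders, and this assumes an
# OpenNMT-tf version where Runner(model, config) and runner.infer(...) are
# available as shown in the snippets above.
from opennmt import Runner

runner = Runner(model, config)
runner.infer('input.txt', predictions_file='predictions.txt')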