Exemple #1
0
 def testLoadModel(self):
     """Loads the "NMTSmall" model by name, then loads again from the directory only."""
     model_dir = self.get_temp_dir()
     # First call names the model explicitly; second call passes only the
     # model directory. Both results must be Model instances.
     for load_kwargs in ({"model_name": "NMTSmall"}, {}):
         loaded = config.load_model(model_dir, **load_kwargs)
         self.assertIsInstance(loaded, Model)
Exemple #2
0
 def testLoadModel(self):
   """Loads "Transformer" by name, checks the description file, then reloads."""
   model_dir = self.get_temp_dir()
   loaded = config.load_model(model_dir, model_name="Transformer")
   # Loading by name is expected to leave a description file in model_dir.
   description_path = os.path.join(model_dir, "model_description.py")
   self.assertTrue(os.path.exists(description_path))
   self.assertIsInstance(loaded, Model)
   # Second load passes only the model directory.
   reloaded = config.load_model(model_dir)
   self.assertIsInstance(reloaded, Model)
Exemple #3
0
 def testLoadModelFileOverride(self):
     """Checks that loading a second model file replaces the saved description."""
     model_dir = self.get_temp_dir()
     saved_description_path = os.path.join(model_dir, "model_description.py")
     # After each load, the saved description must match the file just given.
     custom_models = (("test_model1.py", 1), ("test_model2.py", 2))
     for filename, return_value in custom_models:
         model_file = self._writeCustomModel(
             filename=filename, return_value=return_value)
         config.load_model(model_dir, model_file=model_file)
         self.assertTrue(filecmp.cmp(model_file, saved_description_path))
Exemple #4
0
 def testLoadModelFile(self):
     """Loads a custom model file, checks the saved copy, then reloads from the directory."""
     custom_file = self._writeCustomModel()
     model_dir = self.get_temp_dir()
     first_load = config.load_model(model_dir, model_file=custom_file)

     # The model file must have been copied verbatim into the model directory.
     saved_description_path = os.path.join(model_dir, "model_description.py")
     self.assertTrue(os.path.exists(saved_description_path))
     self.assertTrue(filecmp.cmp(custom_file, saved_description_path))
     self.assertEqual(first_load, 42)

     # Loading with only the directory must give the same value.
     second_load = config.load_model(model_dir)
     self.assertEqual(second_load, 42)
Exemple #5
0
    def testLoadModel(self, model_name, as_builder):
        """Loads a model by name, then again from the model directory only.

        Args:
          model_name: Name of the model passed to ``config.load_model``.
          as_builder: When true, ``config.load_model`` is expected to return a
            callable that is invoked to obtain the model instance.
        """

        def _check_model(model):
            if as_builder:
                # assertTrue(model, callable) would only test the truthiness of
                # `model`: the second positional argument is the failure
                # message. Check callability explicitly instead.
                self.assertTrue(callable(model))
                model = model()
            self.assertIsInstance(model, Model)

        model_dir = self.get_temp_dir()
        _check_model(
            config.load_model(model_dir, model_name=model_name, as_builder=as_builder)
        )
        # Loading by name is expected to leave a description file in model_dir.
        self.assertTrue(os.path.exists(os.path.join(model_dir, "model_description.py")))
        _check_model(config.load_model(model_dir, as_builder=as_builder))
def get_runners():
    """Build two Runners, one per configuration file.

    Returns:
      A ``(runner_one, runner_two)`` tuple; the first is built from
      ``avg_data.yml`` and the second from ``avg_data_rev.yml``.
    """

    def _make_runner(config_path):
        # Load the configuration, load its model, create a dedicated session
        # configuration, then wrap everything in a Runner.
        run_config = load_config(config_path)
        model = load_model(
            run_config["model_dir"],
            model_file='',
            model_name='',
            serialize_model=True)
        session_config = tf.ConfigProto(
            intra_op_parallelism_threads=0,
            inter_op_parallelism_threads=0,
            gpu_options=tf.GPUOptions(allow_growth=False))
        return Runner(
            model,
            run_config,
            seed=None,
            num_devices=1,
            session_config=session_config,
            auto_config=True)

    runner_one = _make_runner('avg_data.yml')
    runner_two = _make_runner('avg_data_rev.yml')
    return runner_one, runner_two
Exemple #7
0
    def testLoadModel(self, model_name, as_builder):
        """Loads a model by name, then again from the model directory only.

        Args:
          model_name: Name of the model passed to ``config.load_model``.
          as_builder: When true, ``config.load_model`` is expected to return a
            callable that is invoked to obtain the model instance.
        """

        def _check_model(model):
            if as_builder:
                # assertTrue(model, callable) would only test the truthiness of
                # `model`: the second positional argument is the failure
                # message. Check callability explicitly instead.
                self.assertTrue(callable(model))
                model = model()
            self.assertIsInstance(model, Model)

        model_dir = self.get_temp_dir()
        _check_model(
            config.load_model(model_dir,
                              model_name=model_name,
                              as_builder=as_builder))
        # Loading by name is expected to leave a description file in model_dir.
        self.assertTrue(
            os.path.exists(
                os.path.join(model_dir, config.MODEL_DESCRIPTION_FILENAME)))
        _check_model(config.load_model(model_dir, as_builder=as_builder))
Exemple #8
0
 def testLoadModelDescriptionCompat(self):
     """Loads a hand-written description file that assigns a catalog class to `model`."""
     model_dir = self.get_temp_dir()
     description_lines = [
         "from opennmt.models import catalog\n",
         "model = catalog.Transformer\n",
     ]
     with open(os.path.join(model_dir, "model_description.py"), "w") as out:
         out.writelines(description_lines)
     # Only the directory is given: the model must come from the file above.
     self.assertIsInstance(config.load_model(model_dir), Model)
Exemple #9
0
 def __init__(self, model_config=None):
     """Load the teacher and student models and initialize the parent class.

     Args:
       model_config: Dictionary with "teacher" and "student" sections. Each
         section holds a "model_dir" and optionally "model" (passed as the
         model file) and "model_type" (passed as the model name). The
         optional top-level keys "distill_loss_rate" (default 0.75),
         "student_loss_rate" (default 0.25) and "distill_temperature"
         (default 2.0) are forwarded to the parent constructor.

     Raises:
       ValueError: If model_config is missing or empty.
     """
     if not model_config:
         raise ValueError("distill model must include model_config.")

     roles = ("teacher", "student")
     # Both model directories are created before any model is loaded.
     for role in roles:
         os.makedirs(model_config[role]["model_dir"], exist_ok=True)

     models = {}
     for role in roles:
         section = model_config[role]
         models[role] = load_model(
             section["model_dir"],
             model_file=section.get("model"),
             model_name=section.get("model_type"))

     super(BaseDistill, self).__init__(
         teacher_model=models["teacher"],
         student_model=models["student"],
         distill_loss_rate=model_config.get("distill_loss_rate", 0.75),
         student_loss_rate=model_config.get("student_loss_rate", 0.25),
         distill_temperature=model_config.get("distill_temperature", 2.0))
Exemple #10
0
 def _getTransliterationRunner(self, base_config=None, model_version="v2"):
   """Copies the transliteration test model to a temp directory and wraps it in a Runner.

   Args:
     base_config: Optional configuration merged over the generated one.
     model_version: Sub-directory of "transliteration-aren-v2" to copy.

   Returns:
     A Runner built from the copied model and the resulting configuration.
   """
   model_dir = os.path.join(self.get_temp_dir(), "model")
   source_dir = os.path.join(test_data, "transliteration-aren-v2", model_version)
   shutil.copytree(source_dir, model_dir)
   # The vocabularies are read from the copied model directory.
   run_config = {
       "model_dir": model_dir,
       "data": {
           "source_vocabulary": os.path.join(model_dir, "ar.vocab"),
           "target_vocabulary": os.path.join(model_dir, "en.vocab"),
       },
   }
   if base_config is not None:
     run_config = misc.merge_dict(run_config, base_config)
   return Runner(load_model(model_dir), run_config)
Exemple #11
0
    def _load_model(self, model_type=None, model_file=None, model_path=None):
        """Prepare a fresh model directory and load the model into it.

        The directory ``<output_dir>/model`` is removed if it exists and then
        recreated. When ``model_path`` is given, every regular file directly
        inside it (sub-directories are skipped) is copied into that directory
        before loading.

        Returns:
          A ``(model_dir, model)`` tuple.
        """
        model_dir = os.path.join(self._output_dir, "model")
        # Always start from an empty directory.
        if os.path.exists(model_dir):
            shutil.rmtree(model_dir)
        os.makedirs(model_dir)

        if model_path is not None:
            candidates = (
                os.path.join(model_path, entry) for entry in os.listdir(model_path)
            )
            for candidate in candidates:
                if os.path.isfile(candidate):
                    shutil.copy(candidate, model_dir)

        loaded = load_model(model_dir, model_file=model_file, model_name=model_type)
        return model_dir, loaded
Exemple #12
0
    def __init__(self, config):
        """Build the runner from a set of YAML configuration files.

        Args:
          config: A dictionary with the following keys:
            - ``config_path``: list of YAML file paths. They are loaded in
              order and merged with ``dict.update``, so top-level keys of
              later files replace those of earlier ones.
            - ``model_type``: model name, forwarded to ``load_model``.
            - ``checkpoint_path``: stored in the merged configuration.
            - ``auto_config``: forwarded to ``Runner``.

        Raises:
          KeyError: If a required key is missing from ``config``, or if the
            merged configuration has no ``model_dir`` entry.
        """
        self.__config = {}
        for config_path in config['config_path']:
            with open(config_path, 'r') as f:
                # yaml.load() without an explicit Loader can construct
                # arbitrary Python objects and is rejected by PyYAML >= 6;
                # safe_load only builds plain data. An empty file parses to
                # None, which dict.update() cannot merge, hence the fallback.
                self.__config.update(yaml.safe_load(f) or {})
        self.__config['model_type'] = config['model_type']
        self.__config['checkpoint_path'] = config['checkpoint_path']

        model = load_model(self.__config['model_dir'],
                           model_name=self.__config['model_type'])
        self.model = Runner(model,
                            self.__config,
                            auto_config=config['auto_config'])
Exemple #13
0
def main():
    """Command-line entry point.

    Parses the global options plus one required run-type subcommand (train,
    eval, infer, export, score, average_checkpoints, update_vocab), applies
    the TensorFlow threading, eager-execution and GPU settings, loads the
    merged run configuration and the model, then calls the matching
    ``Runner`` method.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-v",
                        "--version",
                        action="version",
                        version="OpenNMT-tf %s" % __version__)
    parser.add_argument("--config",
                        required=True,
                        nargs="+",
                        help="List of configuration files.")
    parser.add_argument(
        "--auto_config",
        default=False,
        action="store_true",
        help="Enable automatic configuration values.",
    )
    parser.add_argument(
        "--model_type",
        default="",
        choices=list(sorted(catalog.list_model_names_from_catalog())),
        help="Model type from the catalog.",
    )
    parser.add_argument("--model",
                        default="",
                        help="Custom model configuration file.")
    parser.add_argument(
        "--run_dir",
        default="",
        help="If set, model_dir will be created relative to this location.",
    )
    parser.add_argument(
        "--data_dir",
        default="",
        help="If set, data files are expected to be relative to this location.",
    )
    parser.add_argument(
        "--checkpoint_path",
        default=None,
        help=("Specific checkpoint or model directory to load "
              "(when a directory is set, the latest checkpoint is used)."),
    )
    parser.add_argument(
        "--log_level",
        default="INFO",
        choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"],
        help="Logs verbosity.",
    )
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument(
        "--gpu_allow_growth",
        default=False,
        action="store_true",
        help="Allocate GPU memory dynamically.",
    )
    parser.add_argument(
        "--intra_op_parallelism_threads",
        type=int,
        default=0,
        help=("Number of intra op threads (0 means the system picks "
              "an appropriate number)."),
    )
    parser.add_argument(
        "--inter_op_parallelism_threads",
        type=int,
        default=0,
        help=("Number of inter op threads (0 means the system picks "
              "an appropriate number)."),
    )
    parser.add_argument(
        "--mixed_precision",
        default=False,
        action="store_true",
        help="Enable mixed precision.",
    )
    parser.add_argument(
        "--eager_execution",
        default=False,
        action="store_true",
        help="Enable TensorFlow eager execution.",
    )

    # Each run type is a required subcommand with its own options.
    subparsers = parser.add_subparsers(help="Run type.", dest="run_type")
    subparsers.required = True
    parser_train = subparsers.add_parser("train", help="Training.")
    parser_train.add_argument(
        "--with_eval",
        default=False,
        action="store_true",
        help="Enable automatic evaluation.",
    )
    parser_train.add_argument(
        "--num_gpus",
        type=int,
        default=1,
        help="Number of GPUs to use for in-graph replication.",
    )
    parser_train.add_argument(
        "--horovod",
        default=False,
        action="store_true",
        help="Enable Horovod training mode.",
    )

    parser_eval = subparsers.add_parser("eval", help="Evaluation.")
    parser_eval.add_argument("--features_file",
                             nargs="+",
                             default=None,
                             help="Input features files.")
    parser_eval.add_argument("--labels_file",
                             default=None,
                             help="Output labels files.")

    parser_infer = subparsers.add_parser("infer", help="Inference.")
    parser_infer.add_argument("--features_file",
                              nargs="+",
                              required=True,
                              help="Run inference on this file.")
    parser_infer.add_argument(
        "--predictions_file",
        default="",
        help=
        ("File used to save predictions. If not set, predictions are printed "
         "on the standard output."),
    )
    parser_infer.add_argument(
        "--log_prediction_time",
        default=False,
        action="store_true",
        help="Logs some prediction time metrics.",
    )

    parser_export = subparsers.add_parser("export", help="Model export.")
    parser_export.add_argument(
        "--output_dir",
        "--export_dir",
        required=True,
        help="The directory of the exported model.",
    )
    parser_export.add_argument(
        "--format",
        "--export_format",
        choices=exporters.list_exporters(),
        default="saved_model",
        help="Format of the exported model.",
    )

    parser_score = subparsers.add_parser("score", help="Scoring.")
    parser_score.add_argument("--features_file",
                              nargs="+",
                              required=True,
                              help="Features file.")
    parser_score.add_argument("--predictions_file",
                              default=None,
                              help="Predictions to score.")

    parser_average_checkpoints = subparsers.add_parser(
        "average_checkpoints", help="Checkpoint averaging.")
    parser_average_checkpoints.add_argument(
        "--output_dir",
        required=True,
        help="The output directory for the averaged checkpoint.",
    )
    parser_average_checkpoints.add_argument(
        "--max_count",
        type=int,
        default=8,
        help="The maximal number of checkpoints to average.",
    )

    parser_update_vocab = subparsers.add_parser(
        "update_vocab", help="Update model vocabularies in checkpoint.")
    parser_update_vocab.add_argument(
        "--output_dir",
        required=True,
        help="The output directory for the updated checkpoint.",
    )
    parser_update_vocab.add_argument("--src_vocab",
                                     default=None,
                                     help="Path to the new source vocabulary.")
    parser_update_vocab.add_argument("--tgt_vocab",
                                     default=None,
                                     help="Path to the new target vocabulary.")

    # When using an option that takes multiple values just before the run type,
    # the run type is treated as a value of this option. To fix this issue, we
    # inject a placeholder option just before the run type to clearly separate it.
    parser.add_argument("--placeholder",
                        action="store_true",
                        help=argparse.SUPPRESS)
    run_types = set(subparsers.choices.keys())
    args = sys.argv[1:]
    # Only the first argument that matches a run type gets the placeholder.
    for i, arg in enumerate(args):
        if arg in run_types:
            args.insert(i, "--placeholder")
            break

    args = parser.parse_args(args)
    # A lone features file is unwrapped from its one-element list. Not every
    # subcommand defines --features_file, hence the hasattr check.
    if (hasattr(args, "features_file") and args.features_file
            and len(args.features_file) == 1):
        args.features_file = args.features_file[0]

    _initialize_logging(getattr(logging, args.log_level))
    tf.config.threading.set_intra_op_parallelism_threads(
        args.intra_op_parallelism_threads)
    tf.config.threading.set_inter_op_parallelism_threads(
        args.inter_op_parallelism_threads)

    if args.eager_execution:
        tf.config.run_functions_eagerly(True)

    gpus = tf.config.list_physical_devices(device_type="GPU")
    # --horovod only exists on the train subparser, hence the hasattr check.
    if hasattr(args, "horovod") and args.horovod:
        # Imported lazily so Horovod is only needed when the flag is set.
        import horovod.tensorflow as hvd

        hvd.init()
        is_master = hvd.rank() == 0
        if gpus:
            # Each Horovod process keeps only the GPU at its local rank visible.
            local_gpu = gpus[hvd.local_rank()]
            tf.config.set_visible_devices(local_gpu, device_type="GPU")
            gpus = [local_gpu]
    else:
        hvd = None
        is_master = True

    if args.gpu_allow_growth:
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, enable=True)

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])

    # Only the master process creates the model directory; the same flag is
    # passed as serialize_model below.
    if is_master and not tf.io.gfile.exists(config["model_dir"]):
        tf.get_logger().info("Creating model directory %s",
                             config["model_dir"])
        tf.io.gfile.makedirs(config["model_dir"])

    # NOTE(review): as_builder=True presumably makes load_model return a
    # callable that builds the model rather than an instance -- confirm
    # against load_model.
    model = load_model(
        config["model_dir"],
        model_file=args.model,
        model_name=args.model_type,
        serialize_model=is_master,
        as_builder=True,
    )
    runner = Runner(
        model,
        config,
        auto_config=args.auto_config,
        mixed_precision=args.mixed_precision,
        seed=args.seed,
    )

    # Dispatch on the selected subcommand.
    if args.run_type == "train":
        runner.train(
            num_devices=args.num_gpus,
            with_eval=args.with_eval,
            checkpoint_path=args.checkpoint_path,
            hvd=hvd,
        )
    elif args.run_type == "eval":
        metrics = runner.evaluate(
            checkpoint_path=args.checkpoint_path,
            features_file=args.features_file,
            labels_file=args.labels_file,
        )
        print(metrics)
    elif args.run_type == "infer":
        runner.infer(
            args.features_file,
            predictions_file=args.predictions_file,
            checkpoint_path=args.checkpoint_path,
            log_time=args.log_prediction_time,
        )
    elif args.run_type == "export":
        runner.export(
            args.output_dir,
            checkpoint_path=args.checkpoint_path,
            exporter=exporters.make_exporter(args.format),
        )
    elif args.run_type == "score":
        runner.score(
            args.features_file,
            args.predictions_file,
            checkpoint_path=args.checkpoint_path,
        )
    elif args.run_type == "average_checkpoints":
        runner.average_checkpoints(args.output_dir, max_count=args.max_count)
    elif args.run_type == "update_vocab":
        runner.update_vocab(args.output_dir,
                            src_vocab=args.src_vocab,
                            tgt_vocab=args.tgt_vocab)
Exemple #14
0
 def testLoadModelInvalidArguments(self):
     """Passing both a model file and a model name must raise ValueError."""
     with self.assertRaises(ValueError):
         model_dir = self.get_temp_dir()
         config.load_model(model_dir, model_file="a", model_name="b")
Exemple #15
0
def create_description(path):
    """Run inference for a JSON input file and return it with a description added.

    Options are read from the process command line. The JSON document named by
    ``--json`` is loaded and handed to ``savedata(data, path)``, inference is
    run on ``--features_file``, and the predictions file is passed to
    ``normalize.normalize`` to produce the ``"description"`` entry.

    Args:
      path: Prefix prepended (by plain string concatenation) to the default
        features file name ``test.csv`` and predictions file name
        ``output.out``; also forwarded to ``savedata``.

    Returns:
      The loaded JSON data with its ``"description"`` key set, serialized as
      a JSON string with non-ASCII characters left unescaped.

    Raises:
      ValueError: If ``--chief_host`` is set. This entry point only runs
        inference, and distributed mode requires the train_and_eval run type.
      SystemExit: With status 1 if the JSON input cannot be read or saved;
        also raised by argparse on invalid arguments.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--config", nargs="+",
                        default=['config/opennmt-defaults.yml', 'config/default.yml'],
                        help="List of configuration files.")
    parser.add_argument("--auto_config", default=False, action="store_true",
                        help="Enable automatic configuration values.")
    parser.add_argument("--model_type", default="",
                        choices=list(classes_in_module(catalog, public_only=True)),
                        help="Model type from the catalog.")
    parser.add_argument("--model", default="",
                        help="Custom model configuration file.")
    parser.add_argument("--run_dir", default="",
                        help="If set, model_dir will be created relative to this location.")
    parser.add_argument("--data_dir", default="",
                        help="If set, data files are expected to be relative to this location.")
    parser.add_argument("--features_file", default=[path+'test.csv'], nargs="+",
                        help="Run inference on this file.")
    parser.add_argument("--predictions_file", default=path+"output.out",
                        help=("File used to save predictions. If not set, predictions are printed "
                              "on the standard output."))
    parser.add_argument("--log_prediction_time", default=False, action="store_true",
                        help="Logs some prediction time metrics.")
    parser.add_argument("--checkpoint_path", default='model/model.ckpt-600000',
                        help=("Checkpoint or directory to use for inference or export "
                              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument("--export_dir_base", default=None,
                        help="The base directory of the exported model.")
    parser.add_argument("--num_gpus", type=int, default=1,
                        help="Number of GPUs to use for in-graph replication.")
    parser.add_argument("--chief_host", default="",
                        help="hostname:port of the chief worker (for distributed training).")
    parser.add_argument("--worker_hosts", default="",
                        help=("Comma-separated list of hostname:port of workers "
                              "(for distributed training)."))
    parser.add_argument("--ps_hosts", default="",
                        help=("Comma-separated list of hostname:port of parameter servers "
                              "(for distributed training)."))
    parser.add_argument("--task_type", default="chief",
                        choices=["chief", "worker", "ps", "evaluator"],
                        help="Type of the task to run (for distributed training).")
    parser.add_argument("--task_index", type=int, default=0,
                        help="ID of the task (for distributed training).")
    parser.add_argument("--horovod", default=False, action="store_true",
                        help="Enable Horovod support for this run.")
    parser.add_argument("--log_level", default="INFO",
                        choices=["DEBUG", "ERROR", "FATAL", "INFO", "WARN"],
                        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None,
                        help="Random seed.")
    parser.add_argument("--gpu_allow_growth", default=False, action="store_true",
                        help="Allocate GPU memory dynamically.")
    parser.add_argument("--intra_op_parallelism_threads", type=int, default=0,
                        help=("Number of intra op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--inter_op_parallelism_threads", type=int, default=0,
                        help=("Number of inter op threads (0 means the system picks "
                              "an appropriate number)."))
    parser.add_argument("--session_config", default=None,
                        help=("Path to a file containing a tf.ConfigProto message in text format "
                              "and used to create the TensorFlow sessions."))
    parser.add_argument("--json",default=None,required=True,help=("input data as json string"))
    args = parser.parse_args()
    print (args)

    tf.compat.v1.logging.set_verbosity(getattr(tf.compat.v1.logging, args.log_level))

    # There is no "run" argument on this parser: the function always runs
    # inference. Reading args.run therefore raised AttributeError before the
    # intended check could fire; raise the intended error directly.
    if args.chief_host:
        raise ValueError("Distributed training is only supported with the train_and_eval run type")

    # Initialize Horovod if requested (imported lazily so it stays optional).
    if args.horovod:
        import horovod.tensorflow as hvd
        hvd.init()
        is_chief = hvd.rank() == 0
    else:
        hvd = None
        is_chief = args.task_type == "chief"

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])

    # Only the chief creates the model directory. Use the same TF2-compatible
    # namespaces as the rest of this function (tf.logging and tf.gfile are
    # TF1-only names).
    if is_chief and not tf.io.gfile.exists(config["model_dir"]):
        tf.compat.v1.logging.info("Creating model directory %s", config["model_dir"])
        tf.io.gfile.makedirs(config["model_dir"])
    model = load_model(
        config["model_dir"],
        model_file=args.model,
        model_name=args.model_type,
        serialize_model=is_chief)
    # NOTE: --gpu_allow_growth and --num_gpus are accepted on the command line
    # but are not applied to the session configuration or the Runner here.
    session_config = tf.compat.v1.ConfigProto(
        intra_op_parallelism_threads=args.intra_op_parallelism_threads,
        inter_op_parallelism_threads=args.inter_op_parallelism_threads)
    if args.session_config is not None:
        with open(args.session_config, "rb") as session_config_file:
            text_format.Merge(session_config_file.read(), session_config)

    # --json names a file holding the input document (not a raw JSON string).
    try:
        with open(args.json, "r") as load_f:
            data = json.load(load_f)
        savedata(data, path)
    except Exception:
        # Best-effort boundary: any failure to read or save the input ends the
        # run with status 1. Unlike a bare except, KeyboardInterrupt and
        # SystemExit are not swallowed.
        print ("json is incorrected")
        raise SystemExit(1)

    runner = Runner(
        model,
        config,
        seed=args.seed,
        session_config=session_config,
        auto_config=args.auto_config,
        hvd=hvd)

    if not args.features_file:
        parser.error("--features_file is required for inference.")
    elif len(args.features_file) == 1:
        # A single features file is passed as a string, not a one-element list.
        args.features_file = args.features_file[0]
    print ("begin to run....")
    runner.infer(
        args.features_file,
        predictions_file=args.predictions_file,
        checkpoint_path=args.checkpoint_path,
        log_time=args.log_prediction_time)
    # Normalize the file inference just wrote. The default is still
    # path + "output.out", but an overridden --predictions_file is honored.
    description = normalize.normalize(args.predictions_file, data)
    data["description"] = description
    json_str = json.dumps(data, ensure_ascii=False)

    return json_str
Exemple #16
0
# Bcrypt and login-manager extensions attached to `app`.
bcrypt = Bcrypt(app)
login_manager = LoginManager(app)
login_manager.login_view = 'login'
login_manager.login_message_category = 'info'

# Vectorizer and model files for Arabic variety identification, both read
# from the same static directory.
avi_vectorizer = load(os.path.join(
    'website', 'static', 'arabic_variety_identification',
    'dialect_identification.vec'))
avi_model = load(os.path.join(
    'website', 'static', 'arabic_variety_identification',
    'dialect_identification.mdl'))

# "ca" runner: shared defaults merged with the diacritizer_ca configuration.
ca_config = load_config([
    "diacritizer/opennmt-defaults.yml",
    "diacritizer_ca/toy-ende.yml",
])
ca_model = load_model(
    ca_config["model_dir"],
    serialize_model=False,
    model_name="",
    model_file="")
ca_runner = Runner(
    ca_model,
    ca_config,
    gpu_allow_growth=False,
    num_devices=1,
    seed=None)

# "msa" model: shared defaults merged with the diacritizer_msa configuration.
msa_config = load_config([
    "diacritizer/opennmt-defaults.yml",
    "diacritizer_msa/toy-ende.yml",
])
msa_model = load_model(
    msa_config["model_dir"],
    serialize_model=False,
    model_name="",
    model_file="")
msa_runner = Runner(msa_model,
                    msa_config,
Exemple #17
0
def main():
    """Command-line entry point.

    Parses the run type and options, optionally exports a ``TF_CONFIG``
    cluster description to the environment, loads the merged run
    configuration and the model, builds a ``Runner`` and calls the method
    matching the requested run type.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("run",
                        choices=[
                            "train_and_eval", "train", "eval", "infer",
                            "export", "score"
                        ],
                        help="Run type.")
    parser.add_argument("--config",
                        required=True,
                        nargs="+",
                        help="List of configuration files.")
    parser.add_argument("--model_type",
                        default="",
                        choices=list(classes_in_module(catalog)),
                        help="Model type from the catalog.")
    parser.add_argument("--model",
                        default="",
                        help="Custom model configuration file.")
    parser.add_argument(
        "--run_dir",
        default="",
        help="If set, model_dir will be created relative to this location.")
    parser.add_argument(
        "--data_dir",
        default="",
        help="If set, data files are expected to be relative to this location."
    )
    parser.add_argument("--features_file",
                        default=[],
                        nargs="+",
                        help="Run inference on this file.")
    parser.add_argument(
        "--predictions_file",
        default="",
        help=
        ("File used to save predictions. If not set, predictions are printed "
         "on the standard output."))
    parser.add_argument("--log_prediction_time",
                        default=False,
                        action="store_true",
                        help="Logs some prediction time metrics.")
    parser.add_argument(
        "--checkpoint_path",
        default=None,
        help=("Checkpoint or directory to use for inference or export "
              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument("--num_gpus",
                        type=int,
                        default=1,
                        help="Number of GPUs to use for in-graph replication.")
    parser.add_argument(
        "--chief_host",
        default="",
        help="hostname:port of the chief worker (for distributed training).")
    parser.add_argument(
        "--worker_hosts",
        default="",
        help=("Comma-separated list of hostname:port of workers "
              "(for distributed training)."))
    parser.add_argument(
        "--ps_hosts",
        default="",
        help=("Comma-separated list of hostname:port of parameter servers "
              "(for distributed training)."))
    parser.add_argument(
        "--task_type",
        default="chief",
        choices=["chief", "worker", "ps", "evaluator"],
        help="Type of the task to run (for distributed training).")
    parser.add_argument("--task_index",
                        type=int,
                        default=0,
                        help="ID of the task (for distributed training).")
    parser.add_argument("--log_level",
                        default="INFO",
                        choices=["DEBUG", "ERROR", "FATAL", "INFO", "WARN"],
                        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument("--gpu_allow_growth",
                        default=False,
                        action="store_true",
                        help="Allocate GPU memory dynamically.")
    parser.add_argument(
        "--intra_op_parallelism_threads",
        type=int,
        default=0,
        help=("Number of intra op threads (0 means the system picks "
              "an appropriate number)."))
    parser.add_argument(
        "--inter_op_parallelism_threads",
        type=int,
        default=0,
        help=("Number of inter op threads (0 means the system picks "
              "an appropriate number)."))
    parser.add_argument(
        "--session_config",
        default=None,
        help=(
            "Path to a file containing a tf.ConfigProto message in text format "
            "and used to create the TensorFlow sessions."))
    args = parser.parse_args()

    tf.logging.set_verbosity(getattr(tf.logging, args.log_level))

    # Setup cluster if defined.
    if args.chief_host:
        # The cluster layout and this task's role are published through the
        # TF_CONFIG environment variable as a JSON document.
        os.environ["TF_CONFIG"] = json.dumps({
            "cluster": {
                "chief": [args.chief_host],
                "worker": args.worker_hosts.split(","),
                "ps": args.ps_hosts.split(",")
            },
            "task": {
                "type": args.task_type,
                "index": args.task_index
            }
        })

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])

    # Only the chief task creates the model directory; the same flag is
    # passed as serialize_model below.
    is_chief = args.task_type == "chief"
    if is_chief and not tf.gfile.Exists(config["model_dir"]):
        tf.logging.info("Creating model directory %s", config["model_dir"])
        tf.gfile.MakeDirs(config["model_dir"])

    model = load_model(config["model_dir"],
                       model_file=args.model,
                       model_name=args.model_type,
                       serialize_model=is_chief)
    session_config = tf.ConfigProto(
        intra_op_parallelism_threads=args.intra_op_parallelism_threads,
        inter_op_parallelism_threads=args.inter_op_parallelism_threads)
    # An optional text-format ConfigProto file is merged into the session
    # configuration built from the thread options above.
    if args.session_config is not None:
        with open(args.session_config, "rb") as session_config_file:
            text_format.Merge(session_config_file.read(), session_config)
    runner = Runner(model,
                    config,
                    seed=args.seed,
                    num_devices=args.num_gpus,
                    gpu_allow_growth=args.gpu_allow_growth,
                    session_config=session_config)

    # Dispatch on the requested run type.
    if args.run == "train_and_eval":
        runner.train_and_evaluate()
    elif args.run == "train":
        runner.train()
    elif args.run == "eval":
        runner.evaluate(checkpoint_path=args.checkpoint_path)
    elif args.run == "infer":
        if not args.features_file:
            parser.error("--features_file is required for inference.")
        elif len(args.features_file) == 1:
            # A single features file is passed as a string, not a list.
            args.features_file = args.features_file[0]
        runner.infer(args.features_file,
                     predictions_file=args.predictions_file,
                     checkpoint_path=args.checkpoint_path,
                     log_time=args.log_prediction_time)
    elif args.run == "export":
        runner.export(checkpoint_path=args.checkpoint_path)
    elif args.run == "score":
        # Unlike "infer", the features file list is passed through unchanged.
        if not args.features_file:
            parser.error("--features_file is required for scoring.")
        if not args.predictions_file:
            parser.error("--predictions_file is required for scoring.")
        runner.score(args.features_file,
                     args.predictions_file,
                     checkpoint_path=args.checkpoint_path)
Exemple #18
0
def main():
    """Command-line entry point.

    Parses the global options and the run-type subcommand, applies the
    TensorFlow threading / GPU memory settings, loads and merges the run
    configuration, builds the model and its ``Runner``, and finally dispatches
    to the ``Runner`` method matching the selected run type.

    A run type is mandatory: invoking the program without one is reported as a
    usage error instead of silently doing nothing.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-v",
                        "--version",
                        action="version",
                        version="OpenNMT-tf %s" % __version__)
    parser.add_argument("--config",
                        required=True,
                        nargs="+",
                        help="List of configuration files.")
    parser.add_argument("--auto_config",
                        default=False,
                        action="store_true",
                        help="Enable automatic configuration values.")
    parser.add_argument("--model_type",
                        default="",
                        choices=list(
                            classes_in_module(catalog, public_only=True)),
                        help="Model type from the catalog.")
    parser.add_argument("--model",
                        default="",
                        help="Custom model configuration file.")
    parser.add_argument(
        "--run_dir",
        default="",
        help="If set, model_dir will be created relative to this location.")
    parser.add_argument(
        "--data_dir",
        default="",
        help="If set, data files are expected to be relative to this location."
    )
    parser.add_argument(
        "--checkpoint_path",
        default=None,
        help=("Specific checkpoint or model directory to load "
              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument(
        "--log_level",
        default="INFO",
        choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"],
        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument("--gpu_allow_growth",
                        default=False,
                        action="store_true",
                        help="Allocate GPU memory dynamically.")
    parser.add_argument(
        "--intra_op_parallelism_threads",
        type=int,
        default=0,
        help=("Number of intra op threads (0 means the system picks "
              "an appropriate number)."))
    parser.add_argument(
        "--inter_op_parallelism_threads",
        type=int,
        default=0,
        help=("Number of inter op threads (0 means the system picks "
              "an appropriate number)."))
    parser.add_argument("--mixed_precision",
                        default=False,
                        action="store_true",
                        help="Enable mixed precision.")

    subparsers = parser.add_subparsers(help="Run type.", dest="run")
    # On Python 3 a subparser group is optional unless told otherwise. Without
    # this, a missing run type would still create the model directory and load
    # the model, then fall through every branch below and exit silently.
    subparsers.required = True

    parser_train = subparsers.add_parser("train", help="Training.")
    parser_train.add_argument("--with_eval",
                              default=False,
                              action="store_true",
                              help="Enable automatic evaluation.")
    parser_train.add_argument(
        "--num_gpus",
        type=int,
        default=1,
        help="Number of GPUs to use for in-graph replication.")

    parser_eval = subparsers.add_parser("eval", help="Evaluation.")
    parser_eval.add_argument("--features_file",
                             nargs="+",
                             default=None,
                             help="Input features files.")
    parser_eval.add_argument("--labels_file",
                             default=None,
                             help="Output labels files.")

    parser_infer = subparsers.add_parser("infer", help="Inference.")
    parser_infer.add_argument("--features_file",
                              nargs="+",
                              required=True,
                              help="Run inference on this file.")
    parser_infer.add_argument(
        "--predictions_file",
        default="",
        help=
        ("File used to save predictions. If not set, predictions are printed "
         "on the standard output."))
    parser_infer.add_argument("--log_prediction_time",
                              default=False,
                              action="store_true",
                              help="Logs some prediction time metrics.")

    parser_export = subparsers.add_parser("export", help="Model export.")
    parser_export.add_argument("--export_dir",
                               required=True,
                               help="The directory of the exported model.")

    parser_score = subparsers.add_parser("score", help="Scoring.")
    parser_score.add_argument("--features_file",
                              nargs="+",
                              required=True,
                              help="Features file.")
    parser_score.add_argument("--predictions_file",
                              default=None,
                              help="Predictions to score.")

    parser_average_checkpoints = subparsers.add_parser(
        "average_checkpoints", help="Checkpoint averaging.")
    parser_average_checkpoints.add_argument(
        "--output_dir",
        required=True,
        help="The output directory for the averaged checkpoint.")
    parser_average_checkpoints.add_argument(
        "--max_count",
        type=int,
        default=8,
        help="The maximal number of checkpoints to average.")

    parser_update_vocab = subparsers.add_parser(
        "update_vocab", help="Update model vocabularies in checkpoint.")
    parser_update_vocab.add_argument(
        "--output_dir",
        required=True,
        help="The output directory for the updated checkpoint.")
    parser_update_vocab.add_argument("--src_vocab",
                                     default=None,
                                     help="Path to the new source vocabulary.")
    parser_update_vocab.add_argument("--tgt_vocab",
                                     default=None,
                                     help="Path to the new target vocabulary.")

    args = parser.parse_args()
    # --features_file is declared with nargs="+" and therefore parsed as a
    # list; a single file is unwrapped so the runner receives a plain path.
    # Only some subcommands define the option, hence the hasattr() guard.
    if hasattr(args, "features_file") and args.features_file and len(
            args.features_file) == 1:
        args.features_file = args.features_file[0]

    _set_log_level(getattr(logging, args.log_level))
    tf.config.threading.set_intra_op_parallelism_threads(
        args.intra_op_parallelism_threads)
    tf.config.threading.set_inter_op_parallelism_threads(
        args.inter_op_parallelism_threads)
    if args.gpu_allow_growth:
        for device in tf.config.experimental.list_physical_devices(
                device_type="GPU"):
            tf.config.experimental.set_memory_growth(device, enable=True)

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])

    if not tf.io.gfile.exists(config["model_dir"]):
        tf.get_logger().info("Creating model directory %s",
                             config["model_dir"])
        tf.io.gfile.makedirs(config["model_dir"])

    model = load_model(config["model_dir"],
                       model_file=args.model,
                       model_name=args.model_type)
    runner = Runner(model,
                    config,
                    auto_config=args.auto_config,
                    mixed_precision=args.mixed_precision,
                    seed=args.seed)

    # Dispatch on the selected subcommand. Each branch only reads the options
    # declared on its own subparser (plus the global ones).
    if args.run == "train":
        runner.train(num_devices=args.num_gpus,
                     with_eval=args.with_eval,
                     checkpoint_path=args.checkpoint_path)
    elif args.run == "eval":
        metrics = runner.evaluate(checkpoint_path=args.checkpoint_path,
                                  features_file=args.features_file,
                                  labels_file=args.labels_file)
        print(metrics)
    elif args.run == "infer":
        runner.infer(args.features_file,
                     predictions_file=args.predictions_file,
                     checkpoint_path=args.checkpoint_path,
                     log_time=args.log_prediction_time)
    elif args.run == "export":
        runner.export(args.export_dir, checkpoint_path=args.checkpoint_path)
    elif args.run == "score":
        runner.score(args.features_file,
                     args.predictions_file,
                     checkpoint_path=args.checkpoint_path)
    elif args.run == "average_checkpoints":
        runner.average_checkpoints(args.output_dir, max_count=args.max_count)
    elif args.run == "update_vocab":
        runner.update_vocab(args.output_dir,
                            src_vocab=args.src_vocab,
                            tgt_vocab=args.tgt_vocab)
Exemple #19
0
 def testLoadModelMissingModel(self):
     # Neither a model file nor a model name is given: loading from the
     # temporary directory is expected to fail with a RuntimeError.
     model_dir = self.get_temp_dir()
     with self.assertRaises(RuntimeError):
         config.load_model(model_dir)
Exemple #20
0
 def testLoadModelInvalidInvalidFile(self):
     # Passing "a" as the model file is expected to be rejected with a
     # ValueError whose message contains "not found".
     model_dir = self.get_temp_dir()
     with self.assertRaisesRegex(ValueError, "not found"):
         config.load_model(model_dir, model_file="a")
Exemple #21
0
 def testLoadModelInvalidInvalidName(self):
     # Passing "b" as the model name is expected to be rejected with a
     # ValueError whose message contains "does not exist".
     model_dir = self.get_temp_dir()
     with self.assertRaisesRegex(ValueError, "does not exist"):
         config.load_model(model_dir, model_name="b")
Exemple #22
0
def main():
    """Command-line entry point.

    Parses the run type and options, optionally publishes a distributed
    cluster description through the ``TF_CONFIG`` environment variable, loads
    and merges the run configuration, builds the model and its ``Runner``, and
    dispatches to the ``Runner`` method matching the requested run type.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "run",
        choices=["train_and_eval", "train", "eval", "infer", "export"],
        help="Run type.")
    parser.add_argument("--config",
                        required=True,
                        nargs="+",
                        help="List of configuration files.")
    parser.add_argument("--model",
                        default="",
                        help="Model configuration file.")
    parser.add_argument(
        "--run_dir",
        default="",
        help="If set, model_dir will be created relative to this location.")
    parser.add_argument(
        "--data_dir",
        default="",
        help="If set, data files are expected to be relative to this location."
    )
    parser.add_argument("--features_file",
                        default=[],
                        nargs="+",
                        help="Run inference on this file.")
    parser.add_argument(
        "--predictions_file",
        default="",
        help=
        ("File used to save predictions. If not set, predictions are printed "
         "on the standard output."))
    parser.add_argument(
        "--checkpoint_path",
        default=None,
        help=("Checkpoint or directory to use for inference or export "
              "(when a directory is set, the latest checkpoint is used)."))
    parser.add_argument("--num_gpus",
                        type=int,
                        default=1,
                        help="Number of GPUs to use for in-graph replication.")
    parser.add_argument(
        "--chief_host",
        default="",
        help="hostname:port of the chief worker (for distributed training).")
    parser.add_argument(
        "--worker_hosts",
        default="",
        help=("Comma-separated list of hostname:port of workers "
              "(for distributed training)."))
    parser.add_argument(
        "--ps_hosts",
        default="",
        help=("Comma-separated list of hostname:port of parameter servers "
              "(for distributed training)."))
    parser.add_argument(
        "--task_type",
        default="chief",
        choices=["chief", "worker", "ps", "evaluator"],
        help="Type of the task to run (for distributed training).")
    parser.add_argument("--task_index",
                        type=int,
                        default=0,
                        help="ID of the task (for distributed training).")
    parser.add_argument("--log_level",
                        default="INFO",
                        choices=["DEBUG", "ERROR", "FATAL", "INFO", "WARN"],
                        help="Logs verbosity.")
    parser.add_argument("--seed", type=int, default=None, help="Random seed.")
    parser.add_argument("--gpu_allow_growth",
                        default=False,
                        action="store_true",
                        help="Allocate GPU memory dynamically.")
    args = parser.parse_args()

    tf.logging.set_verbosity(getattr(tf.logging, args.log_level))

    # Setup cluster if defined.
    if args.chief_host:
        cluster = {"chief": [args.chief_host]}
        for job_name, hosts in (("worker", args.worker_hosts),
                                ("ps", args.ps_hosts)):
            # "".split(",") returns [""], which would declare a task with an
            # empty address. Blank entries (unset option, trailing comma,
            # stray spaces) are dropped, and a job without any host is left
            # out of the cluster description entirely.
            addresses = [
                host.strip() for host in hosts.split(",") if host.strip()
            ]
            if addresses:
                cluster[job_name] = addresses
        os.environ["TF_CONFIG"] = json.dumps({
            "cluster": cluster,
            "task": {
                "type": args.task_type,
                "index": args.task_index
            }
        })

    # Load and merge run configurations.
    config = load_config(args.config)
    if args.run_dir:
        config["model_dir"] = os.path.join(args.run_dir, config["model_dir"])
    if args.data_dir:
        config["data"] = _prefix_paths(args.data_dir, config["data"])

    if not os.path.isdir(config["model_dir"]):
        tf.logging.info("Creating model directory %s", config["model_dir"])
        os.makedirs(config["model_dir"])

    model = load_model(config["model_dir"], model_file=args.model)
    runner = Runner(model,
                    config,
                    seed=args.seed,
                    num_devices=args.num_gpus,
                    gpu_allow_growth=args.gpu_allow_growth)

    if args.run == "train_and_eval":
        runner.train_and_evaluate()
    elif args.run == "train":
        runner.train()
    elif args.run == "eval":
        runner.evaluate(checkpoint_path=args.checkpoint_path)
    elif args.run == "infer":
        # --features_file is a global option here, so its presence can only be
        # enforced once the run type is known.
        if not args.features_file:
            parser.error("--features_file is required for inference.")
        elif len(args.features_file) == 1:
            # nargs="+" always yields a list; unwrap the single-file case.
            args.features_file = args.features_file[0]
        runner.infer(args.features_file,
                     predictions_file=args.predictions_file,
                     checkpoint_path=args.checkpoint_path)
    elif args.run == "export":
        runner.export(checkpoint_path=args.checkpoint_path)
Exemple #23
0
from opennmt.models import SequenceToSequence
from opennmt.config import load_model


def load_config(config_path: str):
    """
    Loads an OpenNMT config file

    Arguments:
        config_path: The path to the config file (read as UTF-8 YAML)

    Returns:
        The parsed YAML document (a dict for a regular config file)
    """

    with open(config_path, encoding='utf-8') as f:
        # yaml.load() without an explicit Loader is deprecated since
        # PyYAML 5.1 and raises TypeError on PyYAML 6. safe_load() parses
        # plain YAML and refuses tags that construct arbitrary Python objects.
        return yaml.safe_load(f)


# Directory containing this module; the bundled config and model definition
# files are resolved relative to it.
__dir = path.dirname(__file__)
config_path = path.join(__dir, 'config.yml')
model_file = path.join(__dir, 'nmt_small.py')

# NOTE: everything below runs when the module is loaded, so loading it reads
# config.yml from disk and may create the model directory.
config = load_config(config_path)

# Make sure the model directory named by the config exists before it is handed
# to load_model.
model_dir = config['model_dir']
if not path.isdir(model_dir):
    makedirs(model_dir)
model = load_model(model_dir, model_file=model_file)