def main(settings):
    """
    Translates a source language file (or STDIN) into a target language
    file (or STDOUT).
    """
    # Start logging.
    level = logging.DEBUG if settings.verbose else logging.INFO
    logging.basicConfig(level=level, format='%(levelname)s: %(message)s')

    # Create the TensorFlow session.
    tf_config = tf.ConfigProto()
    tf_config.allow_soft_placement = True
    session = tf.Session(config=tf_config)

    # Load config file for each model.
    configs = []
    for model in settings.models:
        config = load_config_from_json_file(model)
        setattr(config, 'reload', model)
        configs.append(config)

    # Create the model graphs and restore their variables.
    logging.debug("Loading models\n")
    models = []

    # ============= 19/8/16 KP ============
    warning('=' * 20 + 'Model Config to Load')
    warning(settings.models)
    # =====================================

    for i, config in enumerate(configs):
        with tf.variable_scope("model%d" % i) as scope:
            if config.model_type == "transformer":
                model = TransformerModel(config)
            else:
                model = rnn_model.RNNModel(config)
            saver = model_loader.init_or_restore_variables(
                config, session, ensemble_scope=scope)
            model.sampling_utils = SamplingUtils(settings)
            models.append(model)

    # ============= 19/8/16 KP ============
    model_summary()
    # =====================================

    # TODO Ensembling is currently only supported for RNNs, so if
    # TODO len(models) > 1 then check models are all rnn

    # Translate the source file.
    inference.translate_file(input_file=settings.input,
                             output_file=settings.output,
                             session=session,
                             models=models,
                             configs=configs,
                             beam_size=settings.beam_size,
                             nbest=settings.n_best,
                             minibatch_size=settings.minibatch_size,
                             maxibatch_size=settings.maxibatch_size,
                             normalization_alpha=settings.normalization_alpha)
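
# The "19/8/16 KP" debug block above calls warning() and model_summary(),
# neither of which is defined in this snippet. A minimal sketch, assuming
# warning is logging.warning and model_summary just dumps the trainable
# variables of the default graph; the original helpers may do more.
from logging import warning

def model_summary():
    # Log each trainable variable with its shape and parameter count.
    warning('=' * 20 + 'Trainable Variables')
    total = 0
    for v in tf.trainable_variables():
        n = v.shape.num_elements() or 0  # 0 if the shape is not fully known
        total += n
        warning('%s  shape=%s  params=%d' % (v.name, v.shape, n))
    warning('Total parameters: %d' % total)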
def main(settings):
    """
    Translates a source language file (or STDIN) into a target language
    file (or STDOUT).
    """
    # Create the TensorFlow session.
    tf_config = tf.ConfigProto()
    tf_config.allow_soft_placement = True
    session = tf.Session(config=tf_config)

    # Load config file for each model.
    configs = []
    for model in settings.models:
        config = load_config_from_json_file(model)
        setattr(config, 'reload', model)
        configs.append(config)

    # Create the model graphs.
    logging.debug("Loading models\n")
    models = []
    for i, config in enumerate(configs):
        with tf.variable_scope("model%d" % i) as scope:
            if config.model_type == "transformer":
                model = TransformerModel(config)
            else:
                model = rnn_model.RNNModel(config)
            model.sampling_utils = SamplingUtils(settings)
            models.append(model)

    # Add smoothing variables (if the models were trained with smoothing).
    # FIXME Assumes either all models were trained with smoothing or none were.
    if configs[0].exponential_smoothing > 0.0:
        smoothing = ExponentialSmoothing(configs[0].exponential_smoothing)

    # Restore the model variables.
    for i, config in enumerate(configs):
        with tf.variable_scope("model%d" % i) as scope:
            _ = model_loader.init_or_restore_variables(
                config, session, ensemble_scope=scope)

    # Swap-in the smoothed versions of the variables.
    if configs[0].exponential_smoothing > 0.0:
        session.run(fetches=smoothing.swap_ops)

    # TODO Ensembling is currently only supported for RNNs, so if
    # TODO len(models) > 1 then check models are all rnn

    # Translate the source file.
    inference.translate_file(input_file=settings.input,
                             output_file=settings.output,
                             session=session,
                             models=models,
                             configs=configs,
                             beam_size=settings.beam_size,
                             nbest=settings.n_best,
                             minibatch_size=settings.minibatch_size,
                             maxibatch_size=settings.maxibatch_size,
                             normalization_alpha=settings.normalization_alpha)
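
# ExponentialSmoothing is referenced above but not defined here. A minimal
# sketch of the usual shadow-variable scheme, assuming each trainable
# variable v keeps a smoothed copy s updated as s <- beta*s + (1-beta)*v
# during training; swap_ops then exchanges the live and smoothed values so
# that inference (as above) runs on the smoothed weights. The real Nematus
# class may differ in detail.
class ExponentialSmoothingSketch:
    def __init__(self, beta):
        self.beta = beta
        self.update_ops = []
        self.swap_ops = []
        for v in tf.trainable_variables():
            s = tf.Variable(v.initialized_value(), trainable=False,
                            name=v.op.name + '_smooth')
            # Run after each training step to update the shadow copy.
            self.update_ops.append(
                tf.assign(s, beta * s + (1.0 - beta) * v))
            # Swap: stash v, copy s into v, then the stashed value into s.
            tmp = tf.Variable(tf.zeros(v.get_shape(), v.dtype),
                              trainable=False)
            with tf.control_dependencies([tf.assign(tmp, v)]):
                with tf.control_dependencies([tf.assign(v, s)]):
                    self.swap_ops.append(tf.assign(s, tmp))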
def main(settings):
    """
    Translates a source language file (or STDIN) into a target language
    file (or STDOUT).
    """
    # Start logging.
    level = logging.DEBUG if settings.verbose else logging.INFO
    logging.basicConfig(level=level, format='%(levelname)s: %(message)s')

    # Create the TensorFlow session.
    if settings.cpu:
        logging.info("Running on CPU.")
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        tf_config = tf.ConfigProto(device_count={'GPU': 0})
    else:
        # NOTE: hard-codes GPU 2; adjust for your machine.
        os.environ["CUDA_VISIBLE_DEVICES"] = "2"
        tf_config = tf.ConfigProto()
    tf_config.allow_soft_placement = True
    session = tf.Session(config=tf_config)

    # Load config file for each model.
    configs = []
    for model in settings.models:
        config = util.load_config(model)
        compat.fill_options(config)
        config['reload'] = model
        configs.append(argparse.Namespace(**config))

    # Create the model graphs and restore their variables.
    logging.debug("Loading models")
    models = []
    for i, config in enumerate(configs):
        with tf.variable_scope("model%d" % i) as scope:
            model = rnn_model.RNNModel(config)
            saver = model_loader.init_or_restore_variables(
                config, session, ensemble_scope=scope)
            models.append(model)
    logging.debug("Finished loading models.")

    # Translate the source file.
    inference.translate_file(input_file=settings.input,
                             output_file=settings.output,
                             session=session,
                             models=models,
                             configs=configs,
                             beam_size=settings.beam_size,
                             nbest=settings.n_best,
                             minibatch_size=settings.minibatch_size,
                             maxibatch_size=settings.maxibatch_size,
                             normalization_alpha=settings.normalization_alpha)
def main(settings):
    """
    Translates a source language file (or STDIN) into a target language
    file (or STDOUT).
    """
    # Start logging.
    level = logging.DEBUG if settings.verbose else logging.INFO
    logging.basicConfig(level=level, format='%(levelname)s: %(message)s')

    # Create the TensorFlow session.
    tf_config = tf.ConfigProto()
    tf_config.allow_soft_placement = True
    session = tf.Session(config=tf_config)

    # Load config file for each model.
    configs = []
    for model in settings.models:
        config = load_config_from_json_file(model)
        setattr(config, 'reload', model)
        configs.append(config)

    # Create the model graphs and restore their variables.
    logging.debug("Loading models\n")
    models = []
    for i, config in enumerate(configs):
        with tf.variable_scope("model%d" % i) as scope:
            if config.model_type == "transformer":
                model = TransformerModel(config)
            else:
                model = rnn_model.RNNModel(config)
            saver = model_loader.init_or_restore_variables(
                config, session, ensemble_scope=scope)
            models.append(model)

    # TODO Ensembling is currently only supported for RNNs, so if
    # TODO len(models) > 1 then check models are all rnn

    # Translate the source file.
    inference.translate_file(input_file=settings.input,
                             output_file=settings.output,
                             session=session,
                             models=models,
                             configs=configs,
                             beam_size=settings.beam_size,
                             nbest=settings.n_best,
                             minibatch_size=settings.minibatch_size,
                             maxibatch_size=settings.maxibatch_size,
                             normalization_alpha=settings.normalization_alpha)
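
# A hypothetical driver for main(), shown only to make the expected settings
# explicit. The attribute names mirror exactly what the function reads
# (verbose, models, input, output, beam_size, n_best, minibatch_size,
# maxibatch_size, normalization_alpha); the values and the model path are
# illustrative, not the real CLI defaults.
import argparse
import sys

if __name__ == '__main__':
    settings = argparse.Namespace(
        verbose=False,
        models=['model.json'],   # hypothetical saved-model config path
        input=sys.stdin,
        output=sys.stdout,
        beam_size=5,
        n_best=False,
        minibatch_size=80,
        maxibatch_size=20,
        normalization_alpha=1.0)
    main(settings)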
def validate_with_script(session, model, config):
    if config.valid_script is None:
        return None
    logging.info('Starting external validation.')
    out = tempfile.NamedTemporaryFile(mode='w')
    inference.translate_file(
        input_file=open(config.valid_source_dataset, encoding="UTF-8"),
        output_file=out,
        session=session,
        models=[model],
        configs=[config],
        beam_size=config.beam_size,
        minibatch_size=config.valid_batch_size,
        normalization_alpha=config.normalization_alpha)
    out.flush()
    args = [config.valid_script, out.name]
    proc = subprocess.Popen(args, stdin=None, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    stdout_bytes, stderr_bytes = proc.communicate()
    encoding = locale.getpreferredencoding()
    stdout = stdout_bytes.decode(encoding=encoding)
    stderr = stderr_bytes.decode(encoding=encoding)
    if len(stderr) > 0:
        logging.info("Validation script wrote the following to standard "
                     "error:\n" + stderr)
    if proc.returncode != 0:
        logging.warning("Validation script failed (returned exit status of "
                        "{}).".format(proc.returncode))
        return None
    try:
        score = float(stdout.split()[0])
    except (IndexError, ValueError):
        logging.warning("Validation script output does not look like a "
                        "score: {}".format(stdout))
        return None
    logging.info("Validation script score: {}".format(score))
    return score
def validate_with_script(session, model, config):
    if config.valid_script is None:
        return None
    logging.info('Starting external validation.')
    out = tempfile.NamedTemporaryFile(mode='w')
    # NOTE: relies on the locale's default encoding; consider
    # passing encoding="UTF-8" explicitly.
    inference.translate_file(
        input_file=open(config.valid_source_dataset),
        output_file=out,
        session=session,
        models=[model],
        configs=[config],
        beam_size=config.beam_size,
        minibatch_size=config.valid_batch_size,
        normalization_alpha=config.normalization_alpha)
    out.flush()
    args = [config.valid_script, out.name]
    proc = subprocess.Popen(args, stdin=None, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    stdout_bytes, stderr_bytes = proc.communicate()
    encoding = locale.getpreferredencoding()
    stdout = stdout_bytes.decode(encoding=encoding)
    stderr = stderr_bytes.decode(encoding=encoding)
    if len(stderr) > 0:
        logging.info("Validation script wrote the following to standard "
                     "error:\n" + stderr)
    if proc.returncode != 0:
        logging.warning("Validation script failed (returned exit status of "
                        "{}).".format(proc.returncode))
        return None
    try:
        score = float(stdout.split()[0])
    except (IndexError, ValueError):
        logging.warning("Validation script output does not look like a "
                        "score: {}".format(stdout))
        return None
    logging.info("Validation script score: {}".format(score))
    return score
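
# config.valid_script is an external executable living in its own file; the
# parser above only requires that it accept the translation file as its
# first argument and print the score as the first whitespace-separated token
# on stdout. A toy stand-in under those assumptions (a real script would
# compute a metric such as BLEU against a reference translation):
import sys

def toy_valid_script():
    path = sys.argv[1]  # translations written by translate_file
    with open(path, encoding='utf-8') as f:
        lines = f.readlines()
    # Hypothetical "score": mean tokens per line, just to exercise the
    # float(stdout.split()[0]) parsing convention above.
    score = sum(len(line.split()) for line in lines) / max(len(lines), 1)
    print(score)

if __name__ == '__main__':
    toy_valid_script()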