def decode_and_evaluate(self,
                        checkpoint: Optional[int] = None,
                        output_name: str = os.devnull,
                        speed_percentile: int = 99) -> Dict[str, float]:
    """
    Translates the stored input sentences using one checkpoint and scores the result.

    :param checkpoint: Checkpoint to load parameters from.
    :param output_name: Filename to write translations to. Defaults to /dev/null.
    :param speed_percentile: Percentile of the per-sentence wall time to report. Default: p99.
    :return: Mapping of metric names to scores.
    """
    models, vocab_source, vocab_target = load_models(
        self.context,
        self.max_input_len,
        self.beam_size,
        [self.model],
        [checkpoint],
        softmax_temperature=self.softmax_temperature,
        max_output_length_num_stds=self.max_output_length_num_stds)
    translator = Translator(
        self.context,
        self.ensemble_mode,
        self.bucket_width_source,
        self.bucket_width_target,
        LengthPenalty(self.length_penalty_alpha, self.length_penalty_beta),
        models,
        vocab_source,
        vocab_target)

    # One wall-time slot per input sentence, filled in as decoding proceeds.
    seconds_per_sentence = np.zeros((len(self.input_sentences),))
    hypotheses = []
    with smart_open(output_name, 'w') as output:
        handler = sockeye.output_handler.StringOutputHandler(output)
        for index, sentence in enumerate(self.input_sentences):
            started = time.time()
            trans_input = translator.make_input(index, sentence)
            trans_output = translator.translate(trans_input)
            handler.handle(trans_input, trans_output)
            # The timed span covers input construction, decoding and output handling.
            seconds_per_sentence[index] = time.time() - started
            hypotheses.append(trans_output.translation)

    speed = np.percentile(seconds_per_sentence, speed_percentile)

    # TODO(fhieber): eventually add more metrics (METEOR etc.)
    bleu = sockeye.bleu.corpus_bleu(hypotheses, self.target_sentences)
    return {C.BLEU_VAL: bleu, C.SPEED_PCT % speed_percentile: speed}
def main():
    """
    Starts an RPC translation server backed by a Sockeye translator and a BPE segmenter.

    Steps, in order: parse the translate and BPE command-line arguments, pick the
    MXNet context (CPU, or a single GPU, optionally acquired through
    ``acquire_gpus``), load the models, build the translator, re-wrap the standard
    streams as UTF-8, read the BPE vocabulary, and finally construct a
    ``SockeyeRpcServer`` and call ``serve()``.

    Everything runs inside one ``ExitStack`` so that whatever ``acquire_gpus``
    registered stays active until ``serve()`` returns.
    """
    params = argparse.ArgumentParser(description='Translate CLI')
    arguments.add_translate_cli_args(params)
    arguments.add_bpe_args(params)
    args = params.parse_args()

    with ExitStack() as exit_stack:
        if args.use_cpu:
            context = mx.cpu()
        else:
            num_gpus = get_num_gpus()
            check_condition(num_gpus >= 1,
                            "No GPUs found, consider running on the CPU with --use-cpu "
                            "(note: check depends on nvidia-smi and this could also mean that the nvidia-smi "
                            "binary isn't on the path).")
            check_condition(len(args.device_ids) == 1, "cannot run on multiple devices for now")
            gpu_id = args.device_ids[0]
            if args.disable_device_locking:
                if gpu_id < 0:
                    # without locking and a negative device id we just take the first device
                    gpu_id = 0
            else:
                gpu_ids = exit_stack.enter_context(acquire_gpus([gpu_id], lock_dir=args.lock_dir))
                gpu_id = gpu_ids[0]
            context = mx.gpu(gpu_id)

        models, source_vocabs, target_vocab = inference.load_models(
            context=context,
            max_input_len=args.max_input_len,
            beam_size=args.beam_size,
            batch_size=args.batch_size,
            model_folders=args.models,
            checkpoints=args.checkpoints,
            softmax_temperature=args.softmax_temperature,
            max_output_length_num_stds=args.max_output_length_num_stds,
            decoder_return_logit_inputs=args.restrict_lexicon is not None,
            cache_output_layer_w_b=args.restrict_lexicon is not None)

        # NOTE(review): the models are loaded according to args.restrict_lexicon, but the
        # translator is always built with restrict_lexicon=None - confirm this is intended.
        translator = inference.Translator(
            context=context,
            ensemble_mode=args.ensemble_mode,
            bucket_source_width=args.bucket_width,
            length_penalty=inference.LengthPenalty(args.length_penalty_alpha,
                                                   args.length_penalty_beta),
            models=models,
            source_vocabs=source_vocabs,
            target_vocab=target_vocab,
            restrict_lexicon=None,
            store_beam=False,
            strip_unknown_words=args.strip_unknown_words)

        logger.info('Parsing vocabulary')

        # Force UTF-8 on the standard streams regardless of the process locale.
        sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8',
                                      write_through=True, line_buffering=True)

        # Close the vocabulary file as soon as it has been read instead of leaking the handle.
        # Assumes read_vocabulary consumes the file eagerly and returns an in-memory
        # collection - TODO confirm against its definition.
        with codecs.open(args.bpe_vocabulary.name, encoding='utf-8') as opened_vocab:
            bpe_filtered_vocab = read_vocabulary(opened_vocab, args.bpe_vocabulary_threshold)

        bpe_merges = -1  # Apply all merge operations.
        bpe_separator = '@@'  # Use default BPE separator.
        bpe_glossaries = None  # No excluded words.
        bpe = BPE(args.bpe_codes, bpe_merges, bpe_separator, bpe_filtered_vocab, bpe_glossaries)

        logger.info('Starting RPC server.')
        rpc_server = SockeyeRpcServer(translator, bpe)
        rpc_server.serve()
def __init__(self, model, src, tgt, rep, gpuid):
    """
    Loads a Sockeye model for scoring and, for encoder representations, attaches a probe.

    :param model: model folder; passed to the argument parser as ``-m`` and to ``load_models``
    :param src: stored as ``self.src``
    :param tgt: stored as ``self.tgt``
    :param rep: representation name, stored as ``self.representation``; the values
        ``"EncoderWordEmbeddings"`` and ``"EncoderHiddenLayer"`` attach a probe to the encoder module
    :param gpuid: stored as ``self.gpuid``; the empty string adds ``--use-cpu`` to the parsed arguments
    """
    self.model = model
    print("Model:", self.model)
    self.representation = rep
    self.src = src
    self.tgt = tgt
    self.gpuid = gpuid

    # Build a minimal scoring command line and let the Sockeye parser fill in the rest.
    parser = argparse.ArgumentParser(description='Scoring CLI')
    arguments.add_score_cli_args(parser)
    cli = ["-m", self.model]
    if gpuid == "":
        cli += ["--use-cpu"]
    print(cli)
    args, _ = parser.parse_known_args(cli)

    self.output_handler = output_handler.get_output_handler(
        C.OUTPUT_HANDLER_TRANSLATION_WITH_SCORE, None, 0.9)

    # NOTE(review): the ExitStack is closed when this block ends, so anything
    # _setup_context registered on it is released before the translator is used
    # later - confirm that is intended.
    with ExitStack() as exit_stack:
        context = _setup_context(args, exit_stack)

        models, source_vocabs, target_vocab = inference.load_models(
            context=context,
            max_input_len=None,
            beam_size=1,
            batch_size=1,
            model_folders=[self.model],
            checkpoints=None,
            softmax_temperature=None,
            max_output_length_num_stds=C.DEFAULT_NUM_STD_MAX_OUTPUT_LENGTH,
            decoder_return_logit_inputs=False,
            cache_output_layer_w_b=False)

        self.translator = inference.Translator(
            context=context,
            ensemble_mode="linear",
            bucket_source_width=10,
            length_penalty=inference.LengthPenalty(1.0, 0.0),
            beam_prune=0,
            beam_search_stop=C.BEAM_SEARCH_STOP_ALL,
            models=models,
            source_vocabs=source_vocabs,
            target_vocab=target_vocab,
            restrict_lexicon=None,
            store_beam=False,
            strip_unknown_words=False)

    self.data = representation.Dataset.Dataset("testdata")

    if self.representation in ("EncoderWordEmbeddings", "EncoderHiddenLayer"):
        encoder = self.translator.models[0].encoder_module
        # Metadata attached to the encoder module before probing it.
        encoder._vivisect = {
            "iteration": 0,
            "rescore": 1,
            "model_name": "Sockeye",
            "framework": "mxnet"
        }
        probe(encoder,
              select=self.monitorONMT,
              perform=self.performONMT,
              cb=self.storeData)
def initialize(self, context):
    """
    Initializes the service: pre/post-processors, Sockeye arguments and the translator.

    Reads ``model_dir`` from ``context.system_properties``, builds the preprocessor
    and detokenizer from files under that directory, parses the Sockeye arguments
    stored in ``sockeye-args.txt`` there, validates them, loads the models and
    stores the resulting translator in ``self.translator``.

    :param context: model server context; ``system_properties`` supplies ``model_dir``
        and, optionally, ``gpu_id`` (device 0 is used, with a warning, when it is absent)
    """
    super(SockeyeService, self).initialize(context)

    self.basedir = context.system_properties.get('model_dir')
    self.preprocessor = ChineseCharPreprocessor(
        os.path.join(self.basedir, 'bpe.codes.zh-en'),
        os.path.join(self.basedir, 'scripts'),
        os.path.join(self.basedir, 'scripts'))
    self.postprocessor = Detokenizer(
        os.path.join(self.basedir, 'scripts', 'detokenize.pl'))

    params = arguments.ConfigArgumentParser(description='Translate CLI')
    arguments.add_translate_cli_args(params)

    sockeye_args_path = os.path.join(self.basedir, 'sockeye-args.txt')
    sockeye_args = params.parse_args(read_sockeye_args(sockeye_args_path))
    # override models directory
    sockeye_args.models = [self.basedir]

    # NOTE(review): self.device_ids is appended to, not reset, so it must be empty on
    # entry for the single-device check below to pass - confirm where it is initialised.
    if 'gpu_id' in context.system_properties:
        self.device_ids.append(context.system_properties['gpu_id'])
    else:
        logging.warning('No gpu_id found in context')
        self.device_ids.append(0)

    if sockeye_args.checkpoints is not None:
        check_condition(
            len(sockeye_args.checkpoints) == len(sockeye_args.models),
            'must provide checkpoints for each model')

    if sockeye_args.skip_topk:
        check_condition(
            sockeye_args.beam_size == 1,
            '--skip-topk has no effect if beam size is larger than 1')
        check_condition(
            len(sockeye_args.models) == 1,
            '--skip-topk has no effect for decoding with more than 1 model')

    if sockeye_args.nbest_size > 1:
        check_condition(
            sockeye_args.beam_size >= sockeye_args.nbest_size,
            'Size of nbest list (--nbest-size) must be smaller or equal to beam size (--beam-size).')
        # The option checked here is --beam-search-stop (attribute beam_search_stop), the same
        # one handed to the Translator below; the previous attribute name beam_search_drop
        # did not match it.
        check_condition(
            sockeye_args.beam_search_stop == const.BEAM_SEARCH_STOP_ALL,
            '--nbest-size > 1 requires beam search to only stop after all hypotheses are finished '
            '(--beam-search-stop all)')
        if sockeye_args.output_type != const.OUTPUT_HANDLER_NBEST:
            logging.warning(
                'For nbest translation, output handler must be "%s", overriding option --output-type.',
                const.OUTPUT_HANDLER_NBEST)
            sockeye_args.output_type = const.OUTPUT_HANDLER_NBEST

    log_basic_info(sockeye_args)

    output_handler = get_output_handler(sockeye_args.output_type,
                                        sockeye_args.output,
                                        sockeye_args.sure_align_threshold)

    # NOTE(review): the ExitStack closes when this block ends, i.e. before the stored
    # translator is used - confirm that releasing it here is intended.
    with ExitStack() as exit_stack:
        check_condition(
            len(self.device_ids) == 1,
            'translate only supports single device for now')
        translator_ctx = determine_context(
            device_ids=self.device_ids,
            use_cpu=sockeye_args.use_cpu,
            disable_device_locking=sockeye_args.disable_device_locking,
            lock_dir=sockeye_args.lock_dir,
            exit_stack=exit_stack)[0]
        logging.info('Translate Device: %s', translator_ctx)

        if sockeye_args.override_dtype == const.DTYPE_FP16:
            logging.warning(
                'Experimental feature \'--override-dtype float16\' has been used. '
                'This feature may be removed or change its behavior in the future. '
                'DO NOT USE IT IN PRODUCTION')

        models, source_vocabs, target_vocab = inference.load_models(
            context=translator_ctx,
            max_input_len=sockeye_args.max_input_len,
            beam_size=sockeye_args.beam_size,
            batch_size=sockeye_args.batch_size,
            model_folders=sockeye_args.models,
            checkpoints=sockeye_args.checkpoints,
            softmax_temperature=sockeye_args.softmax_temperature,
            max_output_length_num_stds=sockeye_args.max_output_length_num_stds,
            decoder_return_logit_inputs=sockeye_args.restrict_lexicon is not None,
            cache_output_layer_w_b=sockeye_args.restrict_lexicon is not None,
            override_dtype=sockeye_args.override_dtype,
            output_scores=output_handler.reports_score())

        restrict_lexicon = None
        if sockeye_args.restrict_lexicon:
            restrict_lexicon = TopKLexicon(source_vocabs[0], target_vocab)
            restrict_lexicon.load(sockeye_args.restrict_lexicon,
                                  k=sockeye_args.restrict_lexicon_topk)

        store_beam = sockeye_args.output_type == const.OUTPUT_HANDLER_BEAM_STORE

        self.translator = inference.Translator(
            context=translator_ctx,
            ensemble_mode=sockeye_args.ensemble_mode,
            bucket_source_width=sockeye_args.bucket_width,
            length_penalty=inference.LengthPenalty(
                sockeye_args.length_penalty_alpha,
                sockeye_args.length_penalty_beta),
            beam_prune=sockeye_args.beam_prune,
            beam_search_stop=sockeye_args.beam_search_stop,
            nbest_size=sockeye_args.nbest_size,
            models=models,
            source_vocabs=source_vocabs,
            target_vocab=target_vocab,
            restrict_lexicon=restrict_lexicon,
            avoid_list=sockeye_args.avoid_list,
            store_beam=store_beam,
            strip_unknown_words=sockeye_args.strip_unknown_words,
            skip_topk=sockeye_args.skip_topk)
def run_translate(args: argparse.Namespace):
    """
    Validates the translate arguments, builds a translator and translates the input.

    When ``args.output`` is set, the module-level ``logger`` is replaced by one that
    also logs to a file named after the output path.

    :param args: parsed command-line arguments of the translate CLI
    """
    global logger

    if args.output is not None:
        log_path = "%s.%s" % (args.output, C.LOG_NAME)
        logger = setup_main_logger(__name__,
                                   console=not args.quiet,
                                   file_logging=True,
                                   path=log_path)

    if args.checkpoints is not None:
        check_condition(len(args.checkpoints) == len(args.models),
                        "must provide checkpoints for each model")

    if args.skip_topk:
        check_condition(args.beam_size == 1,
                        "--skip-topk has no effect if beam size is larger than 1")
        check_condition(len(args.models) == 1,
                        "--skip-topk has no effect for decoding with more than 1 model")

    log_basic_info(args)

    output_handler = get_output_handler(args.output_type, args.output, args.sure_align_threshold)

    with ExitStack() as exit_stack:
        check_condition(len(args.device_ids) == 1,
                        "translate only supports single device for now")
        edge_vocab = vocab.vocab_from_json(args.edge_vocab)

        contexts = determine_context(device_ids=args.device_ids,
                                     use_cpu=args.use_cpu,
                                     disable_device_locking=args.disable_device_locking,
                                     lock_dir=args.lock_dir,
                                     exit_stack=exit_stack)
        context = contexts[0]
        logger.info("Translate Device: %s", context)

        if args.override_dtype == C.DTYPE_FP16:
            logger.warning("Experimental feature '--override-dtype float16' has been used. "
                           "This feature may be removed or change its behaviour in future. "
                           "DO NOT USE IT IN PRODUCTION!")

        # The same flag drives both lexicon-related loading options.
        lexicon_requested = args.restrict_lexicon is not None
        models, source_vocabs, target_vocab, edge_vocab = inference.load_models(
            context=context,
            max_input_len=args.max_input_len,
            beam_size=args.beam_size,
            batch_size=args.batch_size,
            edge_vocab=edge_vocab,
            model_folders=args.models,
            checkpoints=args.checkpoints,
            softmax_temperature=args.softmax_temperature,
            max_output_length_num_stds=args.max_output_length_num_stds,
            decoder_return_logit_inputs=lexicon_requested,
            cache_output_layer_w_b=lexicon_requested,
            override_dtype=args.override_dtype)

        lexicon = None  # type: Optional[TopKLexicon]
        if args.restrict_lexicon:
            lexicon = TopKLexicon(source_vocabs[0], target_vocab)
            lexicon.load(args.restrict_lexicon, k=args.restrict_lexicon_topk)

        length_penalty = inference.LengthPenalty(args.length_penalty_alpha,
                                                 args.length_penalty_beta)
        translator = inference.Translator(
            context=context,
            ensemble_mode=args.ensemble_mode,
            bucket_source_width=args.bucket_width,
            length_penalty=length_penalty,
            beam_prune=args.beam_prune,
            beam_search_stop=args.beam_search_stop,
            models=models,
            source_vocabs=source_vocabs,
            target_vocab=target_vocab,
            edge_vocab=edge_vocab,
            restrict_lexicon=lexicon,
            avoid_list=args.avoid_list,
            store_beam=args.output_type == C.OUTPUT_HANDLER_BEAM_STORE,
            strip_unknown_words=args.strip_unknown_words,
            skip_topk=args.skip_topk)

        read_and_translate(translator=translator,
                           output_handler=output_handler,
                           chunk_size=args.chunk_size,
                           input_file=args.input,
                           input_factors=args.input_factors,
                           input_is_json=args.json_input)
def get_translator(self, context):
    """
    Builds a Sockeye translator for the given model server context.

    Arguments are parsed from ``sockeye-args.txt`` under ``self.basedir``; the models
    directory is always overridden with ``self.basedir``.

    :param context: model server context; ``system_properties`` is checked for ``gpu_id``
        and device 0 is used (with a warning) when it is absent
    :return: the constructed ``inference.Translator``
    :raises ValueError: if the configured brevity penalty type is not recognised
    """
    parser = arguments.ConfigArgumentParser(description='Translate CLI')
    arguments.add_translate_cli_args(parser)

    args_file = os.path.join(self.basedir, 'sockeye-args.txt')
    opts = parser.parse_args(read_sockeye_args(args_file))
    # override models directory
    opts.models = [self.basedir]

    if 'gpu_id' in context.system_properties:
        device_ids = [context.system_properties['gpu_id']]
    else:
        logging.warning('No gpu_id found in context')
        device_ids = [0]

    log_basic_info(opts)

    # n-best output is only produced through the JSON output handler.
    if opts.nbest_size > 1 and opts.output_type != const.OUTPUT_HANDLER_JSON:
        logging.warning(
            f'For n-best translation, you must specify --output-type {const.OUTPUT_HANDLER_JSON}'
        )
        opts.output_type = const.OUTPUT_HANDLER_JSON

    output_handler = get_output_handler(opts.output_type,
                                        opts.output,
                                        opts.sure_align_threshold)

    with ExitStack() as exit_stack:
        check_condition(
            len(device_ids) == 1,
            'translate only supports single device for now')
        contexts = determine_context(
            device_ids=device_ids,
            use_cpu=opts.use_cpu,
            disable_device_locking=opts.disable_device_locking,
            lock_dir=opts.lock_dir,
            exit_stack=exit_stack)
        translator_ctx = contexts[0]
        logging.info(f'Translate Device: {translator_ctx}')

        lexicon_requested = opts.restrict_lexicon is not None
        models, source_vocabs, target_vocab = inference.load_models(
            context=translator_ctx,
            max_input_len=opts.max_input_len,
            beam_size=opts.beam_size,
            batch_size=opts.batch_size,
            model_folders=opts.models,
            checkpoints=opts.checkpoints,
            softmax_temperature=opts.softmax_temperature,
            max_output_length_num_stds=opts.max_output_length_num_stds,
            decoder_return_logit_inputs=lexicon_requested,
            cache_output_layer_w_b=lexicon_requested,
            override_dtype=opts.override_dtype,
            output_scores=output_handler.reports_score(),
            sampling=opts.sample)

        restrict_lexicon = None
        if opts.restrict_lexicon is not None:
            logging.info(str(opts.restrict_lexicon))
            if len(opts.restrict_lexicon) == 1:
                # Single lexicon used for all inputs.
                # A lone argument is either key:path or path (parsed as path:path).
                _, single_path = opts.restrict_lexicon[0][0], opts.restrict_lexicon[0][1]
                restrict_lexicon = TopKLexicon(source_vocabs[0], target_vocab)
                restrict_lexicon.load(single_path, k=opts.restrict_lexicon_topk)
            else:
                check_condition(
                    opts.json_input,
                    'JSON input is required when using multiple lexicons for vocabulary restriction'
                )
                # Multiple lexicons, addressed by their names.
                restrict_lexicon = dict()
                for name, lexicon_path in opts.restrict_lexicon:
                    named_lexicon = TopKLexicon(source_vocabs[0], target_vocab)
                    named_lexicon.load(lexicon_path, k=opts.restrict_lexicon_topk)
                    restrict_lexicon[name] = named_lexicon

        penalty_weight = opts.brevity_penalty_weight
        penalty_type = opts.brevity_penalty_type
        if penalty_type == const.BREVITY_PENALTY_CONSTANT:
            if opts.brevity_penalty_constant_length_ratio > 0.0:
                constant_length_ratio = opts.brevity_penalty_constant_length_ratio
            else:
                # No explicit ratio given: average the ratios stored with the models.
                constant_length_ratio = sum(
                    model.length_ratio_mean for model in models) / len(models)
                logging.info(
                    f'Using average of constant length ratios saved in the model configs: {constant_length_ratio}'
                )
        elif penalty_type == const.BREVITY_PENALTY_LEARNED:
            constant_length_ratio = -1.0
        elif penalty_type == const.BREVITY_PENALTY_NONE:
            penalty_weight = 0.0
            constant_length_ratio = -1.0
        else:
            raise ValueError(
                f'Unknown brevity penalty type {opts.brevity_penalty_type}'
            )

        # A zero weight means no brevity penalty object is created at all.
        brevity_penalty = (inference.BrevityPenalty(penalty_weight)
                           if penalty_weight != 0.0 else None)

        length_penalty = inference.LengthPenalty(opts.length_penalty_alpha,
                                                 opts.length_penalty_beta)
        return inference.Translator(
            context=translator_ctx,
            ensemble_mode=opts.ensemble_mode,
            bucket_source_width=opts.bucket_width,
            length_penalty=length_penalty,
            beam_prune=opts.beam_prune,
            beam_search_stop=opts.beam_search_stop,
            nbest_size=opts.nbest_size,
            models=models,
            source_vocabs=source_vocabs,
            target_vocab=target_vocab,
            restrict_lexicon=restrict_lexicon,
            avoid_list=opts.avoid_list,
            store_beam=opts.output_type == const.OUTPUT_HANDLER_BEAM_STORE,
            strip_unknown_words=opts.strip_unknown_words,
            skip_topk=opts.skip_topk,
            sample=opts.sample,
            constant_length_ratio=constant_length_ratio,
            brevity_penalty=brevity_penalty)