Example #1
def main():
    params = argparse.ArgumentParser(description='Translate CLI')
    arguments.add_translate_cli_args(params)
    args = params.parse_args()

    if args.output is not None:
        global logger
        logger = setup_main_logger(__name__,
                                   console=not args.quiet,
                                   file_logging=True,
                                   path="%s.%s" % (args.output, C.LOG_NAME))

    if args.checkpoints is not None:
        check_condition(
            len(args.checkpoints) == len(args.models),
            "must provide checkpoints for each model")

    log_basic_info(args)

    output_handler = sockeye.output_handler.get_output_handler(
        args.output_type, args.output, args.sure_align_threshold)

    with ExitStack() as exit_stack:
        context = _setup_context(args, exit_stack)

        models, vocab_source, vocab_target = sockeye.inference.load_models(
            context,
            args.max_input_len,
            args.beam_size,
            args.batch_size,
            args.models,
            args.checkpoints,
            args.softmax_temperature,
            args.max_output_length_num_stds,
            decoder_return_logit_inputs=args.restrict_lexicon is not None,
            cache_output_layer_w_b=args.restrict_lexicon is not None,
            input_dim=args.input_dim)
        restrict_lexicon = None  # type: Optional[TopKLexicon]
        if args.restrict_lexicon:
            restrict_lexicon = TopKLexicon(vocab_source, vocab_target)
            restrict_lexicon.load(args.restrict_lexicon)
        translator = sockeye.inference.Translator(
            context,
            args.ensemble_mode,
            args.bucket_width,
            sockeye.inference.LengthPenalty(args.length_penalty_alpha,
                                            args.length_penalty_beta),
            models,
            vocab_source,
            vocab_target,
            restrict_lexicon,
            input_dim=args.input_dim)
        read_and_translate(translator, output_handler, args.chunk_size,
                           args.input)
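
Examples #1 and #2 both call a _setup_context helper that is not shown here. A minimal sketch, assuming it mirrors the inline device-selection logic of Example #3 below (get_num_gpus, check_condition, and acquire_gpus live in sockeye.utils):

import mxnet as mx

from sockeye.utils import acquire_gpus, check_condition, get_num_gpus


def _setup_context(args, exit_stack):
    # Sketch only: Example #3 below inlines essentially the same logic.
    if args.use_cpu:
        return mx.cpu()
    check_condition(get_num_gpus() >= 1,
                    "No GPUs found, consider running on the CPU with --use-cpu")
    check_condition(len(args.device_ids) == 1, "cannot run on multiple devices for now")
    gpu_id = args.device_ids[0]
    if args.disable_device_locking:
        if gpu_id < 0:
            gpu_id = 0  # without locking, a negative id falls back to the first device
    else:
        # acquire an exclusive lock on the GPU for the lifetime of the ExitStack
        gpu_ids = exit_stack.enter_context(acquire_gpus([gpu_id], lock_dir=args.lock_dir))
        gpu_id = gpu_ids[0]
    return mx.gpu(gpu_id)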
Example #2
def main():
    params = argparse.ArgumentParser(description='Translate CLI')
    arguments.add_translate_cli_args(params)
    args = params.parse_args()

    if args.output is not None:
        global logger
        logger = setup_main_logger(__name__,
                                   file_logging=True,
                                   path="%s.%s" % (args.output, C.LOG_NAME))

    if args.checkpoints is not None:
        check_condition(
            len(args.checkpoints) == len(args.models),
            "must provide checkpoints for each model")

    log_sockeye_version(logger)
    log_mxnet_version(logger)
    logger.info("Command: %s", " ".join(sys.argv))
    logger.info("Arguments: %s", args)

    output_handler = sockeye.output_handler.get_output_handler(
        args.output_type, args.output, args.sure_align_threshold)

    with ExitStack() as exit_stack:
        context = _setup_context(args, exit_stack)

        bucket_source_width, bucket_target_width = args.bucket_width
        translator = sockeye.inference.Translator(
            context, args.ensemble_mode, bucket_source_width,
            bucket_target_width,
            sockeye.inference.LengthPenalty(args.length_penalty_alpha,
                                            args.length_penalty_beta),
            *sockeye.inference.load_models(context, args.max_input_len,
                                           args.beam_size, args.models,
                                           args.checkpoints,
                                           args.softmax_temperature,
                                           args.max_output_length_num_stds))
        read_and_translate(translator, output_handler, args.input)
Example #3
def main():
    params = argparse.ArgumentParser(description='Translate CLI')
    arguments.add_translate_cli_args(params)
    arguments.add_bpe_args(params)
    args = params.parse_args()

    with ExitStack() as exit_stack:
        if args.use_cpu:
            context = mx.cpu()
        else:
            num_gpus = get_num_gpus()
            check_condition(num_gpus >= 1,
                            "No GPUs found, consider running on the CPU with --use-cpu "
                            "(note: check depends on nvidia-smi and this could also mean that the nvidia-smi "
                            "binary isn't on the path).")
            check_condition(len(args.device_ids) == 1, "cannot run on multiple devices for now")
            gpu_id = args.device_ids[0]
            if args.disable_device_locking:
                if gpu_id < 0:
                    # without locking and a negative device id we just take the first device
                    gpu_id = 0
            else:
                gpu_ids = exit_stack.enter_context(acquire_gpus([gpu_id], lock_dir=args.lock_dir))
                gpu_id = gpu_ids[0]

            context = mx.gpu(gpu_id)

        models, source_vocabs, target_vocab = inference.load_models(
            context=context,
            max_input_len=args.max_input_len,
            beam_size=args.beam_size,
            batch_size=args.batch_size,
            model_folders=args.models,
            checkpoints=args.checkpoints,
            softmax_temperature=args.softmax_temperature,
            max_output_length_num_stds=args.max_output_length_num_stds,
            decoder_return_logit_inputs=args.restrict_lexicon is not None,
            cache_output_layer_w_b=args.restrict_lexicon is not None)

        translator = inference.Translator(context=context,
                                          ensemble_mode=args.ensemble_mode,
                                          bucket_source_width=args.bucket_width,
                                          length_penalty=inference.LengthPenalty(args.length_penalty_alpha,
                                                                                 args.length_penalty_beta),
                                          models=models,
                                          source_vocabs=source_vocabs,
                                          target_vocab=target_vocab,
                                          restrict_lexicon=None,
                                          store_beam=False,
                                          strip_unknown_words=args.strip_unknown_words)

        logger.info('Parsing vocabulary')
        sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', write_through=True, line_buffering=True)

        opened_vocab = codecs.open(args.bpe_vocabulary.name, encoding='utf-8')
        bpe_filtered_vocab = read_vocabulary(opened_vocab, args.bpe_vocabulary_threshold)
        bpe_merges = -1  # Apply all merge operations.
        bpe_separator = '@@'  # Use default BPE separator.
        bpe_glossaries = None  # No excluded words.
        bpe = BPE(args.bpe_codes, bpe_merges, bpe_separator, bpe_filtered_vocab, bpe_glossaries)

        logger.info('Starting RPC server.')
        rpc_server = SockeyeRpcServer(translator, bpe)
        rpc_server.serve()
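
The BPE object built above comes from the subword-nmt package; the RPC server would segment incoming text with it before translation and join the subwords back together afterwards. A minimal sketch using subword-nmt's segment method and the '@@' separator chosen above (the helper names are hypothetical):

def bpe_preprocess(bpe, line):
    # Split a raw sentence into subword units, e.g. 'lowest' -> 'low@@ est'.
    return bpe.segment(line.strip())


def bpe_postprocess(translation):
    # Undo BPE by removing the '@@ ' joints between subword units.
    return translation.replace('@@ ', '')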
Example #4
    def initialize(self, context):
        super(SockeyeService, self).initialize(context)

        self.basedir = context.system_properties.get('model_dir')
        self.preprocessor = ChineseCharPreprocessor(
            os.path.join(self.basedir, 'bpe.codes.zh-en'),
            os.path.join(self.basedir, 'scripts'),
            os.path.join(self.basedir, 'scripts'))
        self.postprocessor = Detokenizer(
            os.path.join(self.basedir, 'scripts', 'detokenize.pl'))

        params = arguments.ConfigArgumentParser(description='Translate CLI')
        arguments.add_translate_cli_args(params)

        sockeye_args_path = os.path.join(self.basedir, 'sockeye-args.txt')
        sockeye_args = params.parse_args(read_sockeye_args(sockeye_args_path))
        # override models directory
        sockeye_args.models = [self.basedir]

        if 'gpu_id' in context.system_properties:
            self.device_ids.append(context.system_properties['gpu_id'])
        else:
            logging.warning('No gpu_id found in context')
            self.device_ids.append(0)

        if sockeye_args.checkpoints is not None:
            check_condition(
                len(sockeye_args.checkpoints) == len(sockeye_args.models),
                'must provide checkpoints for each model')

        if sockeye_args.skip_topk:
            check_condition(
                sockeye_args.beam_size == 1,
                '--skip-topk has no effect if beam size is larger than 1')
            check_condition(
                len(sockeye_args.models) == 1,
                '--skip-topk has no effect for decoding with more than 1 model'
            )

        if sockeye_args.nbest_size > 1:
            check_condition(
                sockeye_args.beam_size >= sockeye_args.nbest_size,
                'Size of nbest list (--nbest-size) must be smaller than or equal to beam size (--beam-size).'
            )
            check_condition(
                sockeye_args.beam_search_stop == const.BEAM_SEARCH_STOP_ALL,
                '--nbest-size > 1 requires beam search to only stop after all hypotheses are finished '
                '(--beam-search-stop all)')
            if sockeye_args.output_type != const.OUTPUT_HANDLER_NBEST:
                logging.warning(
                    'For nbest translation, output handler must be "%s", overriding option --output-type.',
                    const.OUTPUT_HANDLER_NBEST)
                sockeye_args.output_type = const.OUTPUT_HANDLER_NBEST

        log_basic_info(sockeye_args)

        output_handler = get_output_handler(sockeye_args.output_type,
                                            sockeye_args.output,
                                            sockeye_args.sure_align_threshold)

        with ExitStack() as exit_stack:
            check_condition(
                len(self.device_ids) == 1,
                'translate only supports single device for now')
            translator_ctx = determine_context(
                device_ids=self.device_ids,
                use_cpu=sockeye_args.use_cpu,
                disable_device_locking=sockeye_args.disable_device_locking,
                lock_dir=sockeye_args.lock_dir,
                exit_stack=exit_stack)[0]
            logging.info('Translate Device: %s', translator_ctx)

            if sockeye_args.override_dtype == const.DTYPE_FP16:
                logging.warning(
                    "Experimental feature '--override-dtype float16' has been used. "
                    "This feature may be removed or change its behavior in the future. "
                    "DO NOT USE IT IN PRODUCTION")

            models, source_vocabs, target_vocab = inference.load_models(
                context=translator_ctx,
                max_input_len=sockeye_args.max_input_len,
                beam_size=sockeye_args.beam_size,
                batch_size=sockeye_args.batch_size,
                model_folders=sockeye_args.models,
                checkpoints=sockeye_args.checkpoints,
                softmax_temperature=sockeye_args.softmax_temperature,
                max_output_length_num_stds=sockeye_args.max_output_length_num_stds,
                decoder_return_logit_inputs=sockeye_args.restrict_lexicon is not None,
                cache_output_layer_w_b=sockeye_args.restrict_lexicon is not None,
                override_dtype=sockeye_args.override_dtype,
                output_scores=output_handler.reports_score())
            restrict_lexicon = None
            if sockeye_args.restrict_lexicon:
                restrict_lexicon = TopKLexicon(source_vocabs[0], target_vocab)
                restrict_lexicon.load(sockeye_args.restrict_lexicon,
                                      k=sockeye_args.restrict_lexicon_topk)
            store_beam = sockeye_args.output_type == const.OUTPUT_HANDLER_BEAM_STORE
            self.translator = inference.Translator(
                context=translator_ctx,
                ensemble_mode=sockeye_args.ensemble_mode,
                bucket_source_width=sockeye_args.bucket_width,
                length_penalty=inference.LengthPenalty(
                    sockeye_args.length_penalty_alpha,
                    sockeye_args.length_penalty_beta),
                beam_prune=sockeye_args.beam_prune,
                beam_search_stop=sockeye_args.beam_search_stop,
                nbest_size=sockeye_args.nbest_size,
                models=models,
                source_vocabs=source_vocabs,
                target_vocab=target_vocab,
                restrict_lexicon=restrict_lexicon,
                avoid_list=sockeye_args.avoid_list,
                store_beam=store_beam,
                strip_unknown_words=sockeye_args.strip_unknown_words,
                skip_topk=sockeye_args.skip_topk)
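
Examples #4 and #6 restore saved CLI arguments from sockeye-args.txt via a read_sockeye_args helper that is not shown. Its result is passed straight to parse_args, so it must yield a list of argument tokens; a hypothetical minimal version:

def read_sockeye_args(path):
    # Hypothetical helper: return the saved CLI flags as a token list for
    # argparse's parse_args, assuming whitespace-separated flag/value tokens.
    with open(path, encoding='utf-8') as f:
        return f.read().split()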
Example #5
def main():
    params = arguments.ConfigArgumentParser(description='Translate CLI')
    arguments.add_translate_cli_args(params)
    args = params.parse_args()
    run_translate(args)
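
Example #5 delegates all of the wiring shown in the earlier examples to run_translate. A sketch of driving the same entry point programmatically, assuming run_translate is importable from sockeye.translate; the paths are placeholders:

from sockeye import arguments
from sockeye.translate import run_translate

params = arguments.ConfigArgumentParser(description='Translate CLI')
arguments.add_translate_cli_args(params)
args = params.parse_args(['--models', 'model_dir',
                          '--use-cpu',
                          '--input', 'input.txt',
                          '--output', 'output.txt'])
run_translate(args)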
Example #6
    def get_translator(self, context):
        """
        Returns a translator for the given context
        :param context: model server context
        :return:
        """
        params = arguments.ConfigArgumentParser(description='Translate CLI')
        arguments.add_translate_cli_args(params)

        sockeye_args_path = os.path.join(self.basedir, 'sockeye-args.txt')
        sockeye_args = params.parse_args(read_sockeye_args(sockeye_args_path))
        # override models directory
        sockeye_args.models = [self.basedir]

        device_ids = []
        if 'gpu_id' in context.system_properties:
            device_ids.append(context.system_properties['gpu_id'])
        else:
            logging.warning('No gpu_id found in context')
            device_ids.append(0)

        log_basic_info(sockeye_args)

        if sockeye_args.nbest_size > 1:
            if sockeye_args.output_type != const.OUTPUT_HANDLER_JSON:
                logging.warning(
                    f'For n-best translation, you must specify --output-type {const.OUTPUT_HANDLER_JSON}'
                )
                sockeye_args.output_type = const.OUTPUT_HANDLER_JSON

        output_handler = get_output_handler(sockeye_args.output_type,
                                            sockeye_args.output,
                                            sockeye_args.sure_align_threshold)

        with ExitStack() as exit_stack:
            check_condition(
                len(device_ids) == 1,
                'translate only supports single device for now')
            translator_ctx = determine_context(
                device_ids=device_ids,
                use_cpu=sockeye_args.use_cpu,
                disable_device_locking=sockeye_args.disable_device_locking,
                lock_dir=sockeye_args.lock_dir,
                exit_stack=exit_stack)[0]
            logging.info(f'Translate Device: {translator_ctx}')

            models, source_vocabs, target_vocab = inference.load_models(
                context=translator_ctx,
                max_input_len=sockeye_args.max_input_len,
                beam_size=sockeye_args.beam_size,
                batch_size=sockeye_args.batch_size,
                model_folders=sockeye_args.models,
                checkpoints=sockeye_args.checkpoints,
                softmax_temperature=sockeye_args.softmax_temperature,
                max_output_length_num_stds=sockeye_args.max_output_length_num_stds,
                decoder_return_logit_inputs=sockeye_args.restrict_lexicon is not None,
                cache_output_layer_w_b=sockeye_args.restrict_lexicon is not None,
                override_dtype=sockeye_args.override_dtype,
                output_scores=output_handler.reports_score(),
                sampling=sockeye_args.sample)

            restrict_lexicon = None
            if sockeye_args.restrict_lexicon is not None:
                logging.info(str(sockeye_args.restrict_lexicon))
                if len(sockeye_args.restrict_lexicon) == 1:
                    # Single lexicon used for all inputs
                    restrict_lexicon = TopKLexicon(source_vocabs[0],
                                                   target_vocab)
                    # Handle a single arg of key:path or path (parsed as path:path)
                    restrict_lexicon.load(sockeye_args.restrict_lexicon[0][1],
                                          k=sockeye_args.restrict_lexicon_topk)
                else:
                    check_condition(
                        sockeye_args.json_input,
                        'JSON input is required when using multiple lexicons for vocabulary restriction'
                    )
                    # Multiple lexicons with specified names
                    restrict_lexicon = dict()
                    for key, path in sockeye_args.restrict_lexicon:
                        lexicon = TopKLexicon(source_vocabs[0], target_vocab)
                        lexicon.load(path,
                                     k=sockeye_args.restrict_lexicon_topk)
                        restrict_lexicon[key] = lexicon

            store_beam = sockeye_args.output_type == const.OUTPUT_HANDLER_BEAM_STORE

            brevity_penalty_weight = sockeye_args.brevity_penalty_weight
            if sockeye_args.brevity_penalty_type == const.BREVITY_PENALTY_CONSTANT:
                if sockeye_args.brevity_penalty_constant_length_ratio > 0.0:
                    constant_length_ratio = sockeye_args.brevity_penalty_constant_length_ratio
                else:
                    constant_length_ratio = sum(
                        model.length_ratio_mean
                        for model in models) / len(models)
                    logging.info(
                        f'Using average of constant length ratios saved in the model configs: {constant_length_ratio}'
                    )
            elif sockeye_args.brevity_penalty_type == const.BREVITY_PENALTY_LEARNED:
                constant_length_ratio = -1.0
            elif sockeye_args.brevity_penalty_type == const.BREVITY_PENALTY_NONE:
                brevity_penalty_weight = 0.0
                constant_length_ratio = -1.0
            else:
                raise ValueError(
                    f'Unknown brevity penalty type {sockeye_args.brevity_penalty_type}'
                )

            brevity_penalty = None
            if brevity_penalty_weight != 0.0:
                brevity_penalty = inference.BrevityPenalty(
                    brevity_penalty_weight)

            return inference.Translator(
                context=translator_ctx,
                ensemble_mode=sockeye_args.ensemble_mode,
                bucket_source_width=sockeye_args.bucket_width,
                length_penalty=inference.LengthPenalty(
                    sockeye_args.length_penalty_alpha,
                    sockeye_args.length_penalty_beta),
                beam_prune=sockeye_args.beam_prune,
                beam_search_stop=sockeye_args.beam_search_stop,
                nbest_size=sockeye_args.nbest_size,
                models=models,
                source_vocabs=source_vocabs,
                target_vocab=target_vocab,
                restrict_lexicon=restrict_lexicon,
                avoid_list=sockeye_args.avoid_list,
                store_beam=store_beam,
                strip_unknown_words=sockeye_args.strip_unknown_words,
                skip_topk=sockeye_args.skip_topk,
                sample=sockeye_args.sample,
                constant_length_ratio=constant_length_ratio,
                brevity_penalty=brevity_penalty)
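
The translator returned above is driven by turning raw strings into TranslatorInput objects and handling the outputs. A minimal sketch, assuming make_input_from_plain_string from Sockeye's inference module of this era; the input sentence is a placeholder:

trans_inputs = [inference.make_input_from_plain_string(sentence_id=i, string=line)
                for i, line in enumerate(['this is a test .'])]
for t_output in translator.translate(trans_inputs):
    print(t_output.translation)  # best hypothesis as a plain string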