Example #1
 def __init__(self,
              total_steps: int,
              warmup: int = 0,
              t_scale: float = 1.0) -> None:
     super().__init__(warmup, t_scale)
     check_condition(total_steps >= 0, "total_steps need to be >= 0.")
     self.total_steps = total_steps
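Every example on this page guards its arguments with the same helper. For orientation, check_condition in sockeye.utils is essentially a one-line guard that raises the library's SockeyeError when the condition is false; the sketch below is a minimal stand-in under that assumption, not the library's exact source.

class SockeyeError(Exception):
    """Assumed minimal stand-in for the library's error type."""
    pass


def check_condition(condition: bool, error_message: str) -> None:
    # Raise with the given message as soon as an invariant is violated.
    if not condition:
        raise SockeyeError(error_message)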
Example #2
def determine_context(args: argparse.Namespace, exit_stack: ExitStack) -> List[mx.Context]:
    """
    Determine the context we should run on (CPU or GPU).

    :param args: Arguments as returned by argparse.
    :param exit_stack: An ExitStack from contextlib.
    :return: A list with the context(s) to run on.
    """
    if args.use_cpu:
        logger.info("Training Device: CPU")
        context = [mx.cpu()]
    else:
        num_gpus = utils.get_num_gpus()
        check_condition(num_gpus >= 1,
                        "No GPUs found, consider running on the CPU with --use-cpu "
                        "(note: check depends on nvidia-smi and this could also mean that the nvidia-smi "
                        "binary isn't on the path).")
        if args.disable_device_locking:
            context = utils.expand_requested_device_ids(args.device_ids)
        else:
            context = exit_stack.enter_context(utils.acquire_gpus(args.device_ids, lock_dir=args.lock_dir))
        if args.batch_type == C.BATCH_TYPE_SENTENCE:
            check_condition(args.batch_size % len(context) == 0, "When using multiple devices the batch size must be "
                                                                 "divisible by the number of devices. Choose a batch "
                                                                 "size that is a multiple of %d." % len(context))
        logger.info("Training Device(s): GPU %s", context)
        context = [mx.gpu(gpu_id) for gpu_id in context]
    return context
Example #3
def check_encoder_decoder_args(args) -> None:
    """
    Check possible encoder-decoder argument conflicts.

    :param args: Arguments as returned by argparse.
    """
    encoder_embed_dropout, decoder_embed_dropout = args.embed_dropout
    encoder_rnn_dropout_inputs, decoder_rnn_dropout_inputs = args.rnn_dropout_inputs
    encoder_rnn_dropout_states, decoder_rnn_dropout_states = args.rnn_dropout_states
    if encoder_embed_dropout > 0 and encoder_rnn_dropout_inputs > 0:
        logger.warning(
            "Setting encoder RNN AND source embedding dropout > 0 leads to "
            "two dropout layers on top of each other.")
    if decoder_embed_dropout > 0 and decoder_rnn_dropout_inputs > 0:
        logger.warning(
            "Setting encoder RNN AND source embedding dropout > 0 leads to "
            "two dropout layers on top of each other.")
    encoder_rnn_dropout_recurrent, decoder_rnn_dropout_recurrent = args.rnn_dropout_recurrent
    if encoder_rnn_dropout_recurrent > 0 or decoder_rnn_dropout_recurrent > 0:
        check_condition(
            args.rnn_cell_type == C.LSTM_TYPE,
            "Recurrent dropout without memory loss only supported for LSTMs right now."
        )
    if args.rnn_decoder_stochastic:
        check_condition(
            args.optimized_metric == 'elbo',
            "When using stochastic models --optimized-metric needs to be 'elbo'."
        )
Example #4
    def __init__(self,
                 rnn_config: rnn.RNNConfig,
                 prefix=C.BIDIRECTIONALRNN_PREFIX,
                 layout=C.TIME_MAJOR,
                 encoder_class: Callable = RecurrentEncoder) -> None:
        utils.check_condition(
            rnn_config.num_hidden % 2 == 0,
            "num_hidden must be a multiple of 2 for BiDirectionalRNNEncoders.")
        self.rnn_config = rnn_config
        self.internal_rnn_config = rnn_config.copy(
            num_hidden=rnn_config.num_hidden // 2)
        if layout[0] == 'N':
            logger.warning(
                "Batch-major layout for encoder input. Consider using time-major layout for faster speed"
            )

        # time-major layout as _encode needs to swap layout for SequenceReverse
        self.forward_rnn = encoder_class(rnn_config=self.internal_rnn_config,
                                         prefix=prefix + C.FORWARD_PREFIX,
                                         layout=C.TIME_MAJOR)
        self.reverse_rnn = encoder_class(rnn_config=self.internal_rnn_config,
                                         prefix=prefix + C.REVERSE_PREFIX,
                                         layout=C.TIME_MAJOR)
        self.layout = layout
        self.prefix = prefix
Example #5
 def __init__(self,
              num_embed: int,
              prefix: str) -> None:
     utils.check_condition(num_embed % 2 == 0, "Positional embeddings require an even embedding size it "
                                               "is however %d." % num_embed)
     self.num_embed = num_embed
     self.prefix = prefix
Example #6
 def __init__(self,
              learning_rate: float = 0.001,
              beta1: float = 0.9,
              beta2: float = 0.999,
              beta3_batch: float = 0.999,
              beta3_checkpoint: float = 0.,
              epsilon: float = 1e-8,
              k_lo: float = 0.1,
              k_hi: float = 10,
              schedule_decay: float = 0.004,
              use_batch_objective: bool = True,
              use_checkpoint_objective: bool = False,
              use_nesterov_momentum: bool = False,
              **kwargs) -> None:
     check_condition(
         any((use_batch_objective, use_checkpoint_objective)),
         "Must use at least one of: batch objective, checkpoint objective")
     super().__init__(learning_rate=learning_rate, **kwargs)
     self.beta1 = beta1
     self.beta2 = beta2
     self.beta3_batch = beta3_batch
     self.beta3_checkpoint = beta3_checkpoint
     self.epsilon = epsilon
     self.k_lo = k_lo
     self.k_hi = k_hi
     self.schedule_decay = schedule_decay
     self.use_batch_objective = use_batch_objective
     self.use_checkpoint_objective = use_checkpoint_objective
     self.use_nesterov_momentum = use_nesterov_momentum
Example #7
def make_inputs(inp: Optional[str],
                translator: inference.Translator,
                json_input: bool,
                inp_factors: Optional[List[str]] = None) -> Generator[inference.TranslatorInput, None, None]:
    """
    Generates TranslatorInput instances from input. If input is None, reads from stdin. If num_input_factors > 1,
    the function will look for factors attached to each token, separated by '|'.
    If source is not None, reads from the source file. If num_source_factors > 1, num_source_factors source factor
    filenames are required.

    :param inp: The source file (possibly None).
    :param translator: Translator that will translate each line of input.
    :param json_input: Whether the input is in json format.
    :param inp_factors: Source factor files.
    :return: TranslatorInput objects.
    """
    if inp is None:
        check_condition(inp_factors is None, "Translating from STDIN, not expecting any factor files.")
        for sentence_id, line in enumerate(sys.stdin, 1):
            if json_input:
                yield inference.make_input_from_json_string(sentence_id=sentence_id, json_string=line)
            else:
                yield inference.make_input_from_factored_string(sentence_id=sentence_id,
                                                                factored_string=line,
                                                                translator=translator)
    else:
        inp_factors = [] if inp_factors is None else inp_factors
        inputs = [inp] + inp_factors
        check_condition(translator.num_source_factors == len(inputs),
                        "Model(s) require %d factors, but %d given (through --input and --input-factors)." % (
                            translator.num_source_factors, len(inputs)))
        with ExitStack() as exit_stack:
            streams = [exit_stack.enter_context(data_io.smart_open(i)) for i in inputs]
            for sentence_id, inputs in enumerate(zip(*streams), 1):
                yield inference.make_input_from_multiple_strings(sentence_id=sentence_id, strings=list(inputs))
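A brief illustration of the token|factor convention described in the docstring above; the sentence and factor values are made up for this sketch:

# Hypothetical factored input line; words and factor values are illustrative only.
line = "Das|DET Haus|NN ist|VB gross|ADJ"
tokens_with_factors = [token.split('|') for token in line.split()]
# -> [['Das', 'DET'], ['Haus', 'NN'], ['ist', 'VB'], ['gross', 'ADJ']]
# The first element of each pair is the surface token, the second its source factor.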
Example #8
def _setup_context(args, exit_stack):
    if args.use_cpu:
        context = mx.cpu()
    else:
        num_gpus = get_num_gpus()
        check_condition(
            num_gpus >= 1,
            "No GPUs found, consider running on the CPU with --use-cpu "
            "(note: check depends on nvidia-smi and this could also mean that the nvidia-smi "
            "binary isn't on the path).")
        check_condition(
            len(args.device_ids) == 1,
            "cannot run on multiple devices for now")
        gpu_id = args.device_ids[0]
        if args.disable_device_locking:
            # without locking and a negative device id we just take the first device
            gpu_id = 0
        else:
            if gpu_id < 0:
                # get a single (!) gpu id automatically:
                gpu_ids = exit_stack.enter_context(
                    acquire_gpus([-1], lock_dir=args.lock_dir))
                gpu_id = gpu_ids[0]
        context = mx.gpu(gpu_id)
    return context
Example #9
    def __init__(self,
                 config: ConvolutionalEmbeddingConfig,
                 prefix: str = C.CHAR_SEQ_ENCODER_PREFIX) -> None:
        utils.check_condition(len(config.num_filters) == config.max_filter_width,
                              "num_filters must have max_filter_width elements.")
        self.num_embed = config.num_embed
        self.output_dim = config.output_dim
        self.max_filter_width = config.max_filter_width
        self.num_filters = config.num_filters[:]
        self.pool_stride = config.pool_stride
        self.num_highway_layers = config.num_highway_layers
        self.prefix = prefix
        self.dropout = config.dropout
        self.add_positional_encoding = config.add_positional_encoding

        self.conv_weight = {filter_width: mx.sym.Variable("%s%s%d%s" % (self.prefix, "conv_", filter_width, "_weight"))
                            for filter_width in range(1, self.max_filter_width + 1)}
        self.conv_bias = {filter_width: mx.sym.Variable("%s%s%d%s" % (self.prefix, "conv_", filter_width, "_bias"))
                          for filter_width in range(1, self.max_filter_width + 1)}

        self.project_weight = mx.sym.Variable(self.prefix + "project_weight")
        self.project_bias = mx.sym.Variable(self.prefix + "project_bias")

        self.gate_weight = [mx.sym.Variable("%s%s%d%s" % (self.prefix, "gate_", i, "_weight"))
                            for i in range(self.num_highway_layers)]
        self.gate_bias = [mx.sym.Variable("%s%s%d%s" % (self.prefix, "gate_", i, "_bias"))
                          for i in range(self.num_highway_layers)]

        self.transform_weight = [mx.sym.Variable("%s%s%d%s" % (self.prefix, "transform_", i, "_weight"))
                                 for i in range(self.num_highway_layers)]
        self.transform_bias = [mx.sym.Variable("%s%s%d%s" % (self.prefix, "transform_", i, "_bias"))
                               for i in range(self.num_highway_layers)]
Example #10
def main():
    params = argparse.ArgumentParser(description='Translate CLI')
    arguments.add_translate_cli_args(params)
    args = params.parse_args()

    if args.output is not None:
        global logger
        logger = setup_main_logger(__name__,
                                   console=not args.quiet,
                                   file_logging=True,
                                   path="%s.%s" % (args.output, C.LOG_NAME))

    if args.checkpoints is not None:
        check_condition(
            len(args.checkpoints) == len(args.models),
            "must provide checkpoints for each model")

    log_basic_info(args)

    output_handler = get_output_handler(args.output_type, args.output,
                                        args.sure_align_threshold)

    with ExitStack() as exit_stack:
        context = _setup_context(args, exit_stack)

        models, source_vocabs, target_vocab = inference.load_models(
            context=context,
            max_input_len=args.max_input_len,
            beam_size=args.beam_size,
            batch_size=args.batch_size,
            model_folders=args.models,
            checkpoints=args.checkpoints,
            softmax_temperature=args.softmax_temperature,
            max_output_length_num_stds=args.max_output_length_num_stds,
            decoder_return_logit_inputs=args.restrict_lexicon is not None,
            cache_output_layer_w_b=args.restrict_lexicon is not None)
        restrict_lexicon = None  # type: Optional[TopKLexicon]
        if args.restrict_lexicon:
            restrict_lexicon = TopKLexicon(source_vocabs[0], target_vocab)
            restrict_lexicon.load(args.restrict_lexicon,
                                  k=args.restrict_lexicon_topk)
        store_beam = args.output_type == C.OUTPUT_HANDLER_BEAM_STORE
        translator = inference.Translator(
            context=context,
            ensemble_mode=args.ensemble_mode,
            bucket_source_width=args.bucket_width,
            length_penalty=inference.LengthPenalty(args.length_penalty_alpha,
                                                   args.length_penalty_beta),
            models=models,
            source_vocabs=source_vocabs,
            target_vocab=target_vocab,
            restrict_lexicon=restrict_lexicon,
            store_beam=store_beam,
            strip_unknown_words=args.strip_unknown_words)
        read_and_translate(translator=translator,
                           output_handler=output_handler,
                           chunk_size=args.chunk_size,
                           inp=args.input,
                           inp_factors=args.input_factors,
                           json_input=args.json_input)
Example #11
def average(param_paths: Iterable[str]) -> Dict[str, mx.nd.NDArray]:
    """
    Averages parameters from a list of .params file paths.

    :param param_paths: List of paths to parameter files.
    :return: Averaged parameter dictionary.
    """
    all_arg_params = []
    all_aux_params = []
    for path in param_paths:
        logger.info("Loading parameters from '%s'", path)
        arg_params, aux_params = sockeye.utils.load_params(path)
        all_arg_params.append(arg_params)
        all_aux_params.append(aux_params)

    logger.info("%d models loaded", len(all_arg_params))
    check_condition(
        all(all_arg_params[0].keys() == p.keys() for p in all_arg_params),
        "arg_param names do not match across models")
    check_condition(
        all(all_aux_params[0].keys() == p.keys() for p in all_aux_params),
        "aux_param names do not match across models")

    avg_params = {}
    # average arg_params
    for k in all_arg_params[0]:
        arrays = [p[k] for p in all_arg_params]
        avg_params["arg:" + k] = sockeye.utils.average_arrays(arrays)
    # average aux_params
    for k in all_aux_params[0]:
        arrays = [p[k] for p in all_aux_params]
        avg_params["aux:" + k] = sockeye.utils.average_arrays(arrays)

    return avg_params
Example #12
def main():
    params = argparse.ArgumentParser(description='Translate CLI')
    arguments.add_inference_args(params)
    arguments.add_device_args(params)
    args = params.parse_args()

    if args.output is not None:
        global logger
        logger = setup_main_logger(__name__,
                                   file_logging=True,
                                   path="%s.%s" % (args.output, C.LOG_NAME))

    if args.checkpoints is not None:
        check_condition(
            len(args.checkpoints) == len(args.models),
            "must provide checkpoints for each model")

    log_sockeye_version(logger)
    logger.info("Command: %s", " ".join(sys.argv))
    logger.info("Arguments: %s", args)

    output_handler = sockeye.output_handler.get_output_handler(
        args.output_type, args.output, args.sure_align_threshold)

    with ExitStack() as exit_stack:
        context = _setup_context(args, exit_stack)

        translator = sockeye.inference.Translator(
            context, args.ensemble_mode,
            *sockeye.inference.load_models(context, args.max_input_len,
                                           args.beam_size, args.models,
                                           args.checkpoints,
                                           args.softmax_temperature))
        read_and_translate(translator, output_handler, args.input)
Example #13
def determine_context(args: argparse.Namespace,
                      exit_stack: ExitStack) -> List[mx.Context]:
    """
    Determine the context we should run on (CPU or GPU).

    :param args: Arguments as returned by argparse.
    :param exit_stack: An ExitStack from contextlib.
    :return: A list with the context(s) to run on.
    """
    if args.use_cpu:
        logger.info("Device: CPU")
        context = [mx.cpu()]
    else:
        num_gpus = utils.get_num_gpus()
        check_condition(
            num_gpus >= 1,
            "No GPUs found, consider running on the CPU with --use-cpu "
            "(note: check depends on nvidia-smi and this could also mean that the nvidia-smi "
            "binary isn't on the path).")
        if args.disable_device_locking:
            context = utils.expand_requested_device_ids(args.device_ids)
        else:
            context = exit_stack.enter_context(
                utils.acquire_gpus(args.device_ids, lock_dir=args.lock_dir))
        logger.info("Device(s): GPU %s", context)
        context = [mx.gpu(gpu_id) for gpu_id in context]
    return context
Example #14
def read_sentences(path: str,
                   vocab: Dict[str, int],
                   add_bos=False,
                   limit=None) -> List[List[int]]:
    """
    Reads sentences from path and creates word id sentences.

    :param path: Path to read data from.
    :param vocab: Vocabulary mapping.
    :param add_bos: Whether to add Beginning-Of-Sentence (BOS) symbol.
    :param limit: Read limit.
    :return: List of integer sequences.
    """
    assert C.UNK_SYMBOL in vocab
    assert vocab[C.PAD_SYMBOL] == C.PAD_ID
    assert C.BOS_SYMBOL in vocab
    assert C.EOS_SYMBOL in vocab
    sentences = []
    for sentence_tokens in read_content(path, limit):
        sentence = tokens2ids(sentence_tokens, vocab)
        check_condition(sentence, "Empty sentence in file %s" % path)
        if add_bos:
            sentence.insert(0, vocab[C.BOS_SYMBOL])
        sentences.append(sentence)
    logger.info("%d sentences loaded from '%s'", len(sentences), path)
    return sentences
Example #15
    def __init__(self,
                 weight_type: str,
                 num_embed: int,
                 max_seq_len: int,
                 scale_up_input: bool,
                 scale_down_positions: bool,
                 dtype: Optional[pt.dtype] = None) -> None:
        utils.check_condition(
            num_embed % 2 == 0,
            "Positional embeddings require an even embedding size it "
            "is however %d." % num_embed)
        super().__init__()
        self.weight_type = weight_type
        self.num_embed = num_embed
        self.max_seq_len = max_seq_len
        self.scale_up_input = scale_up_input
        self.scale_down_positions = scale_down_positions

        if self.weight_type == C.FIXED_POSITIONAL_EMBEDDING:
            weight = get_positional_embeddings(length=self.max_seq_len,
                                               depth=self.num_embed)
            if self.scale_down_positions:
                weight *= self.num_embed**-0.5
            if dtype is not None:
                weight = weight.to(dtype)
            self.weight = pt.nn.Parameter(weight, requires_grad=False)
        elif self.weight_type == C.LEARNED_POSITIONAL_EMBEDDING:
            self.weight = pt.nn.Parameter(
                pt.empty(self.max_seq_len, self.num_embed, dtype=dtype))
        else:
            raise ValueError("weight_type '%s' is not supported!" %
                             self.weight_type)
Example #16
def main():
    params = argparse.ArgumentParser(description='Evaluate translations by calculating metrics with '
                                                 'respect to a reference set. If multiple hypotheses files are given '
                                                 'the mean and standard deviation of the metrics are reported.')
    arguments.add_evaluate_args(params)
    arguments.add_logging_args(params)
    args = params.parse_args()

    if args.quiet:
        logger.setLevel(logging.ERROR)

    utils.check_condition(args.offset >= 0, "Offset should be non-negative.")
    log_sockeye_version(logger)

    logger.info("Command: %s", " ".join(sys.argv))
    logger.info("Arguments: %s", args)

    references = [' '.join(e) for e in data_io.read_content(args.references)]
    all_hypotheses = [[h.strip() for h in hypotheses] for hypotheses in args.hypotheses]
    if not args.not_strict:
        for hypotheses in all_hypotheses:
            utils.check_condition(len(hypotheses) == len(references),
                                  "Number of hypotheses (%d) and references (%d) does not match." % (len(hypotheses),
                                                                                                     len(references)))
    logger.info("%d hypothesis set(s) | %d hypotheses | %d references",
                len(all_hypotheses), len(all_hypotheses[0]), len(references))

    metric_info = ["%s\t(s_opt)" % name for name in args.metrics]
    logger.info("\t".join(metric_info))

    metrics = []  # type: List[Tuple[str, Callable]]
    for name in args.metrics:
        if name == C.BLEU:
            func = partial(raw_corpus_bleu, offset=args.offset)
        elif name == C.CHRF:
            func = raw_corpus_chrf
        elif name == C.ROUGE1:
            func = raw_corpus_rouge1
        elif name == C.ROUGE2:
            func = raw_corpus_rouge2
        elif name == C.ROUGEL:
            func = raw_corpus_rougel
        else:
            raise ValueError("Unknown metric %s." % name)
        metrics.append((name, func))

    if not args.sentence:
        scores = defaultdict(list)  # type: Dict[str, List[float]]
        for hypotheses in all_hypotheses:
            for name, metric in metrics:
                scores[name].append(metric(hypotheses, references))
        _print_mean_std_score(metrics, scores)
    else:
        for hypotheses in all_hypotheses:
            for h, r in zip(hypotheses, references):
                scores = defaultdict(list)  # type: Dict[str, List[float]]
                for name, metric in metrics:
                    scores[name].append(metric([h], [r]))
                _print_mean_std_score(metrics, scores)
Example #17
    def _populate_bucket_batch_sizes(self):
        """
        Compute bucket-specific batch sizes (sentences, average_words) and default bucket batch
        size.

        If sentence-based batching: number of sentences is the same for each batch, determines the
        number of words.

        If word-based batching: number of sentences for each batch is set to the multiple of number
        of devices that produces the number of words closest to the target batch size.  Average
        target sentence length (non-padding symbols) is used for word number calculations.

        Sets: self.bucket_batch_sizes
        """
        # Pre-defined bucket batch sizes
        if self.bucket_batch_sizes is not None:
            return
        # Otherwise compute here
        self.bucket_batch_sizes = [None for _ in self.buckets]
        largest_total_batch_size = 0
        for buck_idx, bucket_shape in enumerate(self.buckets):
            # Target/label length with padding
            padded_seq_len = bucket_shape[1]
            # Average target/label length excluding padding
            average_seq_len = self.data_label_average_len[buck_idx]
            # Word-based: num words determines num sentences
            # Sentence-based: num sentences determines num words
            if self.batch_by_words:
                check_condition(
                    padded_seq_len <= self.batch_size,
                    "Word batch size must cover sequence lengths for all"
                    " buckets: (%d > %d)" % (padded_seq_len, self.batch_size))
                # Multiple of number of devices (int) closest to target number of words, assuming each sentence is of
                # average length
                batch_size_seq = self.batch_num_devices * round(
                    (self.batch_size / average_seq_len) /
                    self.batch_num_devices)
                batch_size_word = batch_size_seq * average_seq_len
            else:
                batch_size_seq = self.batch_size
                batch_size_word = batch_size_seq * average_seq_len
            self.bucket_batch_sizes[buck_idx] = BucketBatchSize(
                batch_size_seq, batch_size_word)
            # Track largest batch size by total elements
            largest_total_batch_size = max(largest_total_batch_size,
                                           batch_size_seq * max(*bucket_shape))
        # Final step: guarantee that largest bucket by sequence length also has largest total batch size.
        # When batching by sentences, this will already be the case.
        if self.batch_by_words:
            padded_seq_len = max(*self.buckets[-1])
            average_seq_len = self.data_label_average_len[-1]
            while self.bucket_batch_sizes[
                    -1].batch_size * padded_seq_len < largest_total_batch_size:
                self.bucket_batch_sizes[-1] = BucketBatchSize(
                    self.bucket_batch_sizes[-1].batch_size +
                    self.batch_num_devices,
                    self.bucket_batch_sizes[-1].average_words_per_batch +
                    self.batch_num_devices * average_seq_len)
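To make the word-based sizing concrete, here is a small numeric sketch; the variable names mirror the code above, but the numbers are illustrative and not from any real run:

# Hypothetical values, for illustration only.
batch_size = 4096          # target number of words per batch
average_seq_len = 25.3     # average target length in this bucket
batch_num_devices = 4

batch_size_seq = batch_num_devices * round((batch_size / average_seq_len) / batch_num_devices)
batch_size_word = batch_size_seq * average_seq_len
# round(161.9... / 4) == 40, so batch_size_seq == 160 sentences and
# batch_size_word == 4048.0 words, the multiple of 4 whose word count is closest to the target.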
Example #18
 def __init__(self, updates_per_checkpoint: int, half_life: int, warmup: int = 0) -> None:
     super().__init__(warmup)
     check_condition(updates_per_checkpoint > 0, "updates_per_checkpoint needs to be > 0.")
     check_condition(half_life > 0, "half_life needs to be > 0.")
     # 0.5 * base_lr = base_lr / sqrt(1 + T * factor)
     # then factor = 3 / T, with T = half_life * updates_per_checkpoint
     self.factor = 3. / (half_life * updates_per_checkpoint)
     self.t_last_log = -1
     self.log_every_t = int(half_life * updates_per_checkpoint)
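The half-life derivation in the comment above can be verified numerically. The sketch below assumes the scheduler evaluates the rate as base_lr / sqrt(1 + t * factor), which is what factor = 3 / T implies; all values are illustrative:

import math

base_lr = 0.3
half_life, updates_per_checkpoint = 10, 1000
T = half_life * updates_per_checkpoint
factor = 3. / T
lr_at_T = base_lr / math.sqrt(1 + T * factor)  # sqrt(1 + 3) == 2
assert abs(lr_at_T - 0.5 * base_lr) < 1e-12    # the rate has halved after T updates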
Example #19
 def __init__(self,
              base_lr: float = 1.0,
              warmup: int = 0,
              t_scale: float = 1.0) -> None:
     self.base_lr = base_lr
     check_condition(warmup >= 0, "warmup needs to be >= 0.")
     self.warmup = warmup
     self.t_scale = t_scale
     self.lr = None  # type: Optional[float]
Example #20
 def __init__(self, updates_per_checkpoint: int, half_life: int, warmup: int = 0) -> None:
     super().__init__(warmup)
     check_condition(updates_per_checkpoint > 0, "updates_per_checkpoint needs to be > 0.")
     check_condition(half_life > 0, "half_life needs to be > 0.")
     # 0.5 * base_lr = base_lr / sqrt(1 + T * factor)
     # then factor = 3 / T, with T = half_life * updates_per_checkpoint
     self.factor = 3. / (half_life * updates_per_checkpoint)
     self.t_last_log = -1
     self.log_every_t = int(half_life * updates_per_checkpoint)
Example #21
 def get_num_hidden(self) -> int:
     """
     Return the representation size of this encoder.
     """
     if isinstance(self.encoders[-1], BatchMajor2TimeMajor):
         utils.check_condition(len(self.encoders) > 1,
                               "Cannot return num_hidden from a BatchMajor2TimeMajor encoder only")
         return self.encoders[-2].get_num_hidden()
     else:
         return self.encoders[-1].get_num_hidden()
Example #22
    def __init__(self,
                 config: RecurrentDecoderConfig,
                 attention: attentions.Attention,
                 lexicon: Optional[lexicons.Lexicon] = None,
                 prefix=C.DECODER_PREFIX) -> None:
        # TODO: implement variant without input feeding
        self.rnn_config = config.rnn_config
        self.target_vocab_size = config.vocab_size
        self.num_target_embed = config.num_embed
        self.attention = attention
        self.weight_tying = config.weight_tying
        self.context_gating = config.context_gating
        self.layer_norm = config.layer_normalization
        self.lexicon = lexicon
        self.prefix = prefix

        self.num_hidden = self.rnn_config.num_hidden

        if self.context_gating:
            self.gate_w = mx.sym.Variable("%sgate_weight" % prefix)
            self.gate_b = mx.sym.Variable("%sgate_bias" % prefix)
            self.mapped_rnn_output_w = mx.sym.Variable(
                "%smapped_rnn_output_weight" % prefix)
            self.mapped_rnn_output_b = mx.sym.Variable(
                "%smapped_rnn_output_bias" % prefix)
            self.mapped_context_w = mx.sym.Variable("%smapped_context_weight" %
                                                    prefix)
            self.mapped_context_b = mx.sym.Variable("%smapped_context_bias" %
                                                    prefix)

        # Stacked RNN
        self.rnn = rnn.get_stacked_rnn(self.rnn_config, self.prefix)
        # RNN init state parameters
        self._create_layer_parameters()

        # Hidden state parameters
        self.hidden_w = mx.sym.Variable("%shidden_weight" % prefix)
        self.hidden_b = mx.sym.Variable("%shidden_bias" % prefix)
        self.hidden_norm = LayerNormalization(
            self.num_hidden, prefix="%shidden_norm" %
            prefix) if self.layer_norm else None
        # Embedding & output parameters
        self.embedding = encoder.Embedding(self.num_target_embed,
                                           self.target_vocab_size,
                                           prefix=C.TARGET_EMBEDDING_PREFIX,
                                           dropout=0.)  # TODO dropout?
        if self.weight_tying:
            check_condition(
                self.num_hidden == self.num_target_embed,
                "Weight tying requires target embedding size and rnn_num_hidden to be equal"
            )
            self.cls_w = self.embedding.embed_weight
        else:
            self.cls_w = mx.sym.Variable("%scls_weight" % prefix)
        self.cls_b = mx.sym.Variable("%scls_bias" % prefix)
Example #23
 def __init__(self, updates_per_checkpoint: int, half_life: int) -> None:
     check_condition(updates_per_checkpoint > 0,
                     "updates_per_checkpoint needs to be > 0.")
     check_condition(half_life > 0, "half_life needs to be > 0.")
     # Note: will be overwritten by optimizer
     self.base_lr = None
     # 0.5 * base_lr = base_lr / (1 + T * factor)
     # then factor = 1 / T, with T = half_life * updates_per_checkpoint
     self.factor = 1. / (half_life * updates_per_checkpoint)
     self.t_last_log = -1
     self.log_every_t = int(half_life * updates_per_checkpoint)
Example #24
def check_arg_compatibility(args: argparse.Namespace):
    """
    Check if some arguments are incompatible with each other.

    :param args: Arguments as returned by argparse.
    """
    if args.use_fused_rnn:
        check_condition(not args.use_cpu, "GPU required for FusedRNN cells")

    check_condition(args.optimized_metric == C.BLEU or args.optimized_metric in args.metrics,
                    "Must optimize either BLEU or one of tracked metrics (--metrics)")
Example #25
def main():
    params = argparse.ArgumentParser(description='Translate CLI')
    arguments.add_translate_cli_args(params)
    args = params.parse_args()

    if args.output is not None:
        global logger
        logger = setup_main_logger(__name__,
                                   console=not args.quiet,
                                   file_logging=True,
                                   path="%s.%s" % (args.output, C.LOG_NAME))

    if args.checkpoints is not None:
        check_condition(
            len(args.checkpoints) == len(args.models),
            "must provide checkpoints for each model")

    log_basic_info(args)

    output_handler = sockeye.output_handler.get_output_handler(
        args.output_type, args.output, args.sure_align_threshold)

    with ExitStack() as exit_stack:
        context = _setup_context(args, exit_stack)

        models, vocab_source, vocab_target = sockeye.inference.load_models(
            context,
            args.max_input_len,
            args.beam_size,
            args.batch_size,
            args.models,
            args.checkpoints,
            args.softmax_temperature,
            args.max_output_length_num_stds,
            decoder_return_logit_inputs=args.restrict_lexicon is not None,
            cache_output_layer_w_b=args.restrict_lexicon is not None,
            input_dim=args.input_dim)
        restrict_lexicon = None  # type: TopKLexicon
        if args.restrict_lexicon:
            restrict_lexicon = TopKLexicon(vocab_source, vocab_target)
            restrict_lexicon.load(args.restrict_lexicon)
        translator = sockeye.inference.Translator(
            context,
            args.ensemble_mode,
            args.bucket_width,
            sockeye.inference.LengthPenalty(args.length_penalty_alpha,
                                            args.length_penalty_beta),
            models,
            vocab_source,
            vocab_target,
            restrict_lexicon,
            input_dim=args.input_dim)
        read_and_translate(translator, output_handler, args.chunk_size,
                           args.input)
Example #26
    def __init__(self,
                 model_folder: str,
                 context: mx.context.Context,
                 fused: bool,
                 max_input_len: Optional[int],
                 beam_size: int,
                 checkpoint: Optional[int] = None,
                 softmax_temperature: Optional[float] = None):
        # load config & determine parameter file
        super().__init__(
            sockeye.model.SockeyeModel.load_config(
                os.path.join(model_folder, C.CONFIG_NAME)))
        fname_params = os.path.join(
            model_folder,
            C.PARAMS_NAME % checkpoint if checkpoint else C.PARAMS_BEST_NAME)

        if max_input_len is None:
            max_input_len = self.config.max_seq_len
        else:
            if max_input_len != self.config.max_seq_len:
                logger.warning(
                    "Model was trained with max_seq_len=%d, but using max_input_len=%d.",
                    self.config.max_seq_len, max_input_len)
        self.max_input_len = max_input_len

        check_condition(
            beam_size < self.config.vocab_target_size,
            'The beam size must be smaller than the target vocabulary size.')

        self.beam_size = beam_size
        self.softmax_temperature = softmax_temperature
        self.encoder_batch_size = 1
        self.context = context

        self._build_model_components(self.max_input_len, fused)
        self.encoder_module, self.decoder_module = self._build_modules()

        self.decoder_data_shapes_cache = dict()  # bucket_key -> shape cache
        max_encoder_data_shapes = self._get_encoder_data_shapes(
            self.max_input_len)
        max_decoder_data_shapes = self._get_decoder_data_shapes(
            self.max_input_len)
        self.encoder_module.bind(data_shapes=max_encoder_data_shapes,
                                 for_training=False,
                                 grad_req="null")
        self.decoder_module.bind(data_shapes=max_decoder_data_shapes,
                                 for_training=False,
                                 grad_req="null")

        self.load_params_from_file(fname_params)
        self.encoder_module.init_params(arg_params=self.params,
                                        allow_missing=False)
        self.decoder_module.init_params(arg_params=self.params,
                                        allow_missing=False)
Example #27
def iterate_doc_level(source_iterators: Iterator,
                      source_pre_iterators: Sequence[Iterator],
                      source_nxt_iterators: Sequence[Iterator],
                      target_pre_iterators: Sequence[Iterator],
                      target_nxt_iterators: Sequence[Iterator]):
    """
    Generator that yields current source sentences and context sentences.

    :param source_iterators: Current source sentences.
    :param source_pre_iterators: Previous source sentences.
    :param source_nxt_iterators: Next source sentences.
    :param target_pre_iterators: Previous target sentences.
    :param target_nxt_iterators: Next target sentences.
    :return: Current source string sequence including context ones.
    """
    while True:
        try:
            sources = next(source_iterators)
            source_pre = [
                next(source_pre_iter)
                for source_pre_iter in source_pre_iterators
            ]
            source_nxt = [
                next(source_nxt_iter)
                for source_nxt_iter in source_nxt_iterators
            ]
            target_pre = [
                next(target_pre_iter)
                for target_pre_iter in target_pre_iterators
            ]
            target_nxt = [
                next(target_nxt_iter)
                for target_nxt_iter in target_nxt_iterators
            ]
        except StopIteration:
            break
        yield source_pre, sources, source_nxt, target_pre, target_nxt

    check_condition(
        next(cast(Iterator, source_iterators), None) is None,
        "Different number of lines in the source original data")

    check_condition(
        all(
            next(cast(Iterator, src_pre), None) is None
            for src_pre in source_pre_iterators) and all(
                next(cast(Iterator, src_nxt), None) is None
                for src_nxt in source_nxt_iterators) and all(
                    next(cast(Iterator, tar_pre), None) is None
                    for tar_pre in target_pre_iterators) and all(
                        next(cast(Iterator, tar_nxt), None) is None
                        for tar_nxt in target_nxt_iterators),
        "Different number of lines in the additional data")
Example #28
def get_recurrent_encoder(config: RecurrentEncoderConfig, fused: bool,
                          embed_weight: Optional[mx.sym.Symbol] = None) -> 'Encoder':
    """
    Returns a recurrent encoder with embedding, batch2time-major conversion, and bidirectional RNN.
    If num_layers > 1, adds additional uni-directional RNNs.

    :param config: Configuration for recurrent encoder.
    :param fused: Whether to use FusedRNNCell (CuDNN). Only works with GPU context.
    :param embed_weight: Optionally use an existing embedding matrix instead of creating a new one.
    :return: Encoder instance.
    """
    # TODO give more control on encoder architecture
    encoders = list()  # type: List[Encoder]

    encoders.append(Embedding(num_embed=config.num_embed,
                              vocab_size=config.vocab_size,
                              prefix=C.SOURCE_EMBEDDING_PREFIX,
                              dropout=config.embed_dropout,
                              embed_weight=embed_weight))

    if config.conv_config is not None:
        encoders.append(ConvolutionalEmbeddingEncoder(config.conv_config,
                                                      prefix=C.CHAR_SEQ_ENCODER_PREFIX))
        if config.conv_config.add_positional_encoding:
            # If specified, add positional encodings to segment embeddings
            encoders.append(AddSinCosPositionalEmbeddings(num_embed=config.num_embed,
                                                          prefix="%sadd_positional_encodings" % C.CHAR_SEQ_ENCODER_PREFIX))

    encoders.append(BatchMajor2TimeMajor())

    if config.reverse_input:
        encoders.append(ReverseSequence())

    if config.rnn_config.residual:
        utils.check_condition(config.rnn_config.first_residual_layer >= 2,
                              "Residual connections on the first encoder layer are not supported")

    encoder_class = FusedRecurrentEncoder if fused else RecurrentEncoder
    # One layer bi-directional RNN:
    encoders.append(BiDirectionalRNNEncoder(rnn_config=config.rnn_config.copy(num_layers=1),
                                            prefix=C.BIDIRECTIONALRNN_PREFIX,
                                            layout=C.TIME_MAJOR))

    if config.rnn_config.num_layers > 1:
        # Stacked uni-directional RNN:
        # Because we already have a one layer bi-rnn we reduce the num_layers as well as the first_residual_layer.
        remaining_rnn_config = config.rnn_config.copy(num_layers=config.rnn_config.num_layers - 1,
                                                      first_residual_layer=config.rnn_config.first_residual_layer - 1)
        encoders.append(encoder_class(rnn_config=remaining_rnn_config,
                                      prefix=C.STACKEDRNN_PREFIX,
                                      layout=C.TIME_MAJOR))

    return EncoderSequence(encoders)
Example #29
    def __init__(self, reduce_factor: float, reduce_num_not_improved: int, warmup: int = 0) -> None:
        super().__init__(warmup)
        check_condition(0.0 < reduce_factor <= 1, "reduce_factor should be in ]0,1].")
        self.reduce_factor = reduce_factor
        self.reduce_num_not_improved = reduce_num_not_improved
        self.num_not_improved = 0

        self.lr = None  # type: float
        self.t_last_log = -1
        self.warmed_up = not self.warmup > 0
        logger.info("Will reduce the learning rate by a factor of %.2f whenever"
                    " the validation score doesn't improve %d times.",
                    reduce_factor, reduce_num_not_improved)
Example #30
    def __init__(self, reduce_factor: float, reduce_num_not_improved: int, warmup: int = 0) -> None:
        super().__init__(warmup)
        check_condition(0.0 < reduce_factor <= 1, "reduce_factor should be in ]0,1].")
        self.reduce_factor = reduce_factor
        self.reduce_num_not_improved = reduce_num_not_improved
        self.num_not_improved = 0

        self.lr = None  # type: float
        self.t_last_log = -1
        self.warmed_up = not self.warmup > 0
        logger.info("Will reduce the learning rate by a factor of %.2f whenever"
                    " the validation score doesn't improve %d times.",
                    reduce_factor, reduce_num_not_improved)
Example #31
def check_arg_compatibility(args: argparse.Namespace):
    """
    Check if some arguments are incompatible with each other.

    :param args: Arguments as returned by argparse.
    """
    check_condition(args.optimized_metric == C.BLEU or args.optimized_metric in args.metrics,
                    "Must optimize either BLEU or one of tracked metrics (--metrics)")

    if args.encoder == C.TRANSFORMER_TYPE:
        check_condition(args.transformer_model_size == args.num_embed[0],
                        "Source embedding size must match transformer model size: %s vs. %s"
                        % (args.transformer_model_size, args.num_embed[0]))

        total_source_factor_size = sum(args.source_factors_num_embed)
        if total_source_factor_size > 0:
            adjusted_transformer_encoder_model_size = args.num_embed[0] + total_source_factor_size
            check_condition(adjusted_transformer_encoder_model_size % 2 == 0 and
                            adjusted_transformer_encoder_model_size % args.transformer_attention_heads == 0,
                            "Sum of source factor sizes, i.e. num-embed plus source-factors-num-embed, (%d) "
                            "has to be even and a multiple of attention heads (%d)" % (
                                adjusted_transformer_encoder_model_size, args.transformer_attention_heads))

    if args.decoder == C.TRANSFORMER_TYPE:
        check_condition(args.transformer_model_size == args.num_embed[1],
                        "Target embedding size must match transformer model size: %s vs. %s"
                        % (args.transformer_model_size, args.num_embed[1]))
Example #32
    def __init__(self,
                 context: mx.context.Context,
                 inputs: str,
                 references: str,
                 model: str,
                 max_input_len: int,
                 beam_size: int = C.DEFAULT_BEAM_SIZE,
                 bucket_width_source: int = 10,
                 bucket_width_target: int = 10,
                 length_penalty_alpha: float = 1.0,
                 length_penalty_beta: float = 0.0,
                 softmax_temperature: Optional[float] = None,
                 max_output_length_num_stds: int = C.DEFAULT_NUM_STD_MAX_OUTPUT_LENGTH,
                 ensemble_mode: str = 'linear',
                 sample_size: int = -1,
                 random_seed: int = 42) -> None:
        self.context = context
        self.max_input_len = max_input_len
        self.max_output_length_num_stds = max_output_length_num_stds
        self.ensemble_mode = ensemble_mode
        self.beam_size = beam_size
        self.bucket_width_source = bucket_width_source
        self.bucket_width_target = bucket_width_target
        self.length_penalty_alpha = length_penalty_alpha
        self.length_penalty_beta = length_penalty_beta
        self.softmax_temperature = softmax_temperature
        self.model = model
        with smart_open(inputs) as inputs_fin, smart_open(references) as references_fin:
            input_sentences = inputs_fin.readlines()
            target_sentences = references_fin.readlines()
            check_condition(len(input_sentences) == len(target_sentences), "Number of sentence pairs do not match")
            if sample_size <= 0:
                sample_size = len(input_sentences)
            if sample_size < len(input_sentences):
                # custom random number generator to guarantee the same samples across runs in order to be able to
                # compare metrics across independent runs
                random_gen = random.Random(random_seed)
                self.input_sentences, self.target_sentences = zip(
                    *random_gen.sample(list(zip(input_sentences, target_sentences)),
                                       sample_size))
            else:
                self.input_sentences, self.target_sentences = input_sentences, target_sentences

        logger.info("Created CheckpointDecoder(max_input_len=%d, beam_size=%d, model=%s, num_sentences=%d)",
                    max_input_len, beam_size, model, len(self.input_sentences))

        with smart_open(os.path.join(self.model, C.DECODE_REF_NAME), 'w') as trg_out, \
                smart_open(os.path.join(self.model, C.DECODE_IN_NAME), 'w') as src_out:
            [trg_out.write(s) for s in self.target_sentences]
            [src_out.write(s) for s in self.input_sentences]
Example #33
 def __init__(self, schedule: List[Tuple[float, int]], updates_per_checkpoint: int) -> None:
     super().__init__()
     check_condition(all(num_updates > 0 for (_, num_updates) in schedule),
                     "num_updates for each step should be > 0.")
     check_condition(all(num_updates % updates_per_checkpoint == 0 for (_, num_updates) in schedule),
                     "num_updates for each step should be divisible by updates_per_checkpoint.")
     self.schedule = schedule
     self.current_step = 0
     self.current_rate = 0.
     self.current_step_num_updates = 0
     self.current_step_started_at = 0
     self.next_step_at = 0
     self.latest_t = 0
     self._update_rate(self.current_step)
Example #34
def make_inputs(input_file: Optional[str],
                translator: inference.Translator,
                input_is_json: bool,
                input_factors: Optional[List[str]] = None) -> Generator[inference.TranslatorInput, None, None]:
    """
    Generates TranslatorInput instances from input. If input is None, reads from stdin. If num_input_factors > 1,
    the function will look for factors attached to each token, separated by '|'.
    If source is not None, reads from the source file. If num_source_factors > 1, num_source_factors source factor
    filenames are required.

    :param input_file: The source file (possibly None).
    :param translator: Translator that will translate each line of input.
    :param input_is_json: Whether the input is in json format.
    :param input_factors: Source factor files.
    :return: TranslatorInput objects.
    """
    if input_file is None:
        check_condition(input_factors is None, "Translating from STDIN, not expecting any factor files.")
        for sentence_id, line in enumerate(sys.stdin, 1):
            if input_is_json:
                yield inference.make_input_from_json_string(sentence_id=sentence_id, json_string=line)
            else:
                yield inference.make_input_from_factored_string(sentence_id=sentence_id,
                                                                factored_string=line,
                                                                translator=translator)
    else:
        input_factors = [] if input_factors is None else input_factors
        inputs = [input_file] + input_factors
        check_condition(translator.num_source_factors == len(inputs),
                        "Model(s) require %d factors, but %d given (through --input and --input-factors)." % (
                            translator.num_source_factors, len(inputs)))
        with ExitStack() as exit_stack:
            streams = [exit_stack.enter_context(data_io.smart_open(i)) for i in inputs]
            for sentence_id, inputs in enumerate(zip(*streams), 1):
                if input_is_json:
                    yield inference.make_input_from_json_string(sentence_id=sentence_id, json_string=inputs[0])
                else:
                    yield inference.make_input_from_multiple_strings(sentence_id=sentence_id, strings=list(inputs))
Example #35
def get_lr_scheduler(scheduler_type: str,
                     updates_per_checkpoint: int,
                     learning_rate_half_life: int,
                     learning_rate_reduce_factor: float,
                     learning_rate_reduce_num_not_improved: int,
                     learning_rate_schedule: Optional[List[Tuple[float, int]]] = None,
                     learning_rate_warmup: Optional[int] = 0) -> Optional[LearningRateScheduler]:
    """
    Returns a learning rate scheduler.

    :param scheduler_type: Scheduler type.
    :param updates_per_checkpoint: Number of batches between checkpoints.
    :param learning_rate_half_life: Half life of the learning rate in number of checkpoints.
    :param learning_rate_reduce_factor: Factor to reduce learning rate with.
    :param learning_rate_reduce_num_not_improved: Number of checkpoints with no improvement after which learning rate is
           reduced.
    :param learning_rate_schedule: Optional fixed learning rate schedule.
    :param learning_rate_warmup: Number of batches that the learning rate is linearly increased.
    :raises: ValueError if unknown scheduler_type
    :return: Learning rate scheduler.
    """
    check_condition(learning_rate_schedule is None or scheduler_type == C.LR_SCHEDULER_FIXED_STEP,
                    "Learning rate schedule can only be used with '%s' learning rate scheduler."
                    % C.LR_SCHEDULER_FIXED_STEP)
    if scheduler_type is None:
        return None
    if scheduler_type == C.LR_SCHEDULER_FIXED_RATE_INV_SQRT_T:
        return LearningRateSchedulerInvSqrtT(updates_per_checkpoint, learning_rate_half_life, learning_rate_warmup)
    elif scheduler_type == C.LR_SCHEDULER_FIXED_RATE_INV_T:
        return LearningRateSchedulerInvT(updates_per_checkpoint, learning_rate_half_life, learning_rate_warmup)
    elif scheduler_type == C.LR_SCHEDULER_FIXED_STEP:
        check_condition(learning_rate_schedule is not None,
                        "learning_rate_schedule needed for %s scheduler" % C.LR_SCHEDULER_FIXED_STEP)
        return LearningRateSchedulerFixedStep(learning_rate_schedule, updates_per_checkpoint)
    elif scheduler_type == C.LR_SCHEDULER_PLATEAU_REDUCE:
        check_condition(learning_rate_reduce_factor is not None,
                        "learning_rate_reduce_factor needed for %s scheduler" % C.LR_SCHEDULER_PLATEAU_REDUCE)
        check_condition(learning_rate_reduce_num_not_improved is not None,
                        "learning_rate_reduce_num_not_improved needed for %s scheduler" % C.LR_SCHEDULER_PLATEAU_REDUCE)
        if learning_rate_reduce_factor >= 1.0:
            logger.warning("Not using %s learning rate scheduling: learning_rate_reduce_factor == 1.0"
                           % C.LR_SCHEDULER_PLATEAU_REDUCE)
            return None
        return LearningRateSchedulerPlateauReduce(learning_rate_reduce_factor, learning_rate_reduce_num_not_improved,
                                                  learning_rate_warmup)
    else:
        raise ValueError("Unknown learning rate scheduler type %s." % scheduler_type)
Example #36
def run_translate(args: argparse.Namespace):

    if args.output is not None:
        global logger
        logger = setup_main_logger(__name__,
                                   console=not args.quiet,
                                   file_logging=True,
                                   path="%s.%s" % (args.output, C.LOG_NAME))

    if args.checkpoints is not None:
        check_condition(len(args.checkpoints) == len(args.models), "must provide checkpoints for each model")

    log_basic_info(args)

    output_handler = get_output_handler(args.output_type,
                                        args.output,
                                        args.sure_align_threshold)

    with ExitStack() as exit_stack:
        check_condition(len(args.device_ids) == 1, "translate only supports single device for now")
        context = determine_context(device_ids=args.device_ids,
                                    use_cpu=args.use_cpu,
                                    disable_device_locking=args.disable_device_locking,
                                    lock_dir=args.lock_dir,
                                    exit_stack=exit_stack)[0]
        logger.info("Translate Device: %s", context)

        if args.override_dtype == C.DTYPE_FP16:
            logger.warning('Experimental feature \'--override-dtype float16\' has been used. '
                           'This feature may be removed or change its behaviour in future. '
                           'DO NOT USE IT IN PRODUCTION!')

        models, source_vocabs, target_vocab = inference.load_models(
            context=context,
            max_input_len=args.max_input_len,
            beam_size=args.beam_size,
            batch_size=args.batch_size,
            model_folders=args.models,
            checkpoints=args.checkpoints,
            softmax_temperature=args.softmax_temperature,
            max_output_length_num_stds=args.max_output_length_num_stds,
            decoder_return_logit_inputs=args.restrict_lexicon is not None,
            cache_output_layer_w_b=args.restrict_lexicon is not None,
            override_dtype=args.override_dtype)
        restrict_lexicon = None  # type: Optional[TopKLexicon]
        if args.restrict_lexicon:
            restrict_lexicon = TopKLexicon(source_vocabs[0], target_vocab)
            restrict_lexicon.load(args.restrict_lexicon, k=args.restrict_lexicon_topk)
        store_beam = args.output_type == C.OUTPUT_HANDLER_BEAM_STORE
        translator = inference.Translator(context=context,
                                          ensemble_mode=args.ensemble_mode,
                                          bucket_source_width=args.bucket_width,
                                          length_penalty=inference.LengthPenalty(args.length_penalty_alpha,
                                                                                 args.length_penalty_beta),
                                          beam_prune=args.beam_prune,
                                          beam_search_stop=args.beam_search_stop,
                                          models=models,
                                          source_vocabs=source_vocabs,
                                          target_vocab=target_vocab,
                                          restrict_lexicon=restrict_lexicon,
                                          avoid_list=args.avoid_list,
                                          store_beam=store_beam,
                                          strip_unknown_words=args.strip_unknown_words)
        read_and_translate(translator=translator,
                           output_handler=output_handler,
                           chunk_size=args.chunk_size,
                           input_file=args.input,
                           input_factors=args.input_factors,
                           input_is_json=args.json_input)
Example #37
 def __init__(self, warmup: int = 0) -> None:
     self.base_lr = None  # Note: will be overwritten by MXNet optimizer
     check_condition(warmup >= 0, "warmup needs to be >= 0.")
     self.warmup = warmup
     self.log_warmup_every_t = self.warmup // 10
     self.last_warmup_log = -1