Code example #1
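A complete OpenNMT-tf custom model definition: a reduced Transformer (3 layers, 256 units, 8 heads, 256-dim feed-forward) over source and target word embeddings.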
# Imports assumed from OpenNMT-tf's v1-era API (they are not shown in the
# original snippet).
from opennmt.inputters import WordEmbedder
from opennmt.models import Transformer


def model():
    source = WordEmbedder(vocabulary_file_key="source_words_vocabulary",
                          embedding_size=256)
    target = WordEmbedder(vocabulary_file_key="target_words_vocabulary",
                          embedding_size=256)
    return Transformer(source_inputter=source,
                       target_inputter=target,
                       num_layers=3,
                       num_units=256,
                       num_heads=8,
                       ffn_inner_dim=256,
                       dropout=0.1,
                       attention_dropout=0.1,
                       relu_dropout=0.1)
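Assuming OpenNMT-tf's custom-model workflow, a file like this is normally handed to the CLI (e.g. onmt-main train_and_eval --model model.py --config data.yml in the v1-era interface; the exact invocation is an assumption, not shown above). As a quick sanity check, the definition can also be instantiated directly:

# Sketch only: exercise the definition above in a plain Python session.
transformer_model = model()
print(type(transformer_model).__name__)  # expected: Transformer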
Code example #2
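An excerpt that begins and ends mid-function, apparently from a FasterTransformer-style TensorFlow test script: model dimensions are derived from command-line arguments, fp16 widens the comparison tolerance, and OpenNMT-tf inputters are built over a shared WMT En-De vocabulary.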
    decoder_size_per_head = args.decoder_size_per_head
    encoder_num_layer = args.encoder_num_layer
    decoder_num_layer = args.decoder_num_layer
    encoder_hidden_dim = encoder_head_num * encoder_size_per_head
    decoder_hidden_dim = decoder_head_num * decoder_size_per_head
    tf_datatype = tf.float32
    np_datatype = np.float32
    atol_threshold = 2e-5
    if args.data_type == "fp16":
        tf_datatype = tf.float16
        np_datatype = np.float16
        atol_threshold = 2e-2

    initializer_range = 0.02

    source_inputter = WordEmbedder("source_vocabulary", embedding_size=512)
    target_inputter = WordEmbedder("target_vocabulary", embedding_size=512)
    inputter = ExampleInputter(source_inputter, target_inputter)
    inputter.initialize({
        "source_vocabulary": "./utils/translation/wmtende.vocab",
        "target_vocabulary": "./utils/translation/wmtende.vocab"
    })
    vocab_size = target_inputter.vocabulary_size
    source_file = "./utils/translation/test.en"

    decoding_args = DecodingArgument(batch_size=batch_size,
                                     beam_width=beam_width,
                                     head_num=decoder_head_num,
                                     size_per_head=decoder_size_per_head,
Code example #3
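The constructor of a Transformer-style SequenceToSequence subclass built from SIL-prefixed components: it assembles the encoder stack(s), the decoder, embedding sharing, and a CTranslate2-compatibility flag.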
    def __init__(
        self,
        source_inputter=None,
        target_inputter=None,
        num_layers=6,
        num_units=512,
        num_heads=8,
        ffn_inner_dim=2048,
        dropout=0.1,
        attention_dropout=0.1,
        ffn_dropout=0.1,
        ffn_activation=tf.nn.relu,
        position_encoder_class=SinusoidalPositionEncoder,
        share_embeddings=EmbeddingsSharingLevel.NONE,
        share_encoders=False,
        maximum_relative_position=None,
        attention_reduction=MultiHeadAttentionReduction.FIRST_HEAD_LAST_LAYER,
        pre_norm=True,
        drop_encoder_self_attention_residual_connections=set(),
        alignment_head_num_units=None,
    ):
        if source_inputter is None:
            source_inputter = SILSourceWordEmbedder(embedding_size=num_units)
        if target_inputter is None:
            target_inputter = WordEmbedder(embedding_size=num_units)

        if isinstance(num_layers, (list, tuple)):
            num_encoder_layers, num_decoder_layers = num_layers
        else:
            num_encoder_layers, num_decoder_layers = num_layers, num_layers
        encoders = [
            SILSelfAttentionEncoder(
                num_encoder_layers,
                num_units=num_units,
                num_heads=num_heads,
                ffn_inner_dim=ffn_inner_dim,
                dropout=dropout,
                attention_dropout=attention_dropout,
                ffn_dropout=ffn_dropout,
                ffn_activation=ffn_activation,
                position_encoder_class=position_encoder_class,
                maximum_relative_position=maximum_relative_position,
                pre_norm=pre_norm,
                drop_self_attention_residual_connections=(
                    drop_encoder_self_attention_residual_connections),
            ) for _ in range(source_inputter.num_outputs)
        ]
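        # With several source inputters, the per-source encoders are wrapped in
        # a ParallelEncoder (reusing a single encoder when share_encoders is
        # set); a lone source keeps its encoder as-is.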
        if len(encoders) > 1:
            encoder = ParallelEncoder(
                encoders if not share_encoders else encoders[0],
                outputs_reducer=None,
                states_reducer=None,
            )
        else:
            encoder = encoders[0]
        decoder = SILSelfAttentionDecoder(
            num_decoder_layers,
            num_units=num_units,
            num_heads=num_heads,
            ffn_inner_dim=ffn_inner_dim,
            dropout=dropout,
            attention_dropout=attention_dropout,
            ffn_dropout=ffn_dropout,
            ffn_activation=ffn_activation,
            position_encoder_class=position_encoder_class,
            num_sources=source_inputter.num_outputs,
            maximum_relative_position=maximum_relative_position,
            attention_reduction=attention_reduction,
            pre_norm=pre_norm,
            alignment_head_num_units=alignment_head_num_units,
        )

        self._pre_norm = pre_norm
        self._num_units = num_units
        self._num_encoder_layers = num_encoder_layers
        self._num_decoder_layers = num_decoder_layers
        self._num_heads = num_heads
        self._with_relative_position = maximum_relative_position is not None
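        # Records whether the model can be exported to CTranslate2: a plain
        # SelfAttentionEncoder, ReLU feed-forward activation, and either
        # relative positions (no position encoder) or sinusoidal encodings.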
        self._is_ct2_compatible = (
            isinstance(encoder, SelfAttentionEncoder)
            and ffn_activation is tf.nn.relu
            and ((self._with_relative_position
                  and position_encoder_class is None)
                 or (not self._with_relative_position
                     and position_encoder_class == SinusoidalPositionEncoder)))
        self._dictionary: Optional[Trie] = None

        if not isinstance(target_inputter, WordEmbedder):
            raise TypeError("Target inputter must be a WordEmbedder")
        if EmbeddingsSharingLevel.share_input_embeddings(share_embeddings):
            if isinstance(source_inputter, ParallelInputter):
                source_inputters = source_inputter.inputters
            else:
                source_inputters = [source_inputter]
            for inputter in source_inputters:
                if not isinstance(inputter, WordEmbedder):
                    raise TypeError(
                        "Sharing embeddings requires all inputters to be a "
                        "WordEmbedder")

        examples_inputter = SILSequenceToSequenceInputter(
            source_inputter,
            target_inputter,
            share_parameters=EmbeddingsSharingLevel.share_input_embeddings(
                share_embeddings),
        )
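        # Note: super(SequenceToSequence, self) starts the MRO lookup *above*
        # SequenceToSequence, so its __init__ is skipped, apparently on
        # purpose; the attributes it would normally set are assigned below.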
        super(SequenceToSequence, self).__init__(examples_inputter)
        self.encoder = encoder
        self.decoder = decoder
        self.share_embeddings = share_embeddings
Code example #4
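A variant of the excerpt in code example #2 that additionally fabricates a random source-embedding tensor for testing.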
    tf_datatype = tf.float32
    np_datatype = np.float32
    atol_threshold = 2e-5
    if args.data_type == "fp16":
        tf_datatype = tf.float16
        np_datatype = np.float16
        atol_threshold = 2e-2

    initializer_range = 0.02
    # generate random data
    memory_sequence_length = max_seq_len
    source_embedding = np.random.randn(batch_size, memory_sequence_length,
                                       encoder_hidden_dim)
    source_embedding = tf.convert_to_tensor(source_embedding,
                                            dtype=tf_datatype)

    source_inputter = WordEmbedder("source_vocabulary",
                                   embedding_size=encoder_hidden_dim)
    target_inputter = WordEmbedder("target_vocabulary",
                                   embedding_size=encoder_hidden_dim)
    inputter = ExampleInputter(source_inputter, target_inputter)
    inputter.initialize({
        "source_vocabulary": "./utils/translation/wmtende.vocab",
        "target_vocabulary": "./utils/translation/wmtende.vocab"
    })
    vocab_size = target_inputter.vocabulary_size
    source_file = "./utils/translation/test.en"

    decoding_args = DecodingArgument(batch_size=batch_size,
                                     beam_width=beam_width,
                                     head_num=decoder_head_num,
Code example #5
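The full translation sample: it builds a TensorFlow encoder and its fused-op counterpart, decodes with beam search and sampling through both decoder paths, then times each selected case and scores it with BLEU.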
def translate_sample(args_dict):
    print("\n=============== Argument ===============")
    for key in args_dict:
        print("{}: {}".format(key, args_dict[key]))
    print("========================================")

    np.random.seed(1)
    tf.set_random_seed(1)
    random.seed(1)

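    # The <s>/</s> ids are hard-coded; they must match the vocabulary files
    # passed in through args_dict.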
    start_of_sentence_id = 1
    end_of_sentence_id = 2

    kernel_initializer_range = 0.02
    bias_initializer_range = 0.02

    batch_size = args_dict['batch_size']
    beam_width = args_dict['beam_width']
    max_seq_len = args_dict['max_seq_len']
    encoder_head_num = args_dict['encoder_head_number']
    encoder_size_per_head = args_dict['encoder_size_per_head']
    decoder_head_num = args_dict['decoder_head_number']
    decoder_size_per_head = args_dict['decoder_size_per_head']
    encoder_num_layer = args_dict['encoder_num_layer']
    decoder_num_layer = args_dict['decoder_num_layer']
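    # Hidden sizes follow the multi-head attention layout: head count times
    # size per head.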
    encoder_hidden_dim = encoder_head_num * encoder_size_per_head
    decoder_hidden_dim = decoder_head_num * decoder_size_per_head
    tf_datatype = tf.float32
    if args_dict['data_type'] == "fp16":
        tf_datatype = tf.float16
    beam_search_diversity_rate = args_dict['beam_search_diversity_rate']
    sampling_topk = args_dict['sampling_topk']
    sampling_topp = args_dict['sampling_topp']

    source_inputter = WordEmbedder("source_vocabulary",
                                   embedding_size=encoder_hidden_dim,
                                   dtype=tf_datatype)
    target_inputter = WordEmbedder("target_vocabulary",
                                   embedding_size=decoder_hidden_dim,
                                   dtype=tf_datatype)
    inputter = ExampleInputter(source_inputter, target_inputter)
    inputter.initialize({
        "source_vocabulary": args_dict['source_vocabulary'],
        "target_vocabulary": args_dict['target_vocabulary']
    })
    vocab_size = target_inputter.vocabulary_size
    source_file = args_dict['source']
    is_remove_padding = args_dict['remove_padding'].lower() == "true"

    encoder_args = TransformerArgument(
        beam_width=1,
        head_num=encoder_head_num,
        size_per_head=encoder_size_per_head,
        num_layer=encoder_num_layer,
        dtype=tf_datatype,
        kernel_init_range=kernel_initializer_range,
        bias_init_range=bias_initializer_range,
        remove_padding=is_remove_padding)

    decoder_args = TransformerArgument(
        beam_width=beam_width,
        head_num=decoder_head_num,
        size_per_head=decoder_size_per_head,
        num_layer=decoder_num_layer,
        dtype=tf_datatype,
        kernel_init_range=kernel_initializer_range,
        bias_init_range=bias_initializer_range,
        memory_hidden_dim=encoder_head_num * encoder_size_per_head)

    decoder_args_2 = copy.deepcopy(decoder_args)  # decoder_args is kept for beam search
    decoder_args_2.beam_width = 1  # the copy is used for sampling (single hypothesis)

    decoding_beamsearch_args = DecodingBeamsearchArgument(
        vocab_size, start_of_sentence_id, end_of_sentence_id, max_seq_len,
        decoder_args, beam_search_diversity_rate)

    decoding_sampling_args = DecodingSamplingArgument(
        vocab_size, start_of_sentence_id, end_of_sentence_id, max_seq_len,
        decoder_args_2, sampling_topk, sampling_topp)

    with tf.variable_scope("transformer/encoder", reuse=tf.AUTO_REUSE):
        dataset = inputter.make_inference_dataset(source_file, batch_size)
        iterator = dataset.make_initializable_iterator()
        source = iterator.get_next()
        source_embedding = source_inputter.make_inputs(source)
        source_embedding = tf.cast(source_embedding, tf_datatype)
        memory_sequence_length = source["length"]

        tf_encoder_result = tf_encoder_opennmt(
            source_embedding,
            encoder_args,
            sequence_length=memory_sequence_length)

        encoder_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        encoder_variables_dict = {}
        for v in encoder_vars:
            encoder_variables_dict[v.name] = v

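        # The ft_* encoder op reuses the TF variables collected above, so the
        # TensorFlow and fused-op encoders run with identical weights.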
        ft_encoder_result = ft_encoder_opennmt(
            inputs=source_embedding,
            encoder_args=encoder_args,
            encoder_vars_dict=encoder_variables_dict,
            sequence_length=memory_sequence_length)

    tf_encoder_result = tf.reshape(tf_encoder_result,
                                   tf.shape(source_embedding))
    ft_encoder_result = tf.reshape(ft_encoder_result,
                                   tf.shape(source_embedding))

    with tf.variable_scope("transformer/decoder", reuse=tf.AUTO_REUSE):
        target_inputter.build()
    target_vocab_rev = target_inputter.vocabulary_lookup_reverse()

    ### TF BeamSearch Decoding ###
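    # decoder_type=0 runs the pure TensorFlow decoder in these calls;
    # decoder_type=1 (the "OP decoder" variants below) swaps the custom
    # decoder op into the same decoding loop.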
    tf_beamsearch_target_ids, tf_beamsearch_target_length, _, _, _ = tf_beamsearch_decoding(
        tf_encoder_result,
        memory_sequence_length,
        target_inputter.embedding,
        decoding_beamsearch_args,
        decoder_type=0)

    # tf_beamsearch_target_tokens: [batch_size, beam_width, seq_len]
    tf_beamsearch_target_tokens = target_vocab_rev.lookup(
        tf.cast(tf_beamsearch_target_ids, tf.int64))
    tf_beamsearch_target_length = tf.minimum(
        tf_beamsearch_target_length + 1,
        tf.shape(tf_beamsearch_target_ids)[-1])
    ### end of TF BeamSearch Decoding ###

    ### TF Sampling Decoding ###
    tf_sampling_target_ids, tf_sampling_target_length = tf_sampling_decoding(
        tf_encoder_result,
        memory_sequence_length,
        target_inputter.embedding,
        decoding_sampling_args,
        decoder_type=0)

    # tf_sampling_target_tokens: [batch_size, seq_len]
    tf_sampling_target_tokens = target_vocab_rev.lookup(
        tf.cast(tf_sampling_target_ids, tf.int64))
    tf_sampling_target_length = tf.minimum(
        tf_sampling_target_length + 1,
        tf.shape(tf_sampling_target_ids)[-1])
    ### end of TF Sampling Decoding ###

    ### OP BeamSearch Decoder ###
    op_decoder_beamsearch_target_ids, op_decoder_beamsearch_target_length, _, _, _ = tf_beamsearch_decoding(
        tf_encoder_result,
        memory_sequence_length,
        target_inputter.embedding,
        decoding_beamsearch_args,
        decoder_type=1)

    # op_decoder_beamsearch_target_tokens: [batch_size, beam_width, seq_len]
    op_decoder_beamsearch_target_tokens = target_vocab_rev.lookup(
        tf.cast(op_decoder_beamsearch_target_ids, tf.int64))
    op_decoder_beamsearch_target_length = tf.minimum(
        op_decoder_beamsearch_target_length + 1,
        tf.shape(op_decoder_beamsearch_target_ids)[-1])
    ### end of OP BeamSearch Decoder ###

    ### OP Sampling Decoder ###
    op_decoder_sampling_target_ids, op_decoder_sampling_target_length = tf_sampling_decoding(
        tf_encoder_result,
        memory_sequence_length,
        target_inputter.embedding,
        decoding_sampling_args,
        decoder_type=1)

    op_decoder_sampling_target_tokens = target_vocab_rev.lookup(
        tf.cast(op_decoder_sampling_target_ids, tf.int64))
    op_decoder_sampling_target_length = tf.minimum(
        op_decoder_sampling_target_length + 1,
        tf.shape(op_decoder_sampling_target_ids)[-1])
    ### end of OP Sampling Decoder ###

    ### Prepare Decoding variables for FasterTransformer  ###
    all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    decoder_var_start_id = 0

    while all_vars[decoder_var_start_id].name.find(
            "transformer/decoder") == -1:
        decoder_var_start_id += 1
    decoder_variables = all_vars[
        decoder_var_start_id +
        1:]  # decoder_var_start_id + 1 means skip the embedding table

    ### OP BeamSearch Decoding ###
    op_beamsearch_target_ids, op_beamsearch_target_length, _, _, _ = op_beamsearch_decoding(
        ft_encoder_result, memory_sequence_length, target_inputter.embedding,
        decoder_variables, decoding_beamsearch_args)

    op_beamsearch_target_tokens = target_vocab_rev.lookup(
        tf.cast(op_beamsearch_target_ids, tf.int64))
    op_beamsearch_target_length = tf.minimum(
        op_beamsearch_target_length + 1,
        tf.shape(op_beamsearch_target_ids)[-1])
    ### end of OP BeamSearch Decoding ###

    ### OP Sampling Decoding ###
    op_sampling_target_ids, op_sampling_target_length = op_sampling_decoding(
        ft_encoder_result, memory_sequence_length, target_inputter.embedding,
        decoder_variables, decoding_sampling_args)

    op_sampling_target_tokens = target_vocab_rev.lookup(
        tf.cast(op_sampling_target_ids, tf.int64))
    op_sampling_target_length = tf.minimum(
        op_sampling_target_length + 1,
        tf.shape(op_sampling_target_ids)[-1])
    ### end of OP Sampling Decoding ###

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    time_args = args_dict['test_time']

    class TranslationResult(object):
        def __init__(self, token_op, length_op, name):
            self.token_op = token_op
            self.length_op = length_op
            self.name = name
            self.file_name = name + ".txt"

            self.token_list = []
            self.length_list = []
            self.batch_num = 0
            self.execution_time = 0.0  # seconds
            self.sentence_num = 0
            self.bleu_score = None

    translation_result_list = []

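    # Each digit present in the test_time string enables one of the six cases
    # below (0-5, in order).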
    if time_args.find("0") != -1:
        translation_result_list.append(
            TranslationResult(tf_beamsearch_target_tokens,
                              tf_beamsearch_target_length,
                              "tf-decoding-beamsearch"))
    if time_args.find("1") != -1:
        translation_result_list.append(
            TranslationResult(op_decoder_beamsearch_target_tokens,
                              op_decoder_beamsearch_target_length,
                              "op-decoder-beamsearch"))
    if time_args.find("2") != -1:
        translation_result_list.append(
            TranslationResult(op_beamsearch_target_tokens,
                              op_beamsearch_target_length,
                              "op-decoding-beamsearch"))
    if time_args.find("3") != -1:
        translation_result_list.append(
            TranslationResult(tf_sampling_target_tokens,
                              tf_sampling_target_length,
                              "tf-decoding-sampling"))
    if time_args.find("4") != -1:
        translation_result_list.append(
            TranslationResult(op_decoder_sampling_target_tokens,
                              op_decoder_sampling_target_length,
                              "op-decoder-sampling"))
    if time_args.find("5") != -1:
        translation_result_list.append(
            TranslationResult(op_sampling_target_tokens,
                              op_sampling_target_length,
                              "op-decoding-sampling"))

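    # Partition variables by dtype so each group can be restored from its
    # matching fp32 or fp16 checkpoint.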
    float_var_list = []
    half_var_list = []
    for var in tf.global_variables()[:-1]:
        if var.dtype.base_dtype == tf.float32:
            float_var_list.append(var)
        elif var.dtype.base_dtype == tf.float16:
            half_var_list.append(var)

    if len(translation_result_list) == 0:
        print("[WARNING] No test cases were selected.")

    cuda_profiler = cudaProfiler()
    cuda_profiler.start()
    for i in range(len(translation_result_list)):
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.tables_initializer())
            sess.run(iterator.initializer)

            if (len(float_var_list) > 0):
                float_saver = tf.train.Saver(float_var_list)
                float_saver.restore(sess, "translation/ckpt/model.ckpt-500000")
            if (len(half_var_list) > 0):
                half_saver = tf.train.Saver(half_var_list)
                half_saver.restore(sess,
                                   "translation/ckpt/fp16_model.ckpt-500000")

            t1 = datetime.now()
            while True:
                try:
                    batch_tokens, batch_length = sess.run([
                        translation_result_list[i].token_op,
                        translation_result_list[i].length_op
                    ])
                    for tokens, length in zip(batch_tokens, batch_length):
                        if translation_result_list[i].name.find(
                                "beamsearch") != -1:
                            translation_result_list[i].token_list.append(
                                b" ".join(tokens[0][:length[0] -
                                                    2]).decode("UTF-8"))
                        else:
                            translation_result_list[i].token_list.append(
                                b" ".join(tokens[:length - 2]).decode("UTF-8"))
                    translation_result_list[i].batch_num += 1
                except tf.errors.OutOfRangeError:
                    break
            t2 = datetime.now()
            time_sum = (t2 - t1).total_seconds()
            translation_result_list[i].execution_time = time_sum

            with open(translation_result_list[i].file_name, "w") as file_b:
                for s in translation_result_list[i].token_list:
                    file_b.write(s)
                    file_b.write("\n")

            ref_file_path = "./.ref_file.txt"
            os.system("head -n %d %s > %s" %
                      (len(translation_result_list[i].token_list),
                       args_dict['target'], ref_file_path))
            translation_result_list[i].bleu_score = bleu_score(
                translation_result_list[i].file_name, ref_file_path)
            os.system("rm {}".format(ref_file_path))

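            # Presumably a cool-down pause between timed cases.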
            time.sleep(60)
    cuda_profiler.stop()

    for t in translation_result_list:
        print(
            "[INFO] {} translates {} batches taking {:.2f} sec to translate {} tokens, BLEU score: {:.2f}, {:.0f} tokens/sec."
            .format(t.name, t.batch_num, t.execution_time,
                    t.bleu_score.sys_len, t.bleu_score.score,
                    t.bleu_score.sys_len / t.execution_time))

    return translation_result_list
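Every key read by translate_sample appears in its body above; a minimal invocation sketch follows, with illustrative placeholder values and paths (none of these values come from the snippet itself):

# Sketch only: each key below is one the function actually reads.
sample_args = {
    'batch_size': 8, 'beam_width': 4, 'max_seq_len': 32,
    'encoder_head_number': 8, 'encoder_size_per_head': 64,
    'decoder_head_number': 8, 'decoder_size_per_head': 64,
    'encoder_num_layer': 6, 'decoder_num_layer': 6,
    'data_type': 'fp32',
    'beam_search_diversity_rate': 0.0,
    'sampling_topk': 4, 'sampling_topp': 0.0,
    'source_vocabulary': './utils/translation/wmtende.vocab',  # placeholder path
    'target_vocabulary': './utils/translation/wmtende.vocab',  # placeholder path
    'source': './utils/translation/test.en',                   # placeholder path
    'target': './utils/translation/test.de',                   # placeholder path
    'remove_padding': 'True',
    'test_time': '012345',  # run all six decoding cases
}
results = translate_sample(sample_args)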
Code example #6
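Another excerpt of the same prologue, here taking the vocabulary and source paths from the parsed arguments instead of hard-coded values.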
    decoder_size_per_head = args.decoder_size_per_head
    encoder_num_layer = args.encoder_num_layer
    decoder_num_layer = args.decoder_num_layer
    encoder_hidden_dim = encoder_head_num * encoder_size_per_head
    decoder_hidden_dim = decoder_head_num * decoder_size_per_head
    tf_datatype = tf.float32
    np_datatype = np.float32
    if args.data_type == "fp16":
        tf_datatype = tf.float16
        np_datatype = np.float16
    beam_search_diversity_rate = args.beam_search_diversity_rate
    sampling_topk = args.sampling_topk
    sampling_topp = args.sampling_topp

    source_inputter = WordEmbedder("source_vocabulary",
                                   embedding_size=encoder_hidden_dim,
                                   dtype=tf_datatype)
    target_inputter = WordEmbedder("target_vocabulary",
                                   embedding_size=decoder_hidden_dim,
                                   dtype=tf_datatype)
    inputter = ExampleInputter(source_inputter, target_inputter)
    inputter.initialize({
        "source_vocabulary": args.source_vocabulary,
        "target_vocabulary": args.target_vocabulary
    })
    vocab_size = target_inputter.vocabulary_size
    source_file = args.source

    encoder_args = TransformerArgument(
        beam_width=1,
        head_num=encoder_head_num,
Code example #7
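As in code example #2; this copy of the truncated DecodingArgument call extends two parameters further.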
    decoder_size_per_head = args.decoder_size_per_head
    encoder_num_layer = args.encoder_num_layer
    decoder_num_layer = args.decoder_num_layer
    encoder_hidden_dim = encoder_head_num * encoder_size_per_head
    decoder_hidden_dim = decoder_head_num * decoder_size_per_head
    tf_datatype = tf.float32
    np_datatype = np.float32
    atol_threshold = 2e-5
    if args.data_type == "fp16":
        tf_datatype = tf.float16
        np_datatype = np.float16
        atol_threshold = 2e-2

    initializer_range = 0.02

    source_inputter = WordEmbedder("source_vocabulary", embedding_size=512)
    target_inputter = WordEmbedder("target_vocabulary", embedding_size=512)
    inputter = ExampleInputter(source_inputter, target_inputter)
    inputter.initialize({
        "source_vocabulary": "./utils/translation/wmtende.vocab",
        "target_vocabulary": "./utils/translation/wmtende.vocab"
    })
    vocab_size = target_inputter.vocabulary_size
    source_file = "./utils/translation/test.en"

    decoding_args = DecodingArgument(batch_size=batch_size,
                                     beam_width=beam_width,
                                     head_num=decoder_head_num,
                                     size_per_head=decoder_size_per_head,
                                     num_layer=decoder_num_layer,
                                     max_seq_len=max_seq_len,