def model():
    """Build a small 3-layer Transformer translation model.

    Source and target sides each use a 256-dim word embedding; the
    Transformer itself uses 3 layers, 256 units, 8 heads and a 256-dim
    feed-forward inner layer with 0.1 dropout throughout.
    """
    def _make_embedder(vocab_key):
        # Both sides share the same embedding configuration apart from the
        # vocabulary file key.
        return WordEmbedder(vocabulary_file_key=vocab_key, embedding_size=256)

    return Transformer(
        source_inputter=_make_embedder("source_words_vocabulary"),
        target_inputter=_make_embedder("target_words_vocabulary"),
        num_layers=3,
        num_units=256,
        num_heads=8,
        ffn_inner_dim=256,
        dropout=0.1,
        attention_dropout=0.1,
        relu_dropout=0.1,
    )
# NOTE(review): truncated fragment — it begins mid-scope (relies on `args`,
# `encoder_head_num`, `encoder_size_per_head`, `decoder_head_num`,
# `batch_size`, `beam_width`, `max_seq_len` defined earlier, not visible
# here) and its final call is cut off before the closing parenthesis.
decoder_size_per_head = args.decoder_size_per_head
encoder_num_layer = args.encoder_num_layer
decoder_num_layer = args.decoder_num_layer
# Hidden dimension = head count * per-head size (standard multi-head layout).
encoder_hidden_dim = encoder_head_num * encoder_size_per_head
decoder_hidden_dim = decoder_head_num * decoder_size_per_head
# Default to fp32; for fp16 switch both framework dtypes and loosen the
# absolute tolerance used for result comparison.
tf_datatype = tf.float32
np_datatype = np.float32
atol_threshold = 2e-5
if args.data_type == "fp16":
    tf_datatype = tf.float16
    np_datatype = np.float16
    atol_threshold = 2e-2
initializer_range = 0.02
# Build OpenNMT-tf inputters; both sides use the same shared WMT En-De
# vocabulary file, so source and target embeddings have the same vocab.
source_inputter = WordEmbedder("source_vocabulary", embedding_size=512)
target_inputter = WordEmbedder("target_vocabulary", embedding_size=512)
inputter = ExampleInputter(source_inputter, target_inputter)
inputter.initialize({
    "source_vocabulary": "./utils/translation/wmtende.vocab",
    "target_vocabulary": "./utils/translation/wmtende.vocab"
})
vocab_size = target_inputter.vocabulary_size
source_file = "./utils/translation/test.en"
# NOTE(review): this call continues beyond the visible chunk.
decoding_args = DecodingArgument(batch_size=batch_size,
                                 beam_width=beam_width,
                                 head_num=decoder_head_num,
                                 size_per_head=decoder_size_per_head,
def __init__(
    self,
    source_inputter=None,
    target_inputter=None,
    num_layers=6,
    num_units=512,
    num_heads=8,
    ffn_inner_dim=2048,
    dropout=0.1,
    attention_dropout=0.1,
    ffn_dropout=0.1,
    ffn_activation=tf.nn.relu,
    position_encoder_class=SinusoidalPositionEncoder,
    share_embeddings=EmbeddingsSharingLevel.NONE,
    share_encoders=False,
    maximum_relative_position=None,
    attention_reduction=MultiHeadAttentionReduction.FIRST_HEAD_LAST_LAYER,
    pre_norm=True,
    drop_encoder_self_attention_residual_connections=None,
    alignment_head_num_units=None,
):
    """Initialize a SIL sequence-to-sequence Transformer.

    Builds one self-attention encoder per source output (wrapped in a
    ``ParallelEncoder`` when there is more than one), a self-attention
    decoder, and the combined sequence-to-sequence inputter, then defers
    to the ``SequenceToSequence`` initializer.

    Args:
      source_inputter: Source inputter; defaults to a
        ``SILSourceWordEmbedder`` sized to ``num_units``.
      target_inputter: Target inputter; defaults to a ``WordEmbedder``
        sized to ``num_units``. Must be a ``WordEmbedder``.
      num_layers: Either a single int used for both encoder and decoder,
        or a (encoder_layers, decoder_layers) pair.
      drop_encoder_self_attention_residual_connections: Set of encoder
        layer indices whose self-attention residual connection is dropped.
        Defaults to an empty set.

    Raises:
      TypeError: If the target inputter is not a ``WordEmbedder``, or if
        embeddings sharing is requested and any source inputter is not a
        ``WordEmbedder``.
    """
    # Fix: the default used to be the mutable literal ``set()``, which is
    # shared across all calls. ``None`` now maps to a fresh empty set.
    if drop_encoder_self_attention_residual_connections is None:
        drop_encoder_self_attention_residual_connections = set()
    if source_inputter is None:
        source_inputter = SILSourceWordEmbedder(embedding_size=num_units)
    if target_inputter is None:
        target_inputter = WordEmbedder(embedding_size=num_units)
    if isinstance(num_layers, (list, tuple)):
        num_encoder_layers, num_decoder_layers = num_layers
    else:
        num_encoder_layers, num_decoder_layers = num_layers, num_layers
    # One encoder per source output; features inputters can expose several.
    encoders = [
        SILSelfAttentionEncoder(
            num_encoder_layers,
            num_units=num_units,
            num_heads=num_heads,
            ffn_inner_dim=ffn_inner_dim,
            dropout=dropout,
            attention_dropout=attention_dropout,
            ffn_dropout=ffn_dropout,
            ffn_activation=ffn_activation,
            position_encoder_class=position_encoder_class,
            maximum_relative_position=maximum_relative_position,
            pre_norm=pre_norm,
            drop_self_attention_residual_connections=(
                drop_encoder_self_attention_residual_connections),
        )
        for _ in range(source_inputter.num_outputs)
    ]
    if len(encoders) > 1:
        # With shared parameters a single encoder instance is reused.
        encoder = ParallelEncoder(
            encoders if not share_encoders else encoders[0],
            outputs_reducer=None,
            states_reducer=None,
        )
    else:
        encoder = encoders[0]
    decoder = SILSelfAttentionDecoder(
        num_decoder_layers,
        num_units=num_units,
        num_heads=num_heads,
        ffn_inner_dim=ffn_inner_dim,
        dropout=dropout,
        attention_dropout=attention_dropout,
        ffn_dropout=ffn_dropout,
        ffn_activation=ffn_activation,
        position_encoder_class=position_encoder_class,
        num_sources=source_inputter.num_outputs,
        maximum_relative_position=maximum_relative_position,
        attention_reduction=attention_reduction,
        pre_norm=pre_norm,
        alignment_head_num_units=alignment_head_num_units,
    )
    self._pre_norm = pre_norm
    self._num_units = num_units
    self._num_encoder_layers = num_encoder_layers
    self._num_decoder_layers = num_decoder_layers
    self._num_heads = num_heads
    self._with_relative_position = maximum_relative_position is not None
    # CTranslate2 export only supports plain self-attention encoders with
    # ReLU FFN and either relative positions (no position encoder) or the
    # sinusoidal position encoder.
    self._is_ct2_compatible = (
        isinstance(encoder, SelfAttentionEncoder)
        and ffn_activation is tf.nn.relu
        and ((self._with_relative_position and position_encoder_class is None)
             or (not self._with_relative_position
                 and position_encoder_class == SinusoidalPositionEncoder)))
    # Optional dictionary trie, populated elsewhere.
    self._dictionary: Optional[Trie] = None
    if not isinstance(target_inputter, WordEmbedder):
        raise TypeError("Target inputter must be a WordEmbedder")
    if EmbeddingsSharingLevel.share_input_embeddings(share_embeddings):
        if isinstance(source_inputter, ParallelInputter):
            source_inputters = source_inputter.inputters
        else:
            source_inputters = [source_inputter]
        for inputter in source_inputters:
            if not isinstance(inputter, WordEmbedder):
                raise TypeError(
                    "Sharing embeddings requires all inputters to be a "
                    "WordEmbedder")
    examples_inputter = SILSequenceToSequenceInputter(
        source_inputter,
        target_inputter,
        share_parameters=EmbeddingsSharingLevel.share_input_embeddings(
            share_embeddings),
    )
    # NOTE(review): deliberately skips the immediate parent and calls the
    # SequenceToSequence initializer directly, as in the original.
    super(SequenceToSequence, self).__init__(examples_inputter)
    self.encoder = encoder
    self.decoder = decoder
    self.share_embeddings = share_embeddings
# NOTE(review): truncated fragment — begins mid-scope (`args`, `batch_size`,
# `max_seq_len`, `encoder_hidden_dim`, `tf_datatype`, `beam_width`,
# `decoder_head_num` come from earlier, non-visible code) and its final
# call is cut off before the closing parenthesis.
np_datatype = np.float32
atol_threshold = 2e-5
# For fp16, switch dtypes and loosen the comparison tolerance.
if args.data_type == "fp16":
    tf_datatype = tf.float16
    np_datatype = np.float16
    atol_threshold = 2e-2
initializer_range = 0.02
# generate random data: a synthetic source embedding batch of shape
# (batch_size, max_seq_len, encoder_hidden_dim) instead of real text.
memory_sequence_length = max_seq_len
source_embedding = np.random.randn(batch_size, memory_sequence_length,
                                   encoder_hidden_dim)
source_embedding = tf.convert_to_tensor(source_embedding, dtype=tf_datatype)
# Inputters are still built (shared WMT En-De vocabulary on both sides) so
# the vocabulary size is available for decoding.
source_inputter = WordEmbedder("source_vocabulary",
                               embedding_size=encoder_hidden_dim)
target_inputter = WordEmbedder("target_vocabulary",
                               embedding_size=encoder_hidden_dim)
inputter = ExampleInputter(source_inputter, target_inputter)
inputter.initialize({
    "source_vocabulary": "./utils/translation/wmtende.vocab",
    "target_vocabulary": "./utils/translation/wmtende.vocab"
})
vocab_size = target_inputter.vocabulary_size
source_file = "./utils/translation/test.en"
# NOTE(review): this call continues beyond the visible chunk.
decoding_args = DecodingArgument(batch_size=batch_size,
                                 beam_width=beam_width,
                                 head_num=decoder_head_num,
def translate_sample(args_dict):
    """Translate a source file with several decoder/decoding variants and
    report BLEU score and throughput for each requested variant.

    Builds one TF graph containing: a TF encoder and a FasterTransformer
    (FT) encoder, TF beam-search and sampling decoding (with either the TF
    decoder, decoder_type=0, or the FT decoder op, decoder_type=1), and the
    fully fused FT beam-search/sampling decoding ops.  Which variants are
    actually run is selected by the digits in ``args_dict['test_time']``.

    Returns the list of ``TranslationResult`` objects, one per variant run.

    NOTE(review): this function was reconstructed from an unformatted
    source; statement nesting (in particular which statements sit inside
    the ``transformer/decoder`` variable scope and the ``tf.Session``
    block) was inferred from statement order — confirm against the
    original file.
    """
    # Echo the full configuration so logged runs are reproducible.
    print("\n=============== Argument ===============")
    for key in args_dict:
        print("{}: {}".format(key, args_dict[key]))
    print("========================================")

    # Fix every RNG seed so sampling decoding is repeatable across runs.
    np.random.seed(1)
    tf.set_random_seed(1)
    random.seed(1)

    start_of_sentence_id = 1
    end_of_sentence_id = 2
    kernel_initializer_range = 0.02
    bias_initializer_range = 0.02

    batch_size = args_dict['batch_size']
    beam_width = args_dict['beam_width']
    max_seq_len = args_dict['max_seq_len']
    encoder_head_num = args_dict['encoder_head_number']
    encoder_size_per_head = args_dict['encoder_size_per_head']
    decoder_head_num = args_dict['decoder_head_number']
    decoder_size_per_head = args_dict['decoder_size_per_head']
    encoder_num_layer = args_dict['encoder_num_layer']
    decoder_num_layer = args_dict['decoder_num_layer']
    # Hidden dimension = head count * per-head size (multi-head layout).
    encoder_hidden_dim = encoder_head_num * encoder_size_per_head
    decoder_hidden_dim = decoder_head_num * decoder_size_per_head
    tf_datatype = tf.float32
    if args_dict['data_type'] == "fp16":
        tf_datatype = tf.float16
    beam_search_diversity_rate = args_dict['beam_search_diversity_rate']
    sampling_topk = args_dict['sampling_topk']
    sampling_topp = args_dict['sampling_topp']

    # OpenNMT-tf inputters supply the embedding tables and the inference
    # dataset pipeline.
    source_inputter = WordEmbedder("source_vocabulary",
                                   embedding_size=encoder_hidden_dim,
                                   dtype=tf_datatype)
    target_inputter = WordEmbedder("target_vocabulary",
                                   embedding_size=decoder_hidden_dim,
                                   dtype=tf_datatype)
    inputter = ExampleInputter(source_inputter, target_inputter)
    inputter.initialize({
        "source_vocabulary": args_dict['source_vocabulary'],
        "target_vocabulary": args_dict['target_vocabulary']
    })
    vocab_size = target_inputter.vocabulary_size
    source_file = args_dict['source']
    is_remove_padding = True if args_dict['remove_padding'].lower(
    ) == "true" else False

    # Encoder always runs with beam_width=1; the beam dimension only
    # matters on the decoder side.
    encoder_args = TransformerArgument(
        beam_width=1,
        head_num=encoder_head_num,
        size_per_head=encoder_size_per_head,
        num_layer=encoder_num_layer,
        dtype=tf_datatype,
        kernel_init_range=kernel_initializer_range,
        bias_init_range=bias_initializer_range,
        remove_padding=is_remove_padding)
    decoder_args = TransformerArgument(
        beam_width=beam_width,
        head_num=decoder_head_num,
        size_per_head=decoder_size_per_head,
        num_layer=decoder_num_layer,
        dtype=tf_datatype,
        kernel_init_range=kernel_initializer_range,
        bias_init_range=bias_initializer_range,
        memory_hidden_dim=encoder_head_num * encoder_size_per_head)
    # Second argument set with beam_width=1 for the sampling variants
    # (decoder_args keeps the real beam width for beam search).
    decoder_args_2 = copy.deepcopy(decoder_args)  # for beam search
    decoder_args_2.__dict__ = copy.deepcopy(decoder_args.__dict__)
    decoder_args_2.beam_width = 1  # for sampling

    decoding_beamsearch_args = DecodingBeamsearchArgument(
        vocab_size, start_of_sentence_id, end_of_sentence_id, max_seq_len,
        decoder_args, beam_search_diversity_rate)
    decoding_sampling_args = DecodingSamplingArgument(
        vocab_size, start_of_sentence_id, end_of_sentence_id, max_seq_len,
        decoder_args_2, sampling_topk, sampling_topp)

    with tf.variable_scope("transformer/encoder", reuse=tf.AUTO_REUSE):
        dataset = inputter.make_inference_dataset(source_file, batch_size)
        iterator = dataset.make_initializable_iterator()
        source = iterator.get_next()
        source_embedding = source_inputter.make_inputs(source)
        source_embedding = tf.cast(source_embedding, tf_datatype)
        memory_sequence_length = source["length"]

        # Reference TF encoder.
        tf_encoder_result = tf_encoder_opennmt(
            source_embedding,
            encoder_args,
            sequence_length=memory_sequence_length)

        # Collect the encoder variables created above so the FT encoder op
        # can reuse the exact same weights.
        encoder_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        encoder_variables_dict = {}
        for v in encoder_vars:
            encoder_variables_dict[v.name] = v
        ft_encoder_result = ft_encoder_opennmt(
            inputs=source_embedding,
            encoder_args=encoder_args,
            encoder_vars_dict=encoder_variables_dict,
            sequence_length=memory_sequence_length)

        tf_encoder_result = tf.reshape(tf_encoder_result,
                                       tf.shape(source_embedding))
        ft_encoder_result = tf.reshape(ft_encoder_result,
                                       tf.shape(source_embedding))

    with tf.variable_scope("transformer/decoder", reuse=tf.AUTO_REUSE):
        target_inputter.build()
        target_vocab_rev = target_inputter.vocabulary_lookup_reverse()

        ### TF BeamSearch Decoding ###
        tf_beamsearch_target_ids, tf_beamsearch_target_length, _, _, _ = tf_beamsearch_decoding(
            tf_encoder_result,
            memory_sequence_length,
            target_inputter.embedding,
            decoding_beamsearch_args,
            decoder_type=0)

        # tf_beamsearch_target_tokens: [batch_size, beam_width, seq_len]
        tf_beamsearch_target_tokens = target_vocab_rev.lookup(
            tf.cast(tf_beamsearch_target_ids, tf.int64))
        # +1 to include the end-of-sentence token, clamped to the id
        # tensor's last dimension.
        tf_beamsearch_target_length = tf.minimum(
            tf_beamsearch_target_length + 1,
            tf.shape(tf_beamsearch_target_ids)[-1])
        ### end of TF BeamSearch Decoding ###

        ### TF Sampling Decoding ###
        tf_sampling_target_ids, tf_sampling_target_length = tf_sampling_decoding(
            tf_encoder_result,
            memory_sequence_length,
            target_inputter.embedding,
            decoding_sampling_args,
            decoder_type=0)

        # tf_sampling_target_tokens: [batch_size, seq_len]
        tf_sampling_target_tokens = target_vocab_rev.lookup(
            tf.cast(tf_sampling_target_ids, tf.int64))
        tf_sampling_target_length = tf.minimum(
            tf_sampling_target_length + 1,
            tf.shape(tf_sampling_target_ids)[-1])
        ### end of TF BeamSearch Decoding ###

        ### OP BeamSearch Decoder ###
        # Same TF decoding loop but with the FT decoder op (decoder_type=1).
        op_decoder_beamsearch_target_ids, op_decoder_beamsearch_target_length, _, _, _ = tf_beamsearch_decoding(
            tf_encoder_result,
            memory_sequence_length,
            target_inputter.embedding,
            decoding_beamsearch_args,
            decoder_type=1)

        # op_decoder_beamsearch_target_tokens: [batch_size, beam_width, seq_len]
        op_decoder_beamsearch_target_tokens = target_vocab_rev.lookup(
            tf.cast(op_decoder_beamsearch_target_ids, tf.int64))
        op_decoder_beamsearch_target_length = tf.minimum(
            op_decoder_beamsearch_target_length + 1,
            tf.shape(op_decoder_beamsearch_target_ids)[-1])
        ### end of OP BeamSearch Decoder ###

        ### OP Sampling Decoder ###
        op_decoder_sampling_target_ids, op_decoder_sampling_target_length = tf_sampling_decoding(
            tf_encoder_result,
            memory_sequence_length,
            target_inputter.embedding,
            decoding_sampling_args,
            decoder_type=1)

        op_decoder_sampling_target_tokens = target_vocab_rev.lookup(
            tf.cast(op_decoder_sampling_target_ids, tf.int64))
        op_decoder_sampling_target_length = tf.minimum(
            op_decoder_sampling_target_length + 1,
            tf.shape(op_decoder_sampling_target_ids)[-1])
        ### end of OP BeamSearch Decoder ###

        ### Prepare Decoding variables for FasterTransformer ###
        # Scan the global variables for the first one under
        # "transformer/decoder"; everything after it (skipping the
        # embedding table itself) belongs to the decoder.
        all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        decoder_var_start_id = 0
        while all_vars[decoder_var_start_id].name.find(
                "transformer/decoder") == -1:
            decoder_var_start_id += 1
        decoder_variables = all_vars[
            decoder_var_start_id + 1:]  # decoder_var_start_id + 1 means skip the embedding table

        ### OP BeamSearch Decoding ###
        # Fully fused FT decoding op, consuming the FT encoder result.
        op_beamsearch_target_ids, op_beamsearch_target_length, _, _, _ = op_beamsearch_decoding(
            ft_encoder_result,
            memory_sequence_length,
            target_inputter.embedding,
            decoder_variables,
            decoding_beamsearch_args)

        op_beamsearch_target_tokens = target_vocab_rev.lookup(
            tf.cast(op_beamsearch_target_ids, tf.int64))
        op_beamsearch_target_length = tf.minimum(
            op_beamsearch_target_length + 1,
            tf.shape(op_beamsearch_target_ids)[-1])
        ### end of OP BeamSearch Decoding ###

        ### OP Sampling Decoding ###
        op_sampling_target_ids, op_sampling_target_length = op_sampling_decoding(
            ft_encoder_result,
            memory_sequence_length,
            target_inputter.embedding,
            decoder_variables,
            decoding_sampling_args)

        op_sampling_target_tokens = target_vocab_rev.lookup(
            tf.cast(op_sampling_target_ids, tf.int64))
        op_sampling_target_length = tf.minimum(
            op_sampling_target_length + 1,
            tf.shape(op_sampling_target_ids)[-1])
        ### end of OP Sampling Decoding ###

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    time_args = args_dict['test_time']

    class TranslationResult(object):
        """Per-variant bookkeeping: ops to run, outputs, timing, BLEU."""

        def __init__(self, token_op, length_op, name):
            self.token_op = token_op
            self.length_op = length_op
            self.name = name
            self.file_name = name + ".txt"
            self.token_list = []
            self.length_list = []
            self.batch_num = 0
            self.execution_time = 0.0  # seconds
            self.sentence_num = 0
            self.bleu_score = None

    # Each digit in test_time selects one decoding variant to benchmark.
    translation_result_list = []
    if time_args.find("0") != -1:
        translation_result_list.append(
            TranslationResult(tf_beamsearch_target_tokens,
                              tf_beamsearch_target_length,
                              "tf-decoding-beamsearch"))
    if time_args.find("1") != -1:
        translation_result_list.append(
            TranslationResult(op_decoder_beamsearch_target_tokens,
                              op_decoder_beamsearch_target_length,
                              "op-decoder-beamsearch"))
    if time_args.find("2") != -1:
        translation_result_list.append(
            TranslationResult(op_beamsearch_target_tokens,
                              op_beamsearch_target_length,
                              "op-decoding-beamsearch"))
    if time_args.find("3") != -1:
        translation_result_list.append(
            TranslationResult(tf_sampling_target_tokens,
                              tf_sampling_target_length,
                              "tf-decoding-sampling"))
    if time_args.find("4") != -1:
        translation_result_list.append(
            TranslationResult(op_decoder_sampling_target_tokens,
                              op_decoder_sampling_target_length,
                              "op-decoder-sampling"))
    if time_args.find("5") != -1:
        translation_result_list.append(
            TranslationResult(op_sampling_target_tokens,
                              op_sampling_target_length,
                              "op-decoding-sampling"))

    # Split variables by dtype; fp32 and fp16 weights live in separate
    # checkpoints.  [:-1] skips the last global variable (presumably the
    # vocabulary table — TODO confirm).
    float_var_list = []
    half_var_list = []
    for var in tf.global_variables()[:-1]:
        if var.dtype.base_dtype == tf.float32:
            float_var_list.append(var)
        elif var.dtype.base_dtype == tf.float16:
            half_var_list.append(var)

    if (len(translation_result_list) == 0):
        print("[WARNING] No put any test cases.")

    cuda_profiler = cudaProfiler()
    cuda_profiler.start()
    # One fresh session per variant so timings do not share warm state.
    for i in range(len(translation_result_list)):
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.tables_initializer())
            sess.run(iterator.initializer)
            if (len(float_var_list) > 0):
                float_saver = tf.train.Saver(float_var_list)
                float_saver.restore(sess,
                                    "translation/ckpt/model.ckpt-500000")
            if (len(half_var_list) > 0):
                half_saver = tf.train.Saver(half_var_list)
                half_saver.restore(
                    sess, "translation/ckpt/fp16_model.ckpt-500000")

            t1 = datetime.now()
            # Drain the dataset; OutOfRangeError marks end of input.
            while True:
                try:
                    batch_tokens, batch_length = sess.run([
                        translation_result_list[i].token_op,
                        translation_result_list[i].length_op
                    ])
                    for tokens, length in zip(batch_tokens, batch_length):
                        # Beam-search outputs carry an extra beam axis;
                        # keep only the best (first) hypothesis.  The -2
                        # trims the sentence-end markers from the text.
                        if translation_result_list[i].name.find(
                                "beamsearch") != -1:
                            translation_result_list[i].token_list.append(
                                b" ".join(
                                    tokens[0][:length[0] - 2]).decode("UTF-8"))
                        else:
                            translation_result_list[i].token_list.append(
                                b" ".join(tokens[:length - 2]).decode("UTF-8"))
                    translation_result_list[i].batch_num += 1
                except tf.errors.OutOfRangeError:
                    break
            t2 = datetime.now()
            time_sum = (t2 - t1).total_seconds()
            translation_result_list[i].execution_time = time_sum

            # Dump hypotheses, build a length-matched reference file, and
            # score BLEU against it.
            with open(translation_result_list[i].file_name, "w") as file_b:
                for s in translation_result_list[i].token_list:
                    file_b.write(s)
                    file_b.write("\n")
            ref_file_path = "./.ref_file.txt"
            os.system("head -n %d %s > %s" %
                      (len(translation_result_list[i].token_list),
                       args_dict['target'], ref_file_path))
            translation_result_list[i].bleu_score = bleu_score(
                translation_result_list[i].file_name, ref_file_path)
            os.system("rm {}".format(ref_file_path))
            # NOTE(review): fixed pause between variants — presumably to
            # let the GPU cool/settle for fair timing; confirm intent.
            time.sleep(60)
    cuda_profiler.stop()

    for t in translation_result_list:
        print(
            "[INFO] {} translates {} batches taking {:.2f} sec to translate {} tokens, BLEU score: {:.2f}, {:.0f} tokens/sec."
            .format(t.name, t.batch_num, t.execution_time,
                    t.bleu_score.sys_len, t.bleu_score.score,
                    t.bleu_score.sys_len / t.execution_time))
    return translation_result_list
# NOTE(review): truncated fragment — begins mid-scope (`args`,
# `encoder_head_num`, `encoder_size_per_head`, `decoder_head_num` come from
# earlier, non-visible code) and its final call is cut off before the
# closing parenthesis.
decoder_size_per_head = args.decoder_size_per_head
encoder_num_layer = args.encoder_num_layer
decoder_num_layer = args.decoder_num_layer
# Hidden dimension = head count * per-head size (multi-head layout).
encoder_hidden_dim = encoder_head_num * encoder_size_per_head
decoder_hidden_dim = decoder_head_num * decoder_size_per_head
# Default to fp32; switch both framework dtypes for fp16.
tf_datatype = tf.float32
np_datatype = np.float32
if args.data_type == "fp16":
    tf_datatype = tf.float16
    np_datatype = np.float16
beam_search_diversity_rate = args.beam_search_diversity_rate
sampling_topk = args.sampling_topk
sampling_topp = args.sampling_topp
# OpenNMT-tf inputters provide embeddings and the dataset pipeline;
# vocabulary paths come from the command-line arguments here.
source_inputter = WordEmbedder("source_vocabulary",
                               embedding_size=encoder_hidden_dim,
                               dtype=tf_datatype)
target_inputter = WordEmbedder("target_vocabulary",
                               embedding_size=decoder_hidden_dim,
                               dtype=tf_datatype)
inputter = ExampleInputter(source_inputter, target_inputter)
inputter.initialize({
    "source_vocabulary": args.source_vocabulary,
    "target_vocabulary": args.target_vocabulary
})
vocab_size = target_inputter.vocabulary_size
source_file = args.source
# NOTE(review): this call continues beyond the visible chunk.
encoder_args = TransformerArgument(beam_width=1,
                                   head_num=encoder_head_num,
decoder_size_per_head = args.decoder_size_per_head encoder_num_layer = args.encoder_num_layer decoder_num_layer = args.decoder_num_layer encoder_hidden_dim = encoder_head_num * encoder_size_per_head decoder_hidden_dim = decoder_head_num * decoder_size_per_head tf_datatype = tf.float32 np_datatype = np.float32 atol_threshold = 2e-5 if args.data_type == "fp16": tf_datatype = tf.float16 np_datatype = np.float16 atol_threshold = 2e-2 initializer_range = 0.02 source_inputter = WordEmbedder("source_vocabulary", embedding_size=512) target_inputter = WordEmbedder("target_vocabulary", embedding_size=512) inputter = ExampleInputter(source_inputter, target_inputter) inputter.initialize({ "source_vocabulary": "./utils/translation/wmtende.vocab", "target_vocabulary": "./utils/translation/wmtende.vocab" }) vocab_size = target_inputter.vocabulary_size source_file = "./utils/translation/test.en" decoding_args = DecodingArgument(batch_size=batch_size, beam_width=beam_width, head_num=decoder_head_num, size_per_head=decoder_size_per_head, num_layer=decoder_num_layer, max_seq_len=max_seq_len,