def encode(self, inputs, attention_bias, training):
    """Generate continuous representation for inputs.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length].
      training: boolean, whether in training mode or not.

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
    with tf.name_scope("encode"):
        # Prepare inputs to the layer stack by adding positional encodings and
        # applying dropout.
        embedded_inputs = self.embedding_softmax_layer(inputs)
        embedded_inputs = tf.cast(embedded_inputs, self.params["dtype"])
        inputs_padding = model_utils.get_padding(inputs)
        attention_bias = tf.cast(attention_bias, self.params["dtype"])

        with tf.name_scope("add_pos_encoding"):
            length = tf.shape(embedded_inputs)[1]
            pos_encoding = model_utils.get_position_encoding(
                length, self.params["hidden_size"])
            pos_encoding = tf.cast(pos_encoding, self.params["dtype"])
            encoder_inputs = embedded_inputs + pos_encoding

        if training:
            encoder_inputs = tf.nn.dropout(
                encoder_inputs, rate=self.params["layer_postprocess_dropout"])

        return self.encoder_stack(
            encoder_inputs, attention_bias, inputs_padding, training=training)

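# All of the snippets in this listing call
# model_utils.get_position_encoding(length, hidden_size), whose definition is
# not shown here. Below is a minimal sketch of the sinusoidal timing signal it
# is assumed to return, following the reference Transformer implementation;
# the timescale defaults (1.0 and 1e4) are assumptions and the actual helper
# may differ in detail.
import math
import tensorflow as tf

def get_position_encoding(length, hidden_size,
                          min_timescale=1.0, max_timescale=1.0e4):
    """Sinusoidal position encoding with shape [length, hidden_size] (sketch)."""
    position = tf.cast(tf.range(length), tf.float32)
    num_timescales = hidden_size // 2
    # Geometrically spaced wavelengths between min_timescale and max_timescale.
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale)) /
        (tf.cast(num_timescales, tf.float32) - 1))
    inv_timescales = min_timescale * tf.exp(
        tf.cast(tf.range(num_timescales), tf.float32) * -log_timescale_increment)
    scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
    # First half of the channels use sin, second half cos.
    return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
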
def build_embed(self, inputs, encoder=True, reuse=False):
    with tf.variable_scope(
            "Embeddings",
            reuse=reuse,
            initializer=tf.contrib.layers.xavier_initializer()):
        # Word embedding table shared between encoder and decoder.
        self.shared_weights = tf.get_variable(
            'shared_weights', [self.vocab, self.hidden_dim], dtype=tf.float32)

        if encoder:
            max_seq_length = self.max_enc_len
        else:
            max_seq_length = self.max_dec_len

        # Positional encoding.
        with tf.variable_scope("positional-encoding"):
            positional_encoded = model_utils.get_position_encoding(
                max_seq_length, self.hidden_dim)

        batch_size = tf.shape(inputs)[0]
        mask = tf.to_float(tf.not_equal(inputs, 0))

        # Add word and position embeddings.
        word_emb = tf.nn.embedding_lookup(
            self.shared_weights, inputs)  # [batch_size, length, hidden_dim]
        word_emb *= tf.expand_dims(mask, -1)  # zero out masked positions
        word_emb *= self.hidden_dim ** 0.5  # scale by sqrt of the hidden size

        position_inputs = tf.tile(tf.range(0, max_seq_length), [batch_size])
        position_inputs = tf.reshape(position_inputs,
                                     [batch_size, max_seq_length])
        position_emb = tf.nn.embedding_lookup(positional_encoded,
                                              position_inputs)

        encoded_inputs = tf.add(word_emb, position_emb)
        return tf.nn.dropout(encoded_inputs, 1.0 - self.dropout)

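# Tiny standalone check (with assumed toy sizes) of the position-index
# construction used in build_embed above: tiling then reshaping the range
# yields a [batch_size, max_seq_length] matrix of positions 0..max_seq_length-1
# repeated once per batch row, suitable for the positional embedding lookup.
import tensorflow as tf

batch_size, max_seq_length = 2, 4
position_inputs = tf.tile(tf.range(0, max_seq_length), [batch_size])
position_inputs = tf.reshape(position_inputs, [batch_size, max_seq_length])
# -> [[0, 1, 2, 3],
#     [0, 1, 2, 3]]
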
def Embedding(self, x):
    # Args:
    #   x: [batch_size, length]
    # Returns:
    #   [batch_size, length, hidden_size]
    hparams = self.hparams
    if hparams['embedding_model'] == 'transformer':
        self.embedding_layer = embedding_layer.EmbeddingSharedWeights(
            hparams["vocab_size"], hparams["hidden_size"])
        embedded_inputs = self.embedding_layer(x)

        with tf.name_scope("add_pos_encoding"):
            length = tf.shape(embedded_inputs)[1]
            pos_encoding = model_utils.get_position_encoding(
                length, hparams["hidden_size"])
            encoder_inputs = embedded_inputs + pos_encoding

        if self.hparams['train']:
            encoder_inputs = tf.nn.dropout(
                encoder_inputs, rate=self.hparams["layer_postprocess_dropout"])

        self.inputs_padding = model_utils.get_padding(x)
        self.attention_bias = model_utils.get_padding_bias(x)
        return encoder_inputs

def _get_symbols_to_logits_fn(self, max_decode_length, training):
    """Returns a decoding function that calculates logits of the next tokens."""
    timing_signal = model_utils.get_position_encoding(
        max_decode_length + 1, self.params['hidden_size'])
    timing_signal = tf.cast(timing_signal, self.params['dtype'])
    decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
        max_decode_length, dtype=self.params['dtype'])

    def symbols_to_logits(ids, i, cache):
        """Generate logits for next potential IDs.

        ids: [batch_size * beam_size, i + 1]
        i: Loop index
        return: [batch_size * beam_size, vocab_size]
        """
        decoder_input = ids[:, -1:]
        decoder_input = self.target_embedding_layer(decoder_input)
        decoder_input += timing_signal[i:i + 1]

        self_attention_bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1]
        decoder_outputs = self.decoder_stack(
            decoder_input,
            cache.get('encoder_outputs'),
            self_attention_bias,
            cache.get('encoder_decoder_attention_bias'),
            training=training,
            cache=cache)
        logits = self.target_embedding_layer(decoder_outputs, mode='linear')
        logits = tf.squeeze(logits, axis=[1])
        return logits, cache

    return symbols_to_logits

def _get_symbols_to_logits_fn(self, max_decode_length):
    """Returns a decoding function that calculates logits of the next tokens."""
    timing_signal = model_utils.get_position_encoding(
        max_decode_length + 1, self.params["hidden_size"])
    # Triangular bias matrix with shape (1, 1, length, length).
    decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
        max_decode_length)

    def symbols_to_logits_fn(ids, i, cache):
        # Set decoder input to the last generated IDs.
        if i == 0:
            decoder_input = tf.zeros(
                [ids.shape[0], 1, self.params["hidden_size"]])
        else:
            decoder_input = ids[:, -1:]  # (batch, 1)
            decoder_input = self.embedding_softmax_layer_decoder(
                decoder_input)  # (batch, 1, 256)

        decoder_input += timing_signal[i:i + 1]

        # In translation this bias slice is an all-zero vector whose length
        # equals the current decode length i + 1, so it has no effect: adding
        # it to the attention logits leaves them unchanged.
        self_attention_bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1]
        decoder_outputs = self.decoder_stack(
            decoder_input,
            cache.get("encoder_outputs"),
            self_attention_bias,
            cache.get("encoder_decoder_attention_bias"),
            cache)
        logits = self.embedding_softmax_layer_decoder.linear(decoder_outputs)
        return logits, cache

    return symbols_to_logits_fn

def encode(self, inputs, attention_bias):
    """Generate continuous representation for inputs.

    Pipeline: embedding -> positional encoding -> dropout -> encoder_stack.

    inputs: the raw input sentences, shape=[batch_size, input_length].
    attention_bias: padded positions are marked with -1e9 and all other
        positions with 0, shape=[batch_size, 1, 1, input_length].

    Returns the features extracted by the encoder,
    shape=[batch_size, input_length, hidden_size].
    """
    with tf.name_scope("encode"):
        # shape=(batch_size, length, embedding_dim).
        # During embedding, padded positions produce all-zero vectors.
        embedded_inputs = self.embedding_layer_encoder(inputs)  # embedding
        length = tf.shape(embedded_inputs)[1]
        # Mark padded positions with 1 and all others with 0.
        # shape=(batch_size, length)
        inputs_padding = model_utils.get_padding(inputs)

        with tf.name_scope("add_pos_encoding"):
            # Positional encoding, shape=(length, hidden_size).
            pos_encoding = model_utils.get_position_encoding(
                length, self.params["hidden_size"])
            # Combined, shape=(batch_size, length, hidden_size) after broadcasting.
            encoder_inputs = embedded_inputs + pos_encoding

        if self.train:
            encoder_inputs = tf.nn.dropout(
                encoder_inputs, rate=self.params["layer_postprocess_dropout"])

        # Final step: run the encoder stack.
        return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)

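# The encoders above also rely on model_utils.get_padding and
# model_utils.get_padding_bias, which are not shown in this listing. A minimal
# sketch of what they are assumed to compute, matching the docstrings above
# (padding id 0, bias value -1e9); the actual helpers may differ in detail.
import tensorflow as tf

def get_padding(x, padding_value=0):
    """Returns a float tensor with 1.0 at padded positions and 0.0 elsewhere."""
    return tf.cast(tf.equal(x, padding_value), tf.float32)

def get_padding_bias(x):
    """Returns an attention bias of shape [batch_size, 1, 1, length]:
    -1e9 at padded positions, 0 elsewhere."""
    attention_bias = get_padding(x) * -1e9
    return tf.expand_dims(tf.expand_dims(attention_bias, axis=1), axis=1)
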
def train():
    # Config.
    batch_size = 4
    lr = 0.0005
    model_dir = 'model2/'
    logfile = 'second.log'
    fop = open(logfile, 'w')

    # Prepare data.
    dataline = open('data/train.txt').readlines()
    datalength = len(dataline)
    traindata = dataline[int(datalength / 5):]
    print(len(traindata))
    vecmodel = word2vec.sentence2vec('sgns.weibo.bigram-char')
    a = dataloader(traindata, vecmodel, batch_size)
    a.start()

    # Build model.
    inputdata = tf.placeholder(tf.float32, [batch_size, None, 300])
    inputpadding = tf.placeholder(tf.float32, [batch_size, None])
    pos = tf.placeholder(tf.float32, [None, 32])
    inputlabel = tf.placeholder(tf.int32, [batch_size])
    classifier = model(True, batch_size)
    outlogit = classifier(inputdata, inputpadding, pos)
    loss = tf.losses.softmax_cross_entropy(tf.one_hot(inputlabel, 2), outlogit)
    train_op = tf.train.AdamOptimizer(lr).minimize(loss)
    saver = tf.train.Saver(max_to_keep=0)
    print('build finished')

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    model_file = tf.train.latest_checkpoint(model_dir)
    saver.restore(sess, model_file)
    # sess.run(tf.global_variables_initializer())
    # sess.run(tf.local_variables_initializer())

    # Train loop.
    for step in tqdm(range(26000, 400000)):
        data, label, padding = a.getdata()
        length = data.shape[1]
        trainloss, _ = sess.run(
            [loss, train_op],
            feed_dict={
                inputdata: data,
                inputpadding: padding,
                pos: model_utils.get_position_encoding(length, 32),
                inputlabel: label
            })
        if step % 100 == 0:
            print('loss:' + str(trainloss))
            fop.write('loss:' + str(trainloss) + '\n')
        if step % 1000 == 0:
            saver.save(sess, model_dir + '/transform.ckpt', global_step=step)

def encode(self, inputs, attention_bias):
    """Generate continuous representation for inputs.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
    with tf.name_scope("encode"):
        # Prepare inputs to the layer stack by adding positional encodings and
        # applying dropout.
        embedded_inputs = self.embedding_softmax_layer(inputs)
        inputs_padding = model_utils.get_padding(inputs)

        with tf.name_scope("add_pos_encoding"):
            length = tf.shape(embedded_inputs)[1]
            pos_encoding = model_utils.get_position_encoding(
                length, self.params.hidden_size)
            encoder_inputs = embedded_inputs + pos_encoding

        if self.train:
            encoder_inputs = tf.nn.dropout(
                encoder_inputs, 1 - self.params.layer_postprocess_dropout)

        # with tf.variable_scope(
        #         self.scope,
        #         initializer=tf.truncated_normal_initializer(stddev=0.01),
        #         reuse=tf.AUTO_REUSE):
        #     self.embeddings = tf.layers.dense(
        #         self.out1, self.params.output_dim,
        #         bias_initializer=tf.constant_initializer(0.1), name="dense")

        return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)

def decode(self, targets, encoder_outputs, attention_bias):
    """Generate logits for each value in the target sequence.

    Args:
      targets: target values for the output sequence. int tensor with shape
        [batch_size, target_length]
      encoder_outputs: continuous representation of input sequence. float
        tensor with shape [batch_size, input_length, hidden_size]
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float32 tensor with shape [batch_size, target_length, vocab_size]
    """
    with tf.name_scope("decode"):
        # Prepare inputs to decoder layers by shifting targets, adding
        # positional encoding and applying dropout.
        decoder_inputs = self.embedding_softmax_layer(targets)
        with tf.name_scope("shift_targets"):
            # Shift targets to the right, and remove the last element.
            decoder_inputs = tf.pad(decoder_inputs,
                                    [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
        with tf.name_scope("add_pos_encoding"):
            length = tf.shape(decoder_inputs)[1]
            decoder_inputs += model_utils.get_position_encoding(
                length, self.params.hidden_size)
        if self.train:
            decoder_inputs = tf.nn.dropout(
                decoder_inputs, 1 - self.params.layer_postprocess_dropout)

        # Run values
        decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
            length)
        outputs = self.decoder_stack(decoder_inputs, encoder_outputs,
                                     decoder_self_attention_bias,
                                     attention_bias)
        logits = self.embedding_softmax_layer.linear(outputs)
        return logits

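# Toy illustration (hypothetical shapes) of the "shift_targets" step used in
# the decode functions in this listing: padding one step at the front of the
# time axis and dropping the last step means position t of the decoder input
# holds the embedding of target t-1, with an all-zero vector acting as the
# implicit start symbol.
import tensorflow as tf

x = tf.reshape(tf.range(2 * 3 * 1, dtype=tf.float32), [2, 3, 1])  # [batch, T, H]
shifted = tf.pad(x, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
# shifted[:, 0, :] is all zeros; shifted[:, t, :] == x[:, t - 1, :] for t >= 1.
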
def encode(self, inputs, attention_bias, input_types=None):
    """Generate continuous representation for inputs.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]
      input_types: optional tensor of per-token type IDs; when provided, a
        type embedding is added to the encoder inputs.

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
    with tf.name_scope("encode"):
        # Prepare inputs to the layer stack by adding positional encodings and
        # applying dropout.
        embedded_inputs = self.embedding_softmax_layer(inputs)
        inputs_padding = model_utils.get_padding(inputs)

        with tf.name_scope("add_pos_encoding"):
            length = tf.shape(embedded_inputs)[1]
            pos_encoding = model_utils.get_position_encoding(
                length, self.params["hidden_size"])
            encoder_inputs = embedded_inputs + pos_encoding

        if input_types is not None:
            input_types = model_utils.get_input_types(
                input_types, self.params["hidden_size"],
                num_types=self.params["num_types"])
            encoder_inputs = encoder_inputs + input_types

        if self.train:
            encoder_inputs = tf.nn.dropout(
                encoder_inputs, 1 - self.params["layer_postprocess_dropout"])

        return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)

def decode(self, targets, encoder_outputs, attention_bias, training):
    """Generate logits for each value in the target sequence.

    targets: [batch_size, target_length]
    encoder_outputs: [batch_size, input_length, hidden_size]
    attention_bias: [batch_size, 1, 1, input_length]
    return: [batch_size, target_length, vocab_size]
    """
    with tf.name_scope('decode'):
        decoder_inputs = self.target_embedding_layer(targets)
        decoder_inputs = tf.cast(decoder_inputs, self.params['dtype'])
        attention_bias = tf.cast(attention_bias, self.params['dtype'])

        with tf.name_scope('shift_targets'):
            decoder_inputs = tf.pad(decoder_inputs,
                                    [[0, 0], [1, 0], [0, 0]])[:, :-1, :]

        with tf.name_scope('add_pos_encoding'):
            length = tf.shape(decoder_inputs)[1]
            pos_encoding = model_utils.get_position_encoding(
                length, self.params['hidden_size'])
            pos_encoding = tf.cast(pos_encoding, self.params['dtype'])
            decoder_inputs += pos_encoding

        if training:
            decoder_inputs = tf.nn.dropout(
                decoder_inputs, rate=self.params['layer_postprocess_dropout'])

        decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
            length, dtype=self.params['dtype'])
        outputs = self.decoder_stack(decoder_inputs,
                                     encoder_outputs,
                                     decoder_self_attention_bias,
                                     attention_bias,
                                     training=training)
        logits = self.target_embedding_layer(outputs, mode='linear')
        logits = tf.cast(logits, tf.float32)
        return logits

def _get_symbols_to_logits_fn(self, max_decode_length, training):
    """Returns a decoding function that calculates logits of the next tokens."""
    timing_signal = model_utils.get_position_encoding(
        max_decode_length + 1, self.params["hidden_size"])
    timing_signal = tf.cast(timing_signal, self.params["dtype"])
    decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
        max_decode_length, dtype=self.params["dtype"])

    # TODO(b/139770046): Refactor code with better naming of i.
    def symbols_to_logits_fn(ids, i, cache):
        """Generate logits for next potential IDs.

        Args:
          ids: Current decoded sequences. int tensor with shape
            [batch_size * beam_size, i + 1].
          i: Loop index.
          cache: dictionary of values storing the encoder output,
            encoder-decoder attention bias, and previous decoder attention
            values.

        Returns:
          Tuple of (logits with shape [batch_size * beam_size, vocab_size],
            updated cache values)
        """
        # Set decoder input to the last generated IDs.
        decoder_input = ids[:, -1:]

        # Preprocess decoder input by getting embeddings and adding timing
        # signal.
        decoder_input = self.embedding_softmax_layer(decoder_input)

        if self.params["padded_decode"]:
            timing_signal_shape = timing_signal.shape.as_list()
            decoder_input += tf.slice(timing_signal, [i, 0],
                                      [1, timing_signal_shape[1]])

            bias_shape = decoder_self_attention_bias.shape.as_list()
            self_attention_bias = tf.slice(
                decoder_self_attention_bias, [0, 0, i, 0],
                [bias_shape[0], bias_shape[1], 1, bias_shape[3]])
        else:
            decoder_input += timing_signal[i:i + 1]
            self_attention_bias = decoder_self_attention_bias[:, :, i:i + 1,
                                                              :i + 1]

        decoder_outputs = self.decoder_stack(
            decoder_input,
            cache.get("encoder_outputs"),
            self_attention_bias,
            cache.get("encoder_decoder_attention_bias"),
            training=training,
            cache=cache,
            decode_loop_step=i if self.params["padded_decode"] else None)
        logits = self.embedding_softmax_layer(decoder_outputs, mode="linear")
        logits = tf.squeeze(logits, axis=[1])
        return logits, cache

    return symbols_to_logits_fn

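# Small standalone check (with assumed static shapes) that the two slicing
# styles in symbols_to_logits_fn above pick out the same one-step timing
# signal: Python slicing for the dynamic decode path, tf.slice for the
# padded_decode path where all shapes must stay fixed.
import tensorflow as tf

hidden_size = 4
timing_signal = tf.reshape(tf.range(8 * hidden_size, dtype=tf.float32),
                           [8, hidden_size])
i = 3
dynamic_slice = timing_signal[i:i + 1]                             # [1, hidden_size]
padded_slice = tf.slice(timing_signal, [i, 0], [1, hidden_size])   # same values
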
def inferenceAPI(self, data):
    vec = np.array([self.vecmodel(data)])
    padnum = np.ones((1, vec.shape[0]))
    logit = self.sess.run(
        self.outlabel,
        feed_dict={
            self.inputdata: vec,
            self.inputpadding: padnum,
            self.pos: model_utils.get_position_encoding(len(vec), 32)
        })[0]
    return logit  # just return result

def decode(self, targets, encoder_outputs, attention_bias, training):
    """Generate logits for each value in the target sequence.

    Args:
      targets: target values for the output sequence. int tensor with shape
        [batch_size, target_length]
      encoder_outputs: continuous representation of input sequence. float
        tensor with shape [batch_size, input_length, hidden_size]
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]
      training: boolean, whether in training mode or not.

    Returns:
      float32 tensor with shape [batch_size, target_length, vocab_size]
    """
    with tf.name_scope("decode"):
        # Prepare inputs to decoder layers by shifting targets, adding
        # positional encoding and applying dropout.
        decoder_inputs = self.embedding_softmax_layer(targets)
        decoder_inputs = tf.cast(decoder_inputs, self.params["dtype"])
        attention_bias = tf.cast(attention_bias, self.params["dtype"])
        with tf.name_scope("shift_targets"):
            # Shift targets to the right, and remove the last element.
            decoder_inputs = tf.pad(decoder_inputs,
                                    [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
        with tf.name_scope("add_pos_encoding"):
            length = tf.shape(decoder_inputs)[1]
            pos_encoding = model_utils.get_position_encoding(
                length, self.params["hidden_size"])
            pos_encoding = tf.cast(pos_encoding, self.params["dtype"])
            decoder_inputs += pos_encoding
        if training:
            decoder_inputs = tf.nn.dropout(
                decoder_inputs, rate=self.params["layer_postprocess_dropout"])

        # Run values
        decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
            length, dtype=self.params["dtype"])
        outputs = self.decoder_stack(
            decoder_inputs,
            encoder_outputs,
            decoder_self_attention_bias,
            attention_bias,
            training=training)
        logits = self.embedding_softmax_layer(outputs, mode="linear")
        logits = tf.cast(logits, tf.float32)
        return logits

def decode(self, targets, encoder_outputs, attention_bias):
    """Generate logits for each value in the target sequence.

    targets: target-language tokens, shape=[batch_size, target_length];
        used to compute the loss.
    encoder_outputs: encoder output that the decoder attends over,
        shape=[batch_size, input_length, hidden_size].
    attention_bias: padded positions are marked with -1e9 and all other
        positions with 0, shape=[batch_size, 1, 1, input_length].

    Returns a tensor of shape [batch_size, target_length, vocab_size];
    the last dimension equals the vocabulary size.
    """
    with tf.name_scope("decode"):
        # After embedding: shape=(batch_size, length, embedding_dim).
        decoder_inputs = self.embedding_softmax_layer_decoder(targets)

        # Prepend an all-zero vector along the length dimension and drop the
        # last step; the shape stays the same.
        with tf.name_scope("shift_targets"):
            decoder_inputs = tf.pad(decoder_inputs,
                                    [[0, 0], [1, 0], [0, 0]])[:, :-1, :]

        # Add positional encoding.
        with tf.name_scope("add_pos_encoding"):
            length = tf.shape(decoder_inputs)[1]
            decoder_inputs += model_utils.get_position_encoding(
                length, self.params["hidden_size"])

        if self.train:
            decoder_inputs = tf.nn.dropout(
                decoder_inputs, rate=self.params["layer_postprocess_dropout"])

        # shape=(1, 1, length, length): the main diagonal and lower triangle
        # are 0, all other elements are a very large negative value.
        decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
            length)

        # Decode. Two biases are passed here:
        #   decoder_self_attention_bias is a triangular matrix encoding the
        #     causal dependencies in self-attention;
        #   attention_bias marks padded positions of the encoder source
        #     with -1e9.
        # Input shape=(batch, length_decoder, dim).
        outputs = self.decoder_stack(decoder_inputs, encoder_outputs,
                                     decoder_self_attention_bias,
                                     attention_bias)

        # The output layer shares weights with the embedding layer.
        # shape=(batch, length_decoder, vocab_size)
        logits = self.embedding_softmax_layer_decoder.linear(outputs)
        return logits

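# The decode above describes decoder_self_attention_bias as a
# (1, 1, length, length) matrix that is 0 on and below the diagonal and a very
# large negative value elsewhere. A minimal sketch of such a helper (assumed
# bias value -1e9; the real model_utils implementation may differ):
import tensorflow as tf

def get_decoder_self_attention_bias(length, dtype=tf.float32):
    """Causal bias: 0 where key position j <= query position i, -1e9 where j > i."""
    valid_locs = tf.linalg.band_part(
        tf.ones([length, length], dtype=dtype), -1, 0)
    valid_locs = tf.reshape(valid_locs, [1, 1, length, length])
    return -1e9 * (1.0 - valid_locs)
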
def __call__(self, data):
    vec = np.array([self.vecmodel(data)])
    padnum = np.ones((1, vec.shape[0]))
    logit = self.sess.run(
        self.outlabel,
        feed_dict={
            self.inputdata: vec,
            self.inputpadding: padnum,
            self.pos: model_utils.get_position_encoding(len(vec), 32)
        })[0]
    print(logit)
    # argmax returns an integer label, so compare against ints, not strings.
    if logit == 0:
        print('Not spam!')
    elif logit == 1:
        print('Is spam')

def test():
    model_dir = 'model/'
    logfile = 'test2.log'
    fop = open(logfile, 'w')

    # Prepare data.
    dataline = open('data/train.txt').readlines()
    datalength = len(dataline)
    testdata = dataline[:int(datalength / 5)]
    vecmodel = word2vec.sentence2vec('sgns.weibo.bigram-char')

    # Build model and restore the latest checkpoint.
    inputdata = tf.placeholder(tf.float32, [1, None, 300])
    inputpadding = tf.placeholder(tf.float32, [1, None])
    pos = tf.placeholder(tf.float32, [None, 32])
    inputlabel = tf.placeholder(tf.int32, [1])
    classifier = model(True, 1)
    outlabel = tf.argmax(classifier(inputdata, inputpadding, pos), 1)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    saver = tf.train.Saver(max_to_keep=4)
    model_file = tf.train.latest_checkpoint(model_dir)
    saver.restore(sess, model_file)

    total = 0
    accu = 0
    for i in range(len(testdata)):
        vec = np.array([vecmodel(''.join(testdata[i].split()[1:]))])
        padnum = np.ones((1, vec.shape[0]))
        logit = sess.run(
            outlabel,
            feed_dict={
                inputdata: vec,
                inputpadding: padnum,
                pos: model_utils.get_position_encoding(len(vec), 32)
            })[0]
        label = testdata[i].split()[0]
        print(logit, label, int(logit) == int(label))
        fop.write(str(logit) + ' ' + str(label) + '\n')
        total += 1
        # logit is an int and label is a string, so cast before comparing.
        if int(logit) == int(label):
            accu += 1
    print(accu / total)

def _get_symbols_to_logits_fn(self, max_decode_length):
    """Returns a decoding function that calculates logits of the next tokens."""
    timing_signal = model_utils.get_position_encoding(
        max_decode_length + 1, self.params["hidden_size"])
    decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
        max_decode_length)

    def symbols_to_logits_fn(ids, i, cache):
        """Generate logits for next potential IDs.

        Args:
          ids: Current decoded sequences. int tensor with shape
            [batch_size * beam_size, i + 1]
          i: Loop index
          cache: dictionary of values storing the encoder output,
            encoder-decoder attention bias, and previous decoder attention
            values.

        Returns:
          Tuple of (logits with shape [batch_size * beam_size, vocab_size],
            updated cache values)
        """
        # Set decoder input to the last generated IDs.
        decoder_input = ids[:, -1:]

        # Preprocess decoder input by getting embeddings and adding timing
        # signal.
        decoder_input = self.embedding_softmax_layer(decoder_input)
        decoder_input += timing_signal[i:i + 1]

        self_attention_bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1]
        decoder_outputs = self.decoder_stack(
            decoder_input,
            cache.get("encoder_outputs"),
            self_attention_bias,
            cache.get("encoder_decoder_attention_bias"),
            cache)
        logits = self.embedding_softmax_layer.linear(decoder_outputs)
        logits = tf.squeeze(logits, axis=[1])
        return logits, cache

    return symbols_to_logits_fn

def encode(self, inputs, attention_bias, training):
    """Generate continuous representation for inputs.

    inputs: [batch_size, input_length]
    attention_bias: [batch_size, 1, 1, input_length]
    return: [batch_size, input_length, hidden_size]
    """
    with tf.name_scope('encode'):
        embedded_inputs = self.input_embedding_layer(inputs)
        embedded_inputs = tf.cast(embedded_inputs, self.params['dtype'])
        attention_bias = tf.cast(attention_bias, self.params['dtype'])

        with tf.name_scope('add_pos_encoding'):
            length = tf.shape(embedded_inputs)[1]
            pos_encoding = model_utils.get_position_encoding(
                length, self.params['hidden_size'])
            pos_encoding = tf.cast(pos_encoding, self.params['dtype'])
            encoder_inputs = embedded_inputs + pos_encoding

        if training:
            encoder_inputs = tf.nn.dropout(
                encoder_inputs, rate=self.params['layer_postprocess_dropout'])

        return self.encoder_stack(encoder_inputs, attention_bias,
                                  training=training)

tf_encoder_outputs = tf_transformer.encode(tf_input_x_raw, tf_attention_bias)
tf_pred = tf_transformer(tf_input_x_raw)["outputs"]
tf_pred_res = tf_sess.run(tf_pred, feed_dict={tf_input_x_raw: my_input_x_raw})
print("tf prediction:")
with printoptions(threshold=2000):
    print(tf_pred_res)

k_transformer = KTransformer(params)
k_input_x_raw = Input(shape=(_seq_len_x,))
k_input_y_raw = Input(shape=(_seq_len_y,))
k_embedded_inputs = k_transformer.embedding_softmax_layer(k_input_x_raw)
k_pos_encoding = k_model_utils.get_position_encoding(
    seq_len_x, k_transformer.params.hidden_size)
k_embedding_inputs = k_embedded_inputs + k_pos_encoding
k_attention_bias = k_model_utils.get_padding_bias(k_input_x_raw)
k_encoder_outputs = k_transformer.encode(k_input_x_raw, k_attention_bias,
                                         train=False)
k_output = k_transformer([k_input_x_raw, k_input_y_raw], train=False)

tf_sess.run(tf.global_variables_initializer())
tf_sess.run(get_assign_list(k_transformer))
k_run = K.function([k_input_x_raw, k_input_y_raw], [k_output])
k_res = k_run([my_input_x_raw, my_input_y_raw])[0]
print("k output:")