def __init__(self, units, n_classes, dropout_rate=0.0, hidden_activation='relu', output_activation='softmax', name='Starcane', **kwargs):
    """Build the STARCANE layer stack (CNN branch + recurrent branch,
    attention fusion, two dense/batchnorm stages, dual classification heads).

    Args:
        units: width used for the CNN filters, recurrent cell, attention
            layers and the two hidden dense layers.
        n_classes: number of output classes for both classification heads.
        dropout_rate: dropout inside the custom recurrent cell.
        hidden_activation: activation for hidden_3 / hidden_4.
        output_activation: activation for both output heads.
        name: Keras model name.
        **kwargs: forwarded to the parent Model constructor.
    """
    super(STARCANE, self).__init__(name=name, **kwargs)
    self.units = units
    # Convolutional feature extractor.
    # NOTE(review): dropout here is hard-coded to 0.4 and ignores the
    # dropout_rate argument — confirm intended.
    self.f1 = Cnn(filters=self.units, kernel_size=3, dropout_rate=0.4)
    # Recurrent branch built from a custom cell; returns the full sequence.
    self.f2 = tf.keras.layers.RNN(RecurrentCell(self.units, dropout=dropout_rate),
                                  return_sequences=True, name="rnn_cell")
    # Attention over sequence outputs, plus a second attention used for embeddings.
    self.attention = AttentionLayer(self.units)
    self.attention_f = AttentionLayerF(self.units, name="attention_embedding")
    # Two dense + batch-norm hidden stages.
    self.hidden3 = tf.keras.layers.Dense(units=units, activation=hidden_activation,
                                         name="hidden_3")
    self.batchNorm3 = tf.keras.layers.BatchNormalization(
        name="batchnorm_hidden_3")
    self.hidden4 = tf.keras.layers.Dense(units=units, activation=hidden_activation,
                                         name="hidden_4")
    self.batchNorm4 = tf.keras.layers.BatchNormalization(
        name="batchnorm_hidden_4")
    # Main and auxiliary classification heads (same shape and activation).
    self.clf_output = tf.keras.layers.Dense(units=n_classes, activation=output_activation,
                                            name="output_model")
    self.clf_aux = tf.keras.layers.Dense(units=n_classes, activation=output_activation,
                                         name="output_model_aux")
def __init__(self, adj_lists, feat_data, num_classes, embed_dim, num_sample, num_layers, is_cuda=True):
    """Heterogeneous GraphSAGE: one GraphSage encoder per metapath view,
    fused by attention and classified.

    Args:
        adj_lists: iterable of adjacency lists, one per metapath/view.
        feat_data: node feature matrix shared by all encoders.
        num_classes: number of target classes.
        embed_dim: embedding dimension each encoder produces.
        num_sample: neighbour sample size per encoder.
        num_layers: number of GraphSage layers.
        is_cuda: run on GPU when True.
    """
    super(HANSage, self).__init__()
    self.is_cuda = is_cuda
    self.num_sample = num_sample
    self.embed_dim = embed_dim
    self.encoders = []
    for i, adj_list in enumerate(adj_lists):
        # Place each metapath encoder on a different GPU.
        # NOTE(review): device numbering starts at cuda:1, not cuda:0 —
        # confirm this is intentional (e.g. cuda:0 reserved).
        device = 'cuda:{}'.format(i + 1)
        print(device)
        self.encoders.append(
            GraphSage(adj_list, feat_data, num_classes, self.embed_dim,
                      num_sample, num_layers, self.is_cuda, as_view=True,
                      cuda_device=device).to(device))
    # Register the encoders as submodules so their parameters are tracked.
    for i, meta_encoder in enumerate(self.encoders):
        self.add_module('metaencoder_{}'.format(i), meta_encoder)
    # Attention fuses per-metapath embeddings; classifier maps to logits.
    self.atten = AttentionLayer(self.embed_dim, self.embed_dim)
    self.clf = Classifer(self.embed_dim, num_classes)
def define_nmt(hidden_size, batch_size, en_timesteps, en_vsize, fr_timesteps, fr_vsize):
    """ Defining a NMT model

    Builds the training model (Bi-GRU encoder, Bi-GRU decoder, attention,
    per-step softmax) plus separate single-batch inference models.

    Returns:
        (full_model, encoder_model, decoder_model)
    """
    # ---- Training model ----
    # Define an input sequence and process it.
    if batch_size:
        # Fixed batch size: batch_shape makes state shapes fully static.
        encoder_inputs = Input(batch_shape=(batch_size, en_timesteps, en_vsize), name='encoder_inputs')
        decoder_inputs = Input(batch_shape=(batch_size, fr_timesteps - 1, fr_vsize), name='decoder_inputs')
    else:
        encoder_inputs = Input(shape=(en_timesteps, en_vsize), name='encoder_inputs')
        decoder_inputs = Input(shape=(fr_timesteps - 1, fr_vsize), name='decoder_inputs')

    # Encoder GRU
    encoder_gru = Bidirectional(GRU(hidden_size, return_sequences=True, return_state=True,
                                    name='encoder_gru'),
                                name='bidirectional_encoder')
    encoder_out, encoder_fwd_state, encoder_back_state = encoder_gru(encoder_inputs)

    # Set up the decoder GRU, using the encoder's final states as initial state.
    decoder_gru = Bidirectional(GRU(hidden_size, return_sequences=True, return_state=True,
                                    name='decoder_gru'),
                                name='bidirectional_decoder')
    decoder_out, decoder_fwd_state, decoder_back_state = decoder_gru(
        decoder_inputs, initial_state=[encoder_fwd_state, encoder_back_state])

    # Attention layer: attends over encoder outputs for every decoder step.
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])

    # Concat attention output and decoder GRU output.
    decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_out, attn_out])

    # Dense layer: per-step softmax over the target vocabulary.
    dense = Dense(fr_vsize, activation='softmax', name='softmax_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    decoder_pred = dense_time(decoder_concat_input)

    # Full model
    full_model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_pred)
    full_model.compile(optimizer='adam', loss='categorical_crossentropy')
    full_model.summary()

    """ Inference model """
    # Inference always runs with batch size 1.
    batch_size = 1

    """ Encoder (Inference) model """
    encoder_inf_inputs = Input(batch_shape=(batch_size, en_timesteps, en_vsize),
                               name='encoder_inf_inputs')
    encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state = encoder_gru(encoder_inf_inputs)
    encoder_model = Model(inputs=encoder_inf_inputs,
                          outputs=[encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state])

    """ Decoder (Inference) model """
    decoder_inf_inputs = Input(batch_shape=(batch_size, 1, fr_vsize), name='decoder_word_inputs')
    # 2*hidden_size: encoder outputs concatenate forward and backward directions.
    encoder_inf_states = Input(batch_shape=(batch_size, en_timesteps, 2*hidden_size),
                               name='encoder_inf_states')
    decoder_init_fwd_state = Input(batch_shape=(batch_size, hidden_size), name='decoder_fwd_init')
    decoder_init_back_state = Input(batch_shape=(batch_size, hidden_size), name='decoder_back_init')
    decoder_inf_out, decoder_inf_fwd_state, decoder_inf_back_state = decoder_gru(
        decoder_inf_inputs, initial_state=[decoder_init_fwd_state, decoder_init_back_state])
    attn_inf_out, attn_inf_states = attn_layer([encoder_inf_states, decoder_inf_out])
    decoder_inf_concat = Concatenate(axis=-1, name='concat')([decoder_inf_out, attn_inf_out])
    decoder_inf_pred = TimeDistributed(dense)(decoder_inf_concat)
    decoder_model = Model(
        inputs=[encoder_inf_states, decoder_init_fwd_state, decoder_init_back_state, decoder_inf_inputs],
        outputs=[decoder_inf_pred, attn_inf_states, decoder_inf_fwd_state, decoder_inf_back_state])

    return full_model, encoder_model, decoder_model
def test_attention_layer_standalone_none_b_none_t():
    """AttentionLayer with unknown batch size and unknown time steps.

    The context output keeps the encoder feature dimension; the energy
    tensor is fully unknown in batch and both time axes.
    """
    encoder_in = Input(shape=(None, 15))
    decoder_in = Input(shape=(None, 25))
    context, energies = AttentionLayer()([encoder_in, decoder_in])
    expected_context = tf.TensorShape([None, None, encoder_in.shape[2]])
    expected_energies = tf.TensorShape([None, None, None])
    assert check_tensorshape_equal(context.shape, expected_context)
    assert check_tensorshape_equal(energies.shape, expected_energies)
def build_model(self):
    """
    Function to build the seq2seq model used.
    :return: Encoder model, decoder model (used for predicting) and full model (used for training).
    """
    # Define model inputs for the encoder/decoder stack
    x_enc = Input(shape=(self.seq_len_in, self.input_feature_amount), name="x_enc")
    x_dec = Input(shape=(self.seq_len_out, self.output_feature_amount), name="x_dec")

    # Add noise to the teacher-forced decoder inputs (regularization).
    x_dec_t = GaussianNoise(0.2)(x_dec)

    # Define the encoder GRU, which only has to return a state
    encoder_gru = GRU(self.state_size, return_sequences=True, return_state=True,
                      name="encoder_gru")
    encoder_out, encoder_state = encoder_gru(x_enc)

    # Decoder GRU, seeded with the encoder's final state.
    decoder_gru = GRU(self.state_size, return_state=True, return_sequences=True,
                      name="decoder_gru")
    # Use these definitions to calculate the outputs of out encoder/decoder stack
    dec_intermediates, decoder_state = decoder_gru(x_dec_t, initial_state=encoder_state)

    # Define the attention layer
    attn_layer = AttentionLayer(name="attention_layer")
    attn_out, attn_states = attn_layer([encoder_out, dec_intermediates])

    # Concatenate decoder and attn out
    decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([dec_intermediates, attn_out])

    # Define the dense layer — linear activation: this is regression, not classification.
    dense = Dense(self.output_feature_amount, activation='linear', name='output_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    decoder_pred = dense_time(decoder_concat_input)

    # Define the encoder/decoder stack model
    encdecmodel = tsModel(inputs=[x_enc, x_dec], outputs=decoder_pred)

    # Define the separate encoder model for inferencing
    encoder_inf_inputs = Input(shape=(self.seq_len_in, self.input_feature_amount),
                               name="encoder_inf_inputs")
    encoder_inf_out, encoder_inf_state = encoder_gru(encoder_inf_inputs)
    encoder_model = tsModel(inputs=encoder_inf_inputs,
                            outputs=[encoder_inf_out, encoder_inf_state])

    # Define the separate decoder model for inferencing (one step at a time).
    decoder_inf_inputs = Input(shape=(1, self.output_feature_amount), name="decoder_inputs")
    encoder_inf_states = Input(shape=(self.seq_len_in, self.state_size),
                               name="encoder_inf_states")
    decoder_init_state = Input(shape=(self.state_size,), name="decoder_init")

    decoder_inf_out, decoder_inf_state = decoder_gru(decoder_inf_inputs,
                                                     initial_state=decoder_init_state)
    attn_inf_out, attn_inf_states = attn_layer([encoder_inf_states, decoder_inf_out])
    decoder_inf_concat = Concatenate(axis=-1, name='concat')([decoder_inf_out, attn_inf_out])
    decoder_inf_pred = TimeDistributed(dense)(decoder_inf_concat)
    decoder_model = tsModel(inputs=[encoder_inf_states, decoder_init_state, decoder_inf_inputs],
                            outputs=[decoder_inf_pred, attn_inf_states, decoder_inf_state])

    return encoder_model, decoder_model, encdecmodel
def test_attention_layer_standalone_fixed_b_fixed_t():
    """Fully-specified batch size and time steps.

    With batch 5, encoder (10 steps, dim 15) and decoder (15 steps, dim 25):
    the context output is (batch, dec_time, enc_dim) and the energy output
    is (batch, dec_time, enc_time).
    """
    enc_in = Input(batch_shape=(5, 10, 15))
    dec_in = Input(batch_shape=(5, 15, 25))
    ctx, energy = AttentionLayer()([enc_in, dec_in])
    want_ctx = tf.TensorShape([dec_in.shape[0], dec_in.shape[1], enc_in.shape[2]])
    want_energy = tf.TensorShape([enc_in.shape[0], dec_in.shape[1], enc_in.shape[1]])
    assert ctx.shape == want_ctx
    assert energy.shape == want_energy
def __init__(self, args, asp_mat):
    """ABAE: attention-based aspect extraction model.

    Args:
        args: config namespace; reads embed_dim, n_aspect and mdevice.
        asp_mat: initial aspect embedding matrix, shape [n_aspect, embed_dim].
    """
    super(ABAE, self).__init__()
    self.args = args
    self.device = args.mdevice
    # Trainable aspect embedding matrix.
    self.asp_mat = nn.Parameter(
        asp_mat, requires_grad=True)  # [n_aspect, embed_dim]
    # Bias-free linear projection of text embeddings.
    self.text_linear = nn.Linear(args.embed_dim, args.embed_dim, bias=False)
    # Scaled-dot-product attention over the text representation.
    self.attention = AttentionLayer(m_dim=args.embed_dim, score_func="sdp")
    # Softmax head producing a distribution over aspects.
    self.asp_infer = nn.Sequential(
        nn.Linear(args.embed_dim, args.n_aspect), nn.Softmax(dim=-1))
    # Human-readable model summary string.
    self.desc = ">>> Model Summary [{}] \n" \
                "\t-aspect: {}\n".format(self.__class__.__name__, args.n_aspect)
def __init__(self, num_classes, num_nodes, feat_data, feat_dim, adj_lists, adj_matrix, cuda, num_sample_tpl, num_sample_permission, embed_dim, num_layers):
    """Two-view HANSage: separate GraphSage encoders for the 'tpl' and
    'permission' relations, fused by attention and classified.

    Args:
        num_classes: number of target classes.
        num_nodes: total node count.
        feat_data: node feature matrix.
        feat_dim: node feature dimensionality.
        adj_lists: dict with 'tpl' and 'permission' adjacency lists.
        adj_matrix: dict with 'tpl' and 'permission' adjacency matrices.
        cuda: run on GPU when True.
        num_sample_tpl: neighbour sample size for the tpl view.
        num_sample_permission: neighbour sample size for the permission view.
        embed_dim: embedding dimension.
        num_layers: number of GraphSage layers.
    """
    super(HANSage, self).__init__()
    self.is_cuda = cuda
    self.num_sample_tpl = num_sample_tpl
    self.num_sample_permission = num_sample_permission
    self.embed_dim = embed_dim
    adj_tpl = adj_lists['tpl']
    adj_permission = adj_lists['permission']
    mat_tpl = adj_matrix['tpl']
    mat_permission = adj_matrix['permission']
    # One encoder per relation/view.
    self.encoder_tpl = GraphSage(num_classes, num_nodes, feat_data, feat_dim,
                                 adj_tpl, mat_tpl, self.is_cuda,
                                 self.num_sample_tpl, self.embed_dim,
                                 num_layers, as_view=True)
    self.encoder_permission = GraphSage(num_classes, num_nodes, feat_data, feat_dim,
                                        adj_permission, mat_permission, self.is_cuda,
                                        self.num_sample_permission, self.embed_dim,
                                        num_layers, as_view=True)
    # Attention fuses the two view embeddings; classifier maps to logits.
    self.atten = AttentionLayer(self.embed_dim, self.embed_dim)
    self.clf = Classifer(self.embed_dim, num_classes)
def define_nmt2(maxlen):
    """Build a toy NMT model: frozen embedding, Bi-GRU encoder/decoder over
    the same embedded input, attention, and a per-step softmax head.

    Args:
        maxlen: input sequence length.

    Returns:
        The compiled Keras model (the original built it but returned None).
    """
    input_1 = Input(shape=(maxlen,), name="input1")
    x = Embedding(25000, 128, input_length=maxlen,
                  name='embedding_1', trainable=False)(input_1)
    encoder_out, forward_h, backward_h = Bidirectional(
        GRU(32, return_sequences=True, return_state=True))(x)
    decoder_out, forward_h, backward_h = Bidirectional(
        GRU(32, return_sequences=True, return_state=True))(x, initial_state=[
            forward_h, backward_h])
    print('encoder_out > ', encoder_out.shape)
    print('decoder_out > ', decoder_out.shape)
    attn_out, attn_states = AttentionLayer()([encoder_out, decoder_out])
    # BUG FIX: Concatenate is a layer class — it must be instantiated with
    # its config (axis) and then called on the tensor list; the original
    # passed the tensors to the constructor. axis=-1 concatenates features,
    # matching the 'concat_layer' usage elsewhere in this file.
    a = Concatenate(axis=-1)([decoder_out, attn_out])
    dense = Dense(25000, activation='softmax', name='softmax_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    decoder_pred = dense_time(a)
    # Full model
    # BUG FIX: model inputs must be Input tensors; the original passed the
    # embedding output `x`, which is not a graph input.
    full_model = Model(inputs=input_1, outputs=decoder_pred)
    full_model.compile(optimizer='adam', loss='categorical_crossentropy')
    full_model.summary()
    # BUG FIX: return the model so callers can actually use it.
    return full_model
def model(conf, args):
    """OCR seq2seq: CNN feature extractor + Bi-GRU encoder, GRU decoder with
    attention, per-step softmax over the charset.

    Returns:
        (train_model, infer_decoder_model, infer_encoder_model)
    """
    # Image height and width are fixed here. Masking was abandoned: Conv
    # layers do not support masks (TypeError: Layer block1_conv1 does not
    # support masking ...).
    input_image = Input(shape=(conf.INPUT_IMAGE_HEIGHT, conf.INPUT_IMAGE_WIDTH, 3),
                        name='input_image')  # fixed height (e.g. 32), 3 channels

    # 1. Conv layers: output is (Batch, Width/32, 512).
    conv_output = conv(input_image)

    # 2. Encoder Bi-GRU.
    # NOTE(review): conf.INPUT_IMAGE_WIDTH / 4 is float division in Py3 —
    # confirm Keras accepts a float dim here, or switch to // upstream.
    encoder_bi_gru = Bidirectional(GRU(conf.GRU_HIDDEN_SIZE,
                                       return_sequences=True,
                                       return_state=True,
                                       name='encoder_gru'),
                                   input_shape=(conf.INPUT_IMAGE_WIDTH / 4, 512),
                                   name='bidirectional_encoder')
    encoder_out, encoder_fwd_state, encoder_back_state = encoder_bi_gru(
        conv_output)

    # 3. Decoder GRU, seeded with the encoder's output states.
    decoder_inputs = Input(shape=(None, conf.CHARSET_SIZE), name='decoder_inputs')
    # units = GRU_HIDDEN_SIZE*2 so the decoder state width matches the
    # concatenated fwd+back Bi-GRU state; charset logits come later from
    # the dense layer.
    decoder_gru = GRU(units=conf.GRU_HIDDEN_SIZE * 2,
                      return_sequences=True,
                      return_state=True,
                      name='decoder_gru')
    decoder_out, decoder_state = decoder_gru(
        decoder_inputs,
        initial_state=Concatenate(axis=-1)(
            [encoder_fwd_state, encoder_back_state]))

    # 4. Attention layer: attends encoder outputs for every decoder step.
    # encoder_out shape=(?, 50, 512) where 50 = image width / 4,
    # decoder_out shape=(?, 30, 512) where 30 = target string length.
    attn_layer = AttentionLayer(name='attention_layer')
    logger.debug("模型Attention调用的张量[encoder_out, decoder_out]:%r,%r",
                 encoder_out, decoder_out)
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])

    # Concat attention output + decoder GRU output.
    decoder_concat_input = Concatenate(
        axis=-1, name='concat_layer')([decoder_out, attn_out])

    # 5. Dense output layer: per-step softmax over the charset.
    dense = Dense(conf.CHARSET_SIZE, activation='softmax', name='softmax_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    decoder_prob = dense_time(decoder_concat_input)

    # Whole training model.
    train_model = Model(inputs=[input_image, decoder_inputs], outputs=decoder_prob)
    opt = Adam(lr=args.learning_rate)
    # Per-step categorical_crossentropy; Keras averages the loss over the
    # sequence (see https://keras.io/examples/lstm_seq2seq/).
    train_model.compile(optimizer=opt, loss='categorical_crossentropy',
                        metrics=[words_accuracy])
    train_model.summary()

    # ------------------------------------------------------------------
    # Inference models, defined separately: an encoder model and a decoder
    # model, both reusing the trained layers.
    # ------------------------------------------------------------------
    ### encoder model ###
    infer_input_image = Input(shape=(conf.INPUT_IMAGE_HEIGHT, conf.INPUT_IMAGE_WIDTH, 3),
                              name='input_image')  # fixed height, 3 channels
    infer_conv_output = conv(infer_input_image)  # reuses the conv stack
    infer_encoder_out, infer_encoder_fwd_state, infer_encoder_back_state = \
        encoder_bi_gru(infer_conv_output)  # reuses encoder_bi_gru
    infer_encoder_model = Model(inputs=infer_input_image,
                                outputs=[
                                    infer_encoder_out, infer_encoder_fwd_state,
                                    infer_encoder_back_state
                                ])
    infer_encoder_model.summary()

    ### decoder model ###
    # NOTE(review): shape uses / 4 (float) again; also the init-state Input
    # uses batch_shape while the others use shape — confirm intended.
    infer_decoder_inputs = Input(shape=(conf.INPUT_IMAGE_WIDTH / 4, conf.CHARSET_SIZE),
                                 name='decoder_inputs')
    infer_encoder_out_states = Input(shape=(1, 2 * conf.GRU_HIDDEN_SIZE),
                                     name='encoder_out_states')
    infer_decoder_init_state = Input(batch_shape=(1, 2 * conf.GRU_HIDDEN_SIZE),
                                     name='decoder_init_state')
    infer_decoder_out, infer_decoder_state = \
        decoder_gru(infer_decoder_inputs,
                    initial_state=infer_decoder_init_state)  # reuses decoder_gru
    infer_attn_out, infer_attn_states = \
        attn_layer([infer_encoder_out_states, infer_decoder_out])  # reuses attn_layer
    infer_decoder_concat = Concatenate(
        axis=-1, name='concat')([infer_decoder_out, infer_attn_out])
    infer_decoder_pred = TimeDistributed(dense)(
        infer_decoder_concat)  # reuses dense
    infer_decoder_model = Model(
        inputs=[
            infer_decoder_inputs, infer_encoder_out_states,
            infer_decoder_init_state
        ],
        outputs=[infer_decoder_pred, infer_attn_states, infer_decoder_state])
    infer_decoder_model.summary()
    return train_model, infer_decoder_model, infer_encoder_model
def train_model(conf, args):
    """Build and compile the training model: CNN features + Bi-GRU encoder,
    GRU decoder with attention, per-step softmax over the charset.

    Returns:
        The compiled Keras training model.
    """
    conv, input_image = Conv().build()
    encoder_bi_gru = Bidirectional(GRU(conf.GRU_HIDDEN_SIZE,
                                       return_sequences=True,
                                       return_state=True,
                                       name='encoder_gru'),
                                   name='bidirectional_encoder')
    # TODO: stacking two Bi-GRUs was attempted and postponed; revisit later.
    encoder_out, encoder_fwd_state, encoder_back_state = encoder_bi_gru(conv)
    # _p: presumably a debug passthrough that prints/logs the tensor — confirm.
    encoder_fwd_state = _p(encoder_fwd_state,
                           "编码器输出Fwd状态%%%%%%%%%%%%%%%%%%%%%%%%%%%")
    encoder_back_state = _p(encoder_back_state,
                            "编码器输出Back状态%%%%%%%%%%%%%%%%%%%%%%%%%%%")
    decoder_inputs = Input(shape=(None, conf.CHARSET_SIZE),
                           name='decoder_inputs')
    # Decoder width = 2*hidden so it can accept the concatenated Bi-GRU states.
    decoder_gru = GRU(units=conf.GRU_HIDDEN_SIZE * 2,
                      return_sequences=True,
                      return_state=True,
                      name='decoder_gru')
    decoder_initial_status = Concatenate(axis=-1)(
        [encoder_fwd_state, encoder_back_state])
    decoder_out, decoder_state = decoder_gru(
        decoder_inputs, initial_state=decoder_initial_status)
    attn_layer = AttentionLayer(name='attention_layer')
    logger.debug("模型Attention调用的张量[encoder_out, decoder_out]:%r,%r",
                 encoder_out, decoder_out)
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])  # c_outputs, e_outputs
    decoder_concat_input = Concatenate(
        axis=-1, name='concat_layer')([decoder_out, attn_out])
    dense = Dense(conf.CHARSET_SIZE, activation='softmax', name='softmax_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    decoder_prob = dense_time(decoder_concat_input)
    train_model = Model(inputs=[input_image, decoder_inputs],
                        outputs=decoder_prob)
    opt = Adam(lr=args.learning_rate)
    # Per-step categorical_crossentropy; Keras averages the loss over the
    # sequence (see https://keras.io/examples/lstm_seq2seq/).
    train_model.compile(optimizer=opt, loss='categorical_crossentropy',
                        metrics=[words_accuracy])
    train_model.summary()
    return train_model
def attention_model(src_vocab, target_vocab, src_timesteps, target_timesteps, units):
    """Seq2seq NMT: Bi-LSTM encoder, LSTM decoder with attention, per-step
    softmax over the target vocabulary.

    Returns:
        (model, encoder_model, decoder_model): the training model plus the
        batch-1 inference encoder/decoder models.
    """
    encoder_inputs = Input(shape=(src_timesteps, ), name='encoder_inputs')
    decoder_inputs = Input(shape=(target_timesteps - 1, target_vocab),
                           name='decoder_inputs')
    embedding = Embedding(src_vocab, units, input_length=src_timesteps,
                          name='enc_embedding', mask_zero=True)
    encoder_lstm = Bidirectional(LSTM(units, return_sequences=True,
                                      return_state=True, name='encoder_lstm'),
                                 name='bidirectional_encoder')
    # NOTE(review): the LSTM cell states (c) are discarded; only the two h
    # states seed the decoder — confirm intended.
    encoder_out, encoder_fwd_state, _, encoder_back_state, _ = encoder_lstm(
        embedding(encoder_inputs))
    enc_states = [encoder_fwd_state, encoder_back_state]

    # Decoder
    decoder_lstm = LSTM(units, return_sequences=True, return_state=True,
                        name='decoder_lstm')
    decoder_out, decoder_state, decoder_back_state = decoder_lstm(
        decoder_inputs, initial_state=enc_states)

    # Attention
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])

    # concat attention and decoder output
    decoder_output_concat = Concatenate(axis=-1)([decoder_out, attn_out])

    # FC layer
    dense = Dense(target_vocab, activation='softmax')
    time_distributed = TimeDistributed(dense)
    decoder_pred = time_distributed(decoder_output_concat)

    model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_pred)
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])

    # Inference models
    # Encoder Inference model (batch size fixed to 1)
    encoder_inf_inputs = Input(batch_shape=(
        1,
        src_timesteps,
    ), name='encoder_inf_inputs')
    encoder_inf_out, encoder_inf_fwd_state, _, encoder_inf_back_state, _ = encoder_lstm(
        embedding(encoder_inf_inputs))
    encoder_model = Model(inputs=encoder_inf_inputs,
                          outputs=[
                              encoder_inf_out, encoder_inf_fwd_state,
                              encoder_inf_back_state
                          ])

    # Decoder Inference model (single step)
    decoder_inf_inputs = Input(batch_shape=(1, 1, target_vocab),
                               name='decoder_word_inputs')
    # 2*units: encoder outputs concatenate both directions.
    encoder_inf_states = Input(batch_shape=(1, src_timesteps, 2 * units),
                               name='encoder_inf_states')
    decoder_init_fwd_state = Input(batch_shape=(1, units),
                                   name='decoder_fwd_init')
    decoder_init_back_state = Input(batch_shape=(1, units),
                                    name='decoder_back_init')
    decoder_inf_out, decoder_inf_fwd_state, decoder_inf_back_state = decoder_lstm(
        decoder_inf_inputs,
        initial_state=[decoder_init_fwd_state, decoder_init_back_state])
    attn_inf_out, attn_inf_states = attn_layer(
        [encoder_inf_states, decoder_inf_out])
    decoder_inf_concat = Concatenate(
        axis=-1, name='concat')([decoder_inf_out, attn_inf_out])
    decoder_inf_pred = TimeDistributed(dense)(decoder_inf_concat)
    decoder_model = Model(inputs=[
        encoder_inf_states, decoder_init_fwd_state, decoder_init_back_state,
        decoder_inf_inputs
    ],
                          outputs=[
                              decoder_inf_pred, attn_inf_states,
                              decoder_inf_fwd_state, decoder_inf_back_state
                          ])
    return model, encoder_model, decoder_model
def attention_model_new_arch(src_vocab, target_vocab, src_timesteps, target_timesteps, units, epochs=30):
    """Seq2seq NMT variant: Bi-LSTM encoder with dropout and max-norm
    constraints, a 2*units LSTM decoder seeded with the concatenated h/c
    states, attention, and a plain (non-TimeDistributed) softmax head.

    NOTE(review): the `epochs` parameter is unused in this function —
    confirm whether it belongs to the caller's training loop instead.

    Returns:
        (model, encoder_model, decoder_model)
    """
    encoder_inputs = Input(shape=(src_timesteps, ), name='encoder_inputs')
    decoder_inputs = Input(shape=(target_timesteps - 1, target_vocab),
                           name='decoder_inputs')
    embedding = Embedding(src_vocab, units, input_length=src_timesteps,
                          name='enc_embedding', mask_zero=True)
    embedding2 = Dropout(0.5)(embedding(encoder_inputs))
    encoder_lstm = Bidirectional(LSTM(units,
                                      return_sequences=True,
                                      return_state=True,
                                      kernel_constraint=max_norm(3.0),
                                      recurrent_constraint=max_norm(3.0),
                                      name='encoder_lstm'),
                                 name='bidirectional_encoder')
    encoder_out, forward_h, forward_c, backward_h, backward_c = encoder_lstm(
        embedding2)
    # Concatenate fwd/back states so the 2*units decoder can consume them.
    state_h = Concatenate()([forward_h, backward_h])
    state_c = Concatenate()([forward_c, backward_c])
    enc_states = [state_h, state_c]

    # Decoder
    decoder_lstm = LSTM(units * 2, return_sequences=True, return_state=True,
                        name='decoder_lstm')
    decoder_out, _, _ = decoder_lstm(decoder_inputs, initial_state=enc_states)

    # Attention
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])

    # concat attention and decoder output
    decoder_output_concat = Concatenate(axis=-1)([decoder_out, attn_out])

    # FC layer with dropout regularization.
    tst = Dropout(0.5)(decoder_output_concat)
    decoder_dense = Dense(target_vocab, activation='softmax')
    decoder_pred = decoder_dense(tst)

    model = Model(inputs=[encoder_inputs, decoder_inputs],
                  outputs=decoder_pred)
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])

    # Inference models
    # Encoder Inference model (batch size 1; the input-side Dropout is not
    # applied here — embedding is called directly).
    encoder_inf_inputs = Input(batch_shape=(
        1,
        src_timesteps,
    ), name='encoder_inf_inputs')
    encoder_inf_out, encoder_inf_fwd_h, encoder_inf_fwd_c, encoder_inf_back_h, encoder_inf_back_c = encoder_lstm(
        embedding(encoder_inf_inputs))
    encoder_inf_h = Concatenate()([encoder_inf_fwd_h, encoder_inf_back_h])
    encoder_inf_c = Concatenate()([encoder_inf_fwd_c, encoder_inf_back_c])
    encoder_model = Model(
        inputs=encoder_inf_inputs,
        outputs=[encoder_inf_out, encoder_inf_h, encoder_inf_c])

    # Decoder Inference model
    encoder_inf_states = Input(batch_shape=(1, src_timesteps, 2 * units),
                               name='encoder_inf_states')
    decoder_inf_inputs = Input(batch_shape=(1, 1, target_vocab),
                               name='decoder_word_inputs')
    # NOTE(review): names say fwd/back but these carry the decoder's h and c
    # states (2*units wide) — confirm and consider renaming.
    decoder_init_fwd_state = Input(batch_shape=(1, units * 2),
                                   name='decoder_fwd_init')
    decoder_init_back_state = Input(batch_shape=(1, units * 2),
                                    name='decoder_back_init')
    decoder_states_inputs = [decoder_init_fwd_state, decoder_init_back_state]
    decoder_inf_out, decoder_inf_fwd_state, decoder_inf_back_state = decoder_lstm(
        decoder_inf_inputs, initial_state=decoder_states_inputs)
    attn_inf_out, attn_inf_states = attn_layer(
        [encoder_inf_states, decoder_inf_out])
    decoder_inf_concat = Concatenate(
        axis=-1, name='concat')([decoder_inf_out, attn_inf_out])
    decoder_inf_pred = decoder_dense(decoder_inf_concat)
    decoder_model = Model(inputs=[
        encoder_inf_states, decoder_init_fwd_state, decoder_init_back_state,
        decoder_inf_inputs
    ],
                          outputs=[
                              decoder_inf_pred, attn_inf_states,
                              decoder_inf_fwd_state, decoder_inf_back_state
                          ])
    return model, encoder_model, decoder_model
def define_nmt(batch_size, en_timesteps, en_vsize, fr_timesteps, fr_vsize):
    """ Defining a NMT model

    Uses the create_rnn_layer/get_state helpers to build a bidirectional
    encoder and a unidirectional decoder with attention, plus batch-1
    inference models.

    Returns:
        (full_model, encoder_model, decoder_model)
    """
    # Decoder width doubles the encoder's because the encoder is bidirectional.
    HIDDEN_SIZE_DEC = HIDDEN_DIM * 2  # if IS_BIDIRECTIONAL else HIDDEN_DIM

    # Define an input sequence and process it.
    if batch_size:
        encoder_inputs = Input(batch_shape=(batch_size, en_timesteps, en_vsize), name='encoder_inputs')
        decoder_inputs = Input(batch_shape=(batch_size, fr_timesteps - 1, fr_vsize), name='decoder_inputs')
    else:
        encoder_inputs = Input(shape=(en_timesteps, en_vsize), name='encoder_inputs')
        decoder_inputs = Input(shape=(fr_timesteps - 1, fr_vsize), name='decoder_inputs')

    # Encoder GRU
    encoder_gru = create_rnn_layer(HIDDEN_DIM, name='encoder_rnn')
    encoder_out, encoder_states = get_state(encoder_gru(encoder_inputs))

    # Set up the decoder GRU, using `encoder_states` as initial state.
    decoder_gru = create_rnn_layer(HIDDEN_SIZE_DEC, bi_layer=False, name='decoder_rnn')
    # encoder_states = Concatenate(axis=-1)(encoder_states)
    print('encoder_states!!!!: ', encoder_states)
    d = decoder_gru(decoder_inputs, initial_state=encoder_states)
    decoder_out, decoder_state = get_state(d, bi_layer=False)

    # Earlier hand-rolled variant kept for reference:
    # encoder_gru = Bidirectional(RECURRENT(HIDDEN_DIM, return_sequences=True, return_state=True, name='encoder_gru'))
    # encoder_out, fwd, back = encoder_gru(encoder_inputs)
    # decoder_gru = RECURRENT(HIDDEN_SIZE_DEC, return_sequences=True, return_state=True, name='decoder_gru')
    # encoder_states = Concatenate(axis=-1)([fwd, back])
    # print('encoder_states!!!!: ', encoder_states)
    # decoder_out, decoder_state = decoder_gru(decoder_inputs, initial_state=encoder_states)

    # Attention layer
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])

    # Concat attention input and decoder GRU output
    decoder_concat_input = Concatenate(
        axis=-1, name='concat_layer')([decoder_out, attn_out])

    # Dense layer: per-step softmax over the target vocabulary.
    dense = Dense(fr_vsize, activation='softmax', name='softmax_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    decoder_pred = dense_time(decoder_concat_input)

    # Full model
    full_model = Model(inputs=[encoder_inputs, decoder_inputs],
                       outputs=decoder_pred)
    full_model.compile(optimizer=RMSprop(lr=LR),
                       loss='categorical_crossentropy')
    full_model.summary()

    """ Inference model """
    # Inference always runs with batch size 1.
    batch_size = 1

    """ Encoder (Inference) model """
    encoder_inf_inputs = Input(batch_shape=(batch_size, en_timesteps, en_vsize),
                               name='encoder_inf_inputs')
    # v1: unpack bidirectional fwd/back states directly.
    encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state = encoder_gru(
        encoder_inf_inputs)
    print('V1 INFER ENCODER: ', encoder_inf_out.shape,
          encoder_inf_fwd_state.shape, encoder_inf_back_state.shape)
    encoder_model = Model(inputs=encoder_inf_inputs,
                          outputs=[
                              encoder_inf_out, encoder_inf_fwd_state,
                              encoder_inf_back_state
                          ])
    # v2 (alternative via get_state, kept for reference):
    # encoder_inf_out, encoder_inf_states = get_state(encoder_gru(encoder_inf_inputs))
    # print('V2 INFER ENCODER: ', encoder_inf_out.shape, encoder_inf_states.shape)
    # encoder_model = Model(inputs=encoder_inf_inputs, outputs=[encoder_inf_out, ] + encoder_inf_states)

    """ Decoder (Inference) model """
    decoder_inf_inputs = Input(batch_shape=(batch_size, 1, fr_vsize),
                               name='decoder_word_inputs')
    encoder_inf_states = Input(batch_shape=(batch_size, en_timesteps, HIDDEN_SIZE_DEC),
                               name='encoder_inf_states')
    decoder_init_state = Input(batch_shape=(batch_size, HIDDEN_SIZE_DEC),
                               name='decoder_init')

    decoder_inf_out, decoder_inf_state = decoder_gru(
        decoder_inf_inputs, initial_state=decoder_init_state)
    attn_inf_out, attn_inf_states = attn_layer(
        [encoder_inf_states, decoder_inf_out])
    decoder_inf_concat = Concatenate(
        axis=-1, name='concat')([decoder_inf_out, attn_inf_out])
    decoder_inf_pred = TimeDistributed(dense)(decoder_inf_concat)
    decoder_model = Model(
        inputs=[encoder_inf_states, decoder_init_state, decoder_inf_inputs],
        outputs=[decoder_inf_pred, attn_inf_states, decoder_inf_state])

    return full_model, encoder_model, decoder_model
def vgg_gru(input_image,vgg_conv5): """ Defining a NMT model """ # VGG的Conv5,然后按照宽度展开,把H中的数据concat到一起,是model,model的父类也是layer # input_shape = (img_width,img_height,channel) # [batch,width,height,channel] => [batch,width,height*channel] # [samples, time steps, features] # vgg_conv5_shape = tf.shape(vgg_conv5) # vgg_conv5_shape = vgg_conv5.shape.as_list() vgg_conv5_shape = [x if x is not None else -1 for x in vgg_conv5.shape.as_list()] # 支持else的写法 # vgg_conv5_shape = [x for x in vgg_conv5.shape.as_list() if x is not None] # print(vgg_conv5_shape) b = vgg_conv5_shape[0] w = vgg_conv5_shape[1] h = vgg_conv5_shape[2] c = vgg_conv5_shape[3] print("(b,w,c*h)",(b,w,c*h)) # rnn_input = tf.reshape(vgg_conv5,(b,w,c*h)) # 转置[batch,width,height,channel] => [batch,width,height*channel] # print(tf.shape(rnn_input)) # VGG的Conv5,然后按照宽度展开,把H中的数据concat到一起,是model,model的父类也是layer rnn_input = Reshape((w,c*h))(vgg_conv5) print("rnn_input.shape=", rnn_input) # time_distribute = TimeDistributed(Lambda(lambda x: model_cnn(x)))( # input_lay) # keras.layers.Lambda is essential to make our trick work :) # 1.Encoder GRU编码器 encoder_gru = Bidirectional(GRU(64,#写死一个隐含神经元数量 return_sequences=True, return_state=True, name='encoder_gru'), name='bidirectional_encoder') encoder_out, encoder_fwd_state, encoder_back_state = encoder_gru(rnn_input) # 2.Decoder GRU,using `encoder_states` as initial state. 
# 使用encoder的输出当做decoder的输入 decoder_inputs = Input(shape=(5,64), name='decoder_inputs') decoder_gru = GRU(64*2, return_sequences=True, return_state=True, name='decoder_gru') decoder_out, decoder_state = decoder_gru( decoder_inputs, initial_state=Concatenate(axis=-1)([encoder_fwd_state, encoder_back_state]) ) # Attention layer attn_layer = AttentionLayer(name='attention_layer') print("encoder_out:",encoder_out) print("decoder_out:", decoder_out) attn_out, attn_states = attn_layer([encoder_out, decoder_out]) # concat Attention的输出 + GRU的输出 decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_out, attn_out]) # Dense layer, dense = Dense(64, activation='softmax', name='softmax_layer') dense_time = TimeDistributed(dense, name='time_distributed_layer') decoder_pred = dense_time(decoder_concat_input) # Full model full_model = Model(inputs=[input_image, decoder_inputs], outputs=decoder_pred) full_model.compile(optimizer='adam', loss='categorical_crossentropy') full_model.summary() return full_model