def build_model3():
    """Build a CNN + attention-pooling + BiLSTM binary classifier.

    Input: sequences of shape (5000, 4) (presumably one-hot DNA — TODO
    confirm). Relies on module-level hyperparameters `filter_length`,
    `lstm_units` and `danse_units` being defined before this is called.

    Returns the uncompiled Keras Model.
    """
    input_layer = Input(shape=(5000, 4))
    conv_layer1 = Conv1D(filters=64, kernel_size=20, padding='same',
                         activation='relu', strides=1,
                         kernel_regularizer=regularizers.l2(1e-5),
                         bias_regularizer=regularizers.l2(1e-5),
                         name='cnn1')(input_layer)
    bn1 = BatchNormalization(name='bn1')(conv_layer1)
    conv_layer2 = Conv1D(filters=128, kernel_size=20, padding='same',
                         activation='relu', strides=1,
                         kernel_regularizer=regularizers.l2(1e-5),
                         bias_regularizer=regularizers.l2(1e-5),
                         name='cnn2')(bn1)
    bn2 = BatchNormalization(name='bn2')(conv_layer2)
    # Split the 5000-step feature map into windows of filter_length/2 steps
    # so attention pooling can run over each window independently.
    reshape = Reshape((int(5000 * 2 / filter_length),
                       int(filter_length / 2), 128))(bn2)
    attention_pooling = TimeDistributed(AttentionLayer(),
                                        name='attentionPooling')(reshape)
    bn3 = BatchNormalization(name='bn3')(attention_pooling)
    bilstm_layer = Bidirectional(
        LSTM(units=lstm_units, return_sequences=True,
             kernel_regularizer=regularizers.l2(1e-5),
             bias_regularizer=regularizers.l2(1e-5)),
        name='bilstm')(bn3)
    bn4 = BatchNormalization(name='bn4')(bilstm_layer)
    flatten = Flatten()(bn4)
    dense_layer = Dense(units=danse_units,
                        kernel_regularizer=regularizers.l2(1e-5),
                        bias_regularizer=regularizers.l2(1e-5),
                        activation='relu', name='dense')(flatten)
    bn5 = BatchNormalization(name='bn5')(dense_layer)
    output_layer = Dense(units=1,
                         kernel_regularizer=regularizers.l2(1e-5),
                         bias_regularizer=regularizers.l2(1e-5),
                         activation='sigmoid', name='classify')(bn5)
    # FIX: legacy `input=`/`output=` keywords are rejected by modern Keras;
    # the supported keyword arguments are `inputs=`/`outputs=` (matching the
    # other model builders in this file).
    model = Model(inputs=input_layer, outputs=output_layer)
    return model
def model(max_features, maxlen, attention=True):
    """Build a bidirectional-GRU binary classifier, optionally with attention.

    :param max_features: vocabulary size for the embedding layer
    :param maxlen: padded input sequence length
    :param attention: when True pool GRU states with AttentionLayer,
        otherwise with GlobalAvgPool1D
    :return: the compiled Keras model
    """
    sequence_input = Input(shape=(maxlen,), dtype='int32')
    embedded = Embedding(input_dim=max_features, output_dim=128,
                         input_length=maxlen,
                         trainable=True)(sequence_input)
    gru_states = Bidirectional(GRU(100, return_sequences=True))(embedded)
    # Reduce the per-timestep GRU states to a single vector.
    if attention:
        pooled = AttentionLayer()(gru_states)
    else:
        pooled = GlobalAvgPool1D()(gru_states)
    preds = Dense(1, activation='sigmoid')(pooled)
    built = Model(sequence_input, preds)
    built.compile(loss='binary_crossentropy', optimizer='rmsprop',
                  metrics=['acc'])
    built.summary()
    return built
def build_model2():
    """Build a CNN + max-pool + BiLSTM + attention binary classifier.

    Input: sequences of shape (5000, 4). Relies on module-level
    hyperparameters `filter_length`, `lstm_units` and `danse_units`.

    Returns the uncompiled Keras Model.
    """
    input_layer = Input(shape=(5000, 4))
    conv_layer1 = Conv1D(filters=64, kernel_size=20, padding='same',
                         activation='relu', strides=1,
                         kernel_regularizer=regularizers.l2(1e-5),
                         bias_regularizer=regularizers.l2(1e-5),
                         name='cnn1')(input_layer)
    bn1 = BatchNormalization(name='bn1')(conv_layer1)
    conv_layer2 = Conv1D(filters=128, kernel_size=20, padding='same',
                         activation='relu', strides=1,
                         kernel_regularizer=regularizers.l2(1e-5),
                         bias_regularizer=regularizers.l2(1e-5),
                         name='cnn2')(bn1)
    bn2 = BatchNormalization(name='bn2')(conv_layer2)
    # Downsample the sequence by filter_length/2 before the BiLSTM.
    max_pool_layer = MaxPooling1D(pool_size=int(filter_length / 2),
                                  strides=int(filter_length / 2),
                                  padding='same')(bn2)
    bn3 = BatchNormalization(name='bn3')(max_pool_layer)
    bilstm_layer = Bidirectional(
        LSTM(units=lstm_units, return_sequences=True,
             kernel_regularizer=regularizers.l2(1e-5),
             bias_regularizer=regularizers.l2(1e-5)),
        name='lstm')(bn3)
    bn4 = BatchNormalization(name='bn4')(bilstm_layer)
    attention_layer = AttentionLayer(name='attention')(bn4)
    bn5 = BatchNormalization(name='bn5')(attention_layer)
    dense_layer = Dense(units=danse_units,
                        kernel_regularizer=regularizers.l2(1e-5),
                        bias_regularizer=regularizers.l2(1e-5),
                        activation='relu', name='dense')(bn5)
    bn6 = BatchNormalization(name='bn6')(dense_layer)
    output_layer = Dense(units=1,
                         kernel_regularizer=regularizers.l2(1e-5),
                         bias_regularizer=regularizers.l2(1e-5),
                         activation='sigmoid', name='classify')(bn6)
    # FIX: legacy `input=`/`output=` keywords are rejected by modern Keras;
    # use `inputs=`/`outputs=` like the other builders in this file.
    model = Model(inputs=input_layer, outputs=output_layer)
    return model
def __init__(self, args, rnn_type, vocab_size, embedding_dim, hidden_size,
             num_layers, dropout=0.5, bidirectional=False,
             pretrained_embedding=None):
    """Set up the embedding, recurrent, attention and output layers.

    :param rnn_type: name of an `nn` recurrent class ('RNN'/'LSTM'/'GRU')
    :param pretrained_embedding: accepted but not used here — presumably
        consumed elsewhere (e.g. init_weights); TODO confirm.
    """
    super(RNNModel, self).__init__()
    self.encoder = nn.Embedding(vocab_size, embedding_dim)
    # Resolve the RNN class by name so one constructor covers RNN/LSTM/GRU.
    self.rnn = getattr(nn, rnn_type)(embedding_dim, hidden_size, num_layers,
                                     bias=False, dropout=dropout,
                                     bidirectional=bidirectional)
    self.decoder = nn.Linear(hidden_size, 1)
    # Separate output head for the bidirectional case (2x hidden features).
    self.decoder_bi = nn.Linear(hidden_size * 2, 1)
    self.AttentionLayer = AttentionLayer(args, hidden_size)
    self.auglinear = nn.Linear(hidden_size * 2, args.document_hidden_size)
    self.decoder_ = nn.Linear(args.document_hidden_size, 1)
    # NOTE(review): init_weights() runs before self.args is assigned below —
    # confirm it does not read self.args.
    self.init_weights()
    self.args = args
    self.bidirectional = bidirectional
    self.rnn_type = rnn_type
    self.hidden_size = hidden_size
    self.num_layers = num_layers
def __init__(self, config, tok2i, sampler, encoder):
    """Configure the LSTM token decoder.

    :param config: dict of hyperparameters (dims, layer counts, flags)
    :param tok2i: token-to-index map; must contain '<s>' and '<end>'
    :param sampler: sampling strategy object used at decode time
    :param encoder: the paired encoder (comment says ONLSTMEncoder)
    """
    super(LSTMDecoder, self).__init__()
    self.fc_dim = config['fc_dim']  # fc?
    self.dec_lstm_dim = config['dec_lstm_dim']
    self.dec_n_layers = config['dec_n_layers']  # decoder layers number
    self.n_classes = config['n_classes']  # number of token classes
    self.word_emb_dim = config[
        'word_emb_dim']  # dimension of word embedding
    self.device = config['device']
    self.longest_label = config['longest_label']
    self.model_type = config['model_type']
    self.aux_end = config.get('aux_end', False)
    self.encoder = encoder  # encoder is ONLSTMEncoder

    # -- Decoder
    # Input width defaults to the word-embedding width unless overridden.
    self.dec_lstm_input_dim = config.get('dec_lstm_input_dim',
                                         self.word_emb_dim)
    self.dec_lstm = nn.LSTM(self.dec_lstm_input_dim, self.dec_lstm_dim,
                            self.dec_n_layers,
                            batch_first=True)  # use torch implemented LSTM
    self.dec_emb = nn.Embedding(self.n_classes, self.word_emb_dim)
    if config['nograd_emb']:
        # Freeze the embedding table.
        self.dec_emb.weight.requires_grad = False
    self.dropout = nn.Dropout(p=config['dropout'])
    # Layers for mapping LSTM output to scores
    self.o2emb = nn.Linear(self.dec_lstm_dim, self.word_emb_dim)
    # Optionally use the (|V| x d_emb) matrix from the embedding layer here.
    if config['share_inout_emb']:  # in bagorder, default is True
        # NOTE(review): uniform_(0.01) sets the sampling range to
        # [0.01, 1.0] (it is the `from` argument) — confirm intended.
        self.out_bias = nn.Parameter(
            torch.zeros(self.n_classes).uniform_(0.01))
        self.emb2score = lambda x: F.linear(
            x, self.dec_emb.weight, self.out_bias
        )  # emb2score(x) = x*dec_emb.weight + out_bias
    else:
        self.emb2score = nn.Linear(self.word_emb_dim, self.n_classes)
    self.register_buffer('START', torch.LongTensor([tok2i['<s>']]))
    self.sampler = sampler
    self.end = tok2i['<end>']
    if self.aux_end:
        # Auxiliary stop-probability head over the LSTM output.
        self.o2stop = nn.Sequential(
            nn.Linear(self.dec_lstm_dim, self.word_emb_dim), nn.ReLU(),
            self.dropout, nn.Linear(self.word_emb_dim, 1), nn.Sigmoid())
    if self.model_type == 'translation':
        # Map the (possibly bidirectional) encoder state to the decoder's
        # initial hidden state across all layers.
        self.enc_to_h0 = nn.Linear(
            config['enc_lstm_dim'] * config['num_dir_enc'],
            self.dec_n_layers * self.dec_lstm_dim)
        self.attention = AttentionLayer(
            input_dim=self.dec_lstm_dim,
            hidden_size=self.dec_lstm_dim,
            bidirectional=config['num_dir_enc'] == 2)
        # Deliberate double assignment: attention decoding is wired up but
        # immediately overridden by plain decoding (see original comment).
        self.decode = self.forward_decode_attention
        self.decode = self.forward_decode  # temporarily use this
        # Tie decoder embeddings to the encoder's.
        self.dec_emb.weight = self.encoder.emb.weight
    else:
        self.decode = self.forward_decode  # in bagorder, decode is forward_decode
def __init__(self, last_stride, block, layers, baseWidth=26, scale=4,
             num_classes=1000, with_attention=False):
    """Res2Net backbone with a deep three-conv stem and optional attention.

    :param last_stride: stride of the final residual stage
    :param block: residual block class (uses block.expansion)
    :param layers: blocks per stage, e.g. [3, 4, 6, 3]
    :param baseWidth: Res2Net width parameter
    :param scale: Res2Net scale parameter
    :param num_classes: classifier output size
    :param with_attention: when True, add AttentionLayer modules
    """
    self.inplanes = 64
    super(Res2Net, self).__init__()
    self.baseWidth = baseWidth
    self.scale = scale
    # Deep stem: three 3x3 convs (first with stride 2) instead of one 7x7.
    self.conv1 = nn.Sequential(
        nn.Conv2d(3, 32, 3, 2, 1, bias=False), nn.BatchNorm2d(32),
        nn.ReLU(inplace=True), nn.Conv2d(32, 32, 3, 1, 1, bias=False),
        nn.BatchNorm2d(32), nn.ReLU(inplace=True),
        nn.Conv2d(32, 64, 3, 1, 1, bias=False))
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU()
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    # Four residual stages.
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3],
                                   stride=last_stride)
    self.with_attention = with_attention
    if self.with_attention:
        # spatial attention part
        self.attention1 = AttentionLayer(256, 16)
        self.attention2 = AttentionLayer(512, 32)
    self.avgpool = nn.AdaptiveAvgPool2d(1)
    self.last_linear = nn.Linear(512 * block.expansion, num_classes)
    # Kaiming init for convs; unit weight / zero bias for batch norms.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                    nonlinearity='relu')
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
def load_model(self, filepath: str):
    """Loads a model with a custom AttentionLayer property.

    :param filepath: path to the saved Keras model file
    :return: the loaded model (also stored on self.model), or None if a
        falsy value was returned.
    """
    # FIX: custom_objects must map the serialized class NAME to the class
    # itself; the original passed `AttentionLayer(Layer)`, i.e. an instance
    # constructed with the `Layer` class as its argument.
    model = load_model(
        filepath, custom_objects={"AttentionLayer": AttentionLayer})
    if model:
        self.model = model
        return model
    return None
def decoder(ENCODER_OUTPUTS, STATE_H, STATE_C, STATE_H2, STATE_C2,
            ENCODER_INPUTS):
    '''Create the training decoder graph and the inference decoder model.

    Returns (DECODER_INPUTS, DECODER_OUTPUTS, DEC_EMB_LAYER, DECODER_LSTM,
    DECODER_MODEL). Relies on module-level Y_VOC, EMBEDDING_DIM, LATENT_DIM
    and MAX_TEXT_LEN.
    '''
    # Set up the decoder, using `encoder_states` as initial state.
    DECODER_INPUTS = Input(shape=(None,))
    # embedding layer
    DEC_EMB_LAYER = Embedding(Y_VOC, EMBEDDING_DIM, trainable=True)
    DEC_EMB = DEC_EMB_LAYER(DECODER_INPUTS)
    DECODER_LSTM = LSTM(LATENT_DIM, return_sequences=True,
                        return_state=True, dropout=0.4,
                        recurrent_dropout=0.2)
    DECODER_OUTPUTS, DECODER_FWD_STATE, DECODER_BACK_STATE = DECODER_LSTM(
        DEC_EMB, initial_state=[STATE_H, STATE_C])
    # Attention layer
    ATTN_LAYER = AttentionLayer(name='attention_layer')
    ATTN_OUT, ATTN_STATES = ATTN_LAYER([ENCODER_OUTPUTS, DECODER_OUTPUTS])
    # Concat attention input and decoder LSTM output
    DECODER_CONCAT_INPUT = Concatenate(axis=-1, name='concat_layer')(
        [DECODER_OUTPUTS, ATTN_OUT])
    # dense layer
    DECODER_DENSE = TimeDistributed(Dense(Y_VOC, activation='softmax'))
    DECODER_OUTPUTS = DECODER_DENSE(DECODER_CONCAT_INPUT)

    # ---- Inference decoder ----
    # Tensors that hold the states of the previous time step.
    DECODER_STATE_INPUT_H = Input(shape=(LATENT_DIM,))
    DECODER_STATE_INPUT_C = Input(shape=(LATENT_DIM,))
    DECODER_HIDDEN_STATE_INPUT = Input(shape=(MAX_TEXT_LEN, LATENT_DIM))
    # Get the embeddings of the decoder sequence.
    DEC_EMB2 = DEC_EMB_LAYER(DECODER_INPUTS)
    # FIX: unpack BOTH new states (h, c) from the LSTM. The original bound
    # the new hidden state to an unused variable (`NEW_STRING2`) and the
    # inference model then returned the stale STATE_H2 argument instead of
    # the decoder's fresh hidden state.
    DECODER_OUTPUTS2, STATE_H2, STATE_C2 = DECODER_LSTM(
        DEC_EMB2,
        initial_state=[DECODER_STATE_INPUT_H, DECODER_STATE_INPUT_C])
    # attention inference
    ATTN_OUT_INF, ATTN_STATES_INF = ATTN_LAYER(
        [DECODER_HIDDEN_STATE_INPUT, DECODER_OUTPUTS2])
    DECODER_INF_CONCAT = Concatenate(axis=-1, name='concat')(
        [DECODER_OUTPUTS2, ATTN_OUT_INF])
    # A dense softmax layer to generate prob dist. over the target vocabulary
    DECODER_OUTPUTS2 = DECODER_DENSE(DECODER_INF_CONCAT)
    # Final decoder model
    DECODER_MODEL = Model(
        [DECODER_INPUTS] + [DECODER_HIDDEN_STATE_INPUT,
                            DECODER_STATE_INPUT_H, DECODER_STATE_INPUT_C],
        [DECODER_OUTPUTS2] + [STATE_H2, STATE_C2])
    return (DECODER_INPUTS, DECODER_OUTPUTS, DEC_EMB_LAYER, DECODER_LSTM,
            DECODER_MODEL)
def create_model_base(self, embeddings_matrix):
    """Creates the base hierarchical attention network with multiclass or
    binary tuning.

    If doing non-binary classification, set self.num_classes to number of
    classes. Relies on module-level ATTENTION_DIM, MAX_SENTENCE_LENGTH,
    MAX_SENTENCE_COUNT and `metrics`.
    """
    embedding_layer = Embedding(
        len(self.word_index) + 1,
        ATTENTION_DIM,
        weights=[embeddings_matrix],
        input_length=MAX_SENTENCE_LENGTH,
        trainable=True,
        mask_zero=True,
    )
    # Word level: BiGRU + attention over the tokens of one sentence.
    sentence_input = Input(shape=(MAX_SENTENCE_LENGTH, ), dtype="int32")
    embedded_sequences = embedding_layer(sentence_input)
    l_lstm = Bidirectional(GRU(ATTENTION_DIM,
                               return_sequences=True))(embedded_sequences)
    l_att = AttentionLayer(ATTENTION_DIM)(l_lstm)
    sentEncoder = Model(sentence_input, l_att)

    # Sentence level: encode every sentence, then BiGRU + attention over
    # the sentence vectors.
    input_layer = Input(shape=(MAX_SENTENCE_COUNT, MAX_SENTENCE_LENGTH),
                        dtype="int32")
    layer_encoder = TimeDistributed(sentEncoder)(input_layer)
    l_lstm_sent = Bidirectional(GRU(ATTENTION_DIM,
                                    return_sequences=True))(layer_encoder)
    l_att_sent = AttentionLayer(ATTENTION_DIM)(l_lstm_sent)

    if self.num_classes > 2:
        # multi-class classifier
        preds = Dense(self.num_classes, activation="softmax")(l_att_sent)
        # FIX: removed stray `model = Model(input_layer)` that attempted to
        # build a Model with no outputs and was immediately overwritten.
        model = Model(input_layer, preds)
        model.compile(loss="categorical_crossentropy",
                      optimizer="adadelta",
                      metrics=metrics)
    else:
        # binary classifier
        preds = Dense(2, activation="softmax")(l_att_sent)
        model = Model(input_layer, preds)
        model.compile(loss="binary_crossentropy",
                      optimizer="adadelta",
                      metrics=metrics)
    return model
def __init__(self, vocab_size, embedding_size, hidden_size, rnncell='GRU',
             num_layers=1, max_unroll=40, dropout=0.0, word_drop=0.0,
             batch_first=True, sample=False, temperature=1.0,
             use_attention=True, attn_size=128, sos_id=2, eos_id=3,
             use_input_feed=True, use_kb=False, is_mutlitask=False,
             kb_size=None, celeb_vec_size=None, state_size=None):
    """Attention-augmented recurrent decoder.

    NOTE(review): the `rnncell` argument is accepted but never consulted —
    the recurrent unit below is always nn.GRU; confirm whether other cell
    types were meant to be supported.
    """
    super(DecoderRNN, self).__init__()
    # Cache configuration on the instance.
    self.vocab_size = vocab_size
    self.embedding_size = embedding_size
    self.hidden_size = hidden_size
    self.sos_id = sos_id  # start-of-sequence token id
    self.eos_id = eos_id  # end-of-sequence token id
    self.num_layers = num_layers
    self.dropout = dropout
    self.temperature = temperature  # softmax temperature when sampling
    self.word_drop = word_drop
    self.max_unroll = max_unroll  # maximum decoding steps
    self.sample = sample
    self.is_mutlitask = is_mutlitask
    self.use_kb = use_kb
    self.state_size = state_size
    self.kb_size = kb_size
    # self.beam_size = beam_size
    self.attn_size = attn_size
    self.celeb_vec_size = celeb_vec_size
    self.use_input_feed = use_input_feed
    self.embedding = nn.Embedding(vocab_size, self.embedding_size)
    # Input width comes from _input_size(), which presumably accounts for
    # input feeding / KB features — TODO confirm.
    self.rnncell = nn.GRU(self._input_size(),
                          self.hidden_size,
                          num_layers=num_layers,
                          batch_first=batch_first,
                          dropout=dropout)
    self.use_attention = use_attention
    self.attention = AttentionLayer(self.attn_size)
    self.out = nn.Linear(hidden_size, vocab_size)
    # NOTE(review): nn.Softmax() without `dim` relies on legacy implicit-dim
    # behavior and warns on modern PyTorch — confirm the intended axis.
    self.softmax = nn.Softmax()
    self.sigmoid = nn.Sigmoid()
def __init__(self, args, vocab_size, pretrained=None):
    """Embedding -> configurable RNN -> 3-way linear classifier, plus an
    attention module over the recurrent states."""
    super(BasicRNN, self).__init__()
    self.args = args
    self.vocab_size = vocab_size
    self.encoder = nn.Embedding(self.vocab_size, self.args.embedding_size)
    # Resolve the recurrent class by name so args.rnn_type selects
    # RNN / LSTM / GRU.
    recurrent_cls = getattr(nn, self.args.rnn_type)
    self.rnn = recurrent_cls(self.args.embedding_size,
                             self.args.rnn_size,
                             self.args.rnn_layers,
                             bias=False)
    self.decoder = nn.Linear(self.args.rnn_size, 3)
    self.softmax = nn.Softmax()
    self.AttentionLayer = AttentionLayer(self.args, self.args.rnn_size)
    self.init_weights(pretrained=pretrained)
    print("Initialized {} model".format(self.args.rnn_type))
def get_model(hidden_size, batch_size, en_timesteps, en_vsize, fr_timesteps,
              fr_vsize):
    """Build the training seq2seq model plus inference encoder/decoder.

    :param hidden_size: GRU width (decoder uses 2x to match the BiGRU)
    :param batch_size: fixed batch size for training; falsy for dynamic
    :param en_timesteps / en_vsize: source length and vocab size
    :param fr_timesteps / fr_vsize: target length and vocab size
    :return: (full_model, encoder_model, decoder_model) — the inference
        models share layers (and therefore weights) with full_model.
    """
    # Define an input sequence and process it.
    if batch_size:
        encoder_inputs = Input(batch_shape=(batch_size, en_timesteps, en_vsize), name="encoder_inputs")
        decoder_inputs = Input(batch_shape=(batch_size, fr_timesteps, fr_vsize), name="decoder_inputs")
    else:
        encoder_inputs = Input(shape=(en_timesteps, en_vsize), name="encoder_inputs")
        decoder_inputs = Input(shape=(fr_timesteps, fr_vsize), name="decoder_inputs")

    # Encoder GRU
    encoder_gru = layers.Bidirectional(prunable_layers.PrunableGRU(hidden_size, return_sequences=True, return_state=True, name="encoder_gru"), name="bidirectional_encoder")
    encoder_out, encoder_fwd_state, encoder_back_state = encoder_gru(encoder_inputs)

    # Set up the decoder GRU, using `encoder_states` as initial state.
    # The decoder is 2x wide so the concatenated fwd/back encoder state fits.
    decoder_gru = prunable_layers.PrunableGRU(hidden_size * 2, return_sequences=True, return_state=True, name="decoder_gru")
    decoder_out, decoder_state = decoder_gru(decoder_inputs, initial_state=layers.Concatenate(axis=-1)([encoder_fwd_state, encoder_back_state]))

    # Attention layer
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])

    # Concat attention input and decoder GRU output
    decoder_concat_input = layers.Concatenate(axis=-1, name='concat_layer')([decoder_out, attn_out])

    # Dense layer
    dense = prunable_layers.PrunableDense(fr_vsize, activation='softmax', name='softmax_layer')
    dense_time = layers.TimeDistributed(dense, name='time_distributed_layer')
    decoder_pred = dense_time(decoder_concat_input)

    # Full model
    full_model = models.Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_pred)

    """ Inference model """
    # Intentional shadowing: inference runs one sequence at a time.
    batch_size = 1

    """ Encoder (Inference) model """
    encoder_inf_inputs = Input(batch_shape=(batch_size, en_timesteps, en_vsize), name='encoder_inf_inputs')
    encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state = encoder_gru(encoder_inf_inputs)
    encoder_model = models.Model(inputs=encoder_inf_inputs, outputs=[encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state])

    """ Decoder (Inference) model """
    # One decoding step: a single target token plus the previous state.
    decoder_inf_inputs = Input(batch_shape=(batch_size, 1, fr_vsize), name='decoder_word_inputs')
    encoder_inf_states = Input(batch_shape=(batch_size, en_timesteps, 2*hidden_size), name='encoder_inf_states')
    decoder_init_state = Input(batch_shape=(batch_size, 2*hidden_size), name='decoder_init')

    decoder_inf_out, decoder_inf_state = decoder_gru(decoder_inf_inputs, initial_state=decoder_init_state)
    attn_inf_out, attn_inf_states = attn_layer([encoder_inf_states, decoder_inf_out])
    decoder_inf_concat = layers.Concatenate(axis=-1, name='concat')([decoder_inf_out, attn_inf_out])
    decoder_inf_pred = layers.TimeDistributed(dense)(decoder_inf_concat)
    decoder_model = models.Model(inputs=[encoder_inf_states, decoder_init_state, decoder_inf_inputs], outputs=[decoder_inf_pred, attn_inf_states, decoder_inf_state])

    return full_model, encoder_model, decoder_model
def __init__(self, feat_dim, vocab_size, embed_size, n_attention_stacks,
             hidden_dim_img):
    """Stacked Attention Network: image and question encoders, a stack of
    attention layers, and a linear answer predictor."""
    super(SAN, self).__init__()
    self.feat_dim = feat_dim
    self.vocab_size = vocab_size
    self.embed_size = embed_size
    self.img_enc = ImageEncoder(feat_dim)
    self.ques_enc = QuestionEncoder(vocab_size, embed_size, feat_dim)
    # One attention layer per stack.
    attention_stacks = [
        AttentionLayer(hidden_dim_img, feat_dim, feat_dim)
        for _ in range(n_attention_stacks)
    ]
    self.att = nn.ModuleList(attention_stacks)
    self.pred = nn.Linear(feat_dim, vocab_size)
def test(model, output, hidden_size, content_len):
    """Assemble inference-time encoder and decoder models from the layers
    and tensors collected during training.

    `output` holds, in order: encoder_inputs, encoder_outputs, state_h,
    state_c, decoder_inputs, dec_emb_layer, decoder_lstm,
    decoder_softmax_layer.
    :return: (encoder_model, decoder_model)
    """
    (encoder_inputs, encoder_outputs, state_h, state_c, decoder_inputs,
     dec_emb_layer, decoder_lstm, decoder_softmax_layer) = output[:8]

    # Encoder inference model: sequence in, outputs plus final states out.
    encoder_model = Model(inputs=encoder_inputs,
                          outputs=[encoder_outputs, state_h, state_c])

    # Tensors carrying the decoder states from the previous time step.
    decoder_state_input_h = Input(shape=(hidden_size, ))
    decoder_state_input_c = Input(shape=(hidden_size, ))

    dec_emb2 = dec_emb_layer(decoder_inputs)

    # Unlike training, keep the returned hidden/cell states so that
    # decode_sequence() can feed them back in on the next step.
    decoder_outputs2, state_h2, state_c2 = decoder_lstm(
        dec_emb2,
        initial_state=[decoder_state_input_h, decoder_state_input_c])

    # Attention over the full encoder output sequence.
    decoder_hidden_state_input = Input(shape=(content_len, hidden_size))
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out_inf, attn_states_inf = attn_layer(
        [decoder_hidden_state_input, decoder_outputs2])
    decoder_inf_concat = Concatenate(
        axis=-1, name='concat')([decoder_outputs2, attn_out_inf])

    # Decoder output layer.
    decoder_outputs2 = decoder_softmax_layer(decoder_inf_concat)

    # Final decoder inference model.
    decoder_model = Model([decoder_inputs] + [
        decoder_hidden_state_input, decoder_state_input_h,
        decoder_state_input_c
    ], [decoder_outputs2] + [state_h2, state_c2])
    return encoder_model, decoder_model
def defineModel(latent_dim, max_length_source, src_vocab, trg_vocab):
    """Build the training seq2seq model: a three-layer LSTM encoder, an
    attention-augmented LSTM decoder, and a time-distributed softmax.

    :param latent_dim: LSTM and embedding width
    :param max_length_source: padded source sequence length
    :param src_vocab: source vocabulary size
    :param trg_vocab: target vocabulary size
    :return: the uncompiled Keras Model
    """
    # ----- Encoder: embedding followed by a stack of three LSTMs -----
    encoder_inputs = Input(shape=(max_length_source, ))
    enc_emb = Embedding(src_vocab, latent_dim,
                        trainable=True)(encoder_inputs)

    encoder_lstm1 = LSTM(latent_dim, return_sequences=True,
                         return_state=True)
    encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)

    encoder_lstm2 = LSTM(latent_dim, return_sequences=True,
                         return_state=True)
    encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)

    encoder_lstm3 = LSTM(latent_dim, return_state=True,
                         return_sequences=True)
    encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2)

    # ----- Decoder: seeded with the final encoder states -----
    decoder_inputs = Input(shape=(None, ))
    dec_emb_layer = Embedding(trg_vocab, latent_dim, trainable=True)
    dec_emb = dec_emb_layer(decoder_inputs)

    decoder_lstm = LSTM(latent_dim, return_sequences=True,
                        return_state=True)
    decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm(
        dec_emb, initial_state=[state_h, state_c])

    # Attention over encoder outputs, concatenated with decoder outputs.
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])
    decoder_concat_input = Concatenate(
        axis=-1, name='concat_layer')([decoder_outputs, attn_out])

    # Per-timestep softmax over the target vocabulary.
    decoder_dense = TimeDistributed(Dense(trg_vocab, activation='softmax'))
    decoder_outputs = decoder_dense(decoder_concat_input)

    return Model([encoder_inputs, decoder_inputs], decoder_outputs)
def __init__(self, vocab_size, paragraph_window_size, summary_window_size):
    """Seq2seq summarizer: paragraph/summary embeddings, a stack of three
    encoder LSTMs, an attention-augmented decoder LSTM and a softmax head."""
    super(LSTM_Seq2Seq, self).__init__()
    # Window sizes and vocabulary.
    self.paragraph_window_size = paragraph_window_size
    self.summary_window_size = summary_window_size
    self.vocab_size = vocab_size
    # Training hyperparameters.
    self.batch_size = 100
    self.embedding_size = 15
    self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.009)
    # Separate embedding tables for paragraphs and summaries.
    self.paragraph_embedding1 = Embedding(
        self.vocab_size, self.embedding_size,
        input_length=self.paragraph_window_size)
    self.summary_embedding1 = Embedding(
        self.vocab_size, self.embedding_size,
        input_length=self.summary_window_size)
    # Encoder stack: three width-80 LSTMs returning sequences and states.
    self.encoder = LSTM(80, activation='relu', return_state=True,
                        return_sequences=True)
    self.encoder1 = LSTM(80, activation='relu', return_state=True,
                         return_sequences=True)
    self.encoder2 = LSTM(80, activation='relu', return_state=True,
                         return_sequences=True)
    # Decoder, attention and output projection.
    self.decoder = LSTM(80, activation='relu', return_state=True,
                        return_sequences=True)
    self.attn_layer = AttentionLayer(name='attention_layer')
    self.outputs = Dense(vocab_size, activation='softmax')
def decoder(encoded_input, f, n_hidden, config):
    """Unrolled pointer-style decoder: one stateful-LSTM + attention +
    Decode step per matrix entry (config.nCells * config.nMuts steps).

    :return: (poss, log_s) — positions and log-softmax outputs from every
        step, each concatenated along the last axis.
    """
    input_layer = Simple(n_hidden, name='rand_inp')
    step_input = input_layer(f)
    lstm = LSTM(8,
                kernel_initializer=glorot_uniform(),
                return_sequences=True,
                stateful=True,
                name='lstm')
    positions = []
    log_probs = []
    decode_layer = Decode(config.batch_size, name='decode_layer')
    attn_layer = AttentionLayer(name='attention_layer')
    # Each Decode output becomes the next step's LSTM input.
    for _ in range(config.nCells * config.nMuts):
        lstm_out = lstm(step_input)
        attn_out, _attn_state = attn_layer([encoded_input, lstm_out])
        step_input, position, step_log_soft = decode_layer(
            [encoded_input, attn_out])
        positions.append(position)
        log_probs.append(step_log_soft)
    poss = Concatenate(axis=-1, trainable=True, name='poss')(positions)
    log_s = Concatenate(axis=-1, trainable=True, name='log_s')(log_probs)
    return poss, log_s
def getModel(self, xTrain, yTrain, xVal, yVal, xVocabSize, yVocabSize,
             maxTextLen):
    """Load a cached summarization model, or build, train and save one.

    Side effects: sets self.model (and, when training, several layer/tensor
    attributes plus self.history), saves the model to disk, and calls
    self.drawModelFromTraining().
    """
    # Reuse a previously-trained model if one is on disk.
    my_file = Path("./textSumamrizationModel.h5")
    if my_file.is_file():
        self.model = keras.models.load_model(
            './textSumamrizationModel.h5',
            custom_objects={'AttentionLayer': AttentionLayer})
    else:
        # Encoder: embedding + LSTM returning sequences and final states.
        self.encoderInput = Input(shape=(maxTextLen,))
        embL = Embedding(xVocabSize, 200, trainable=True)(self.encoderInput)
        encoderLSTM = LSTM(300, return_state=True, return_sequences=True,
                           dropout=0.4, recurrent_dropout=0.4)
        self.encoderOutput, self.stateH, self.stateC = encoderLSTM(embL)

        # Decoder seeded with the encoder's final states.
        self.decoderInput = Input(shape=(None,))
        self.decL = Embedding(yVocabSize, 200, trainable=True)
        decEmb = self.decL(self.decoderInput)
        self.decoderLstm = LSTM(300, return_sequences=True,
                                return_state=True, dropout=0.4,
                                recurrent_dropout=0.2)
        decoderOutputs, decoderFwdState, decoderBackState = self.decoderLstm(
            decEmb, initial_state=[self.stateH, self.stateC])

        # Attention layer over encoder outputs, concatenated with the
        # decoder outputs.
        self.attnL = AttentionLayer(name='attention_layer')
        attnO, attnS = self.attnL([self.encoderOutput, decoderOutputs])
        decoderCInput = Concatenate(axis=-1,
                                    name='concat_layer')([decoderOutputs,
                                                          attnO])

        # dense layer
        self.decoderDense = TimeDistributed(
            Dense(yVocabSize, activation='softmax'))
        decoderOutputs = self.decoderDense(decoderCInput)

        # Define the model
        self.model = Model([self.encoderInput, self.decoderInput],
                           decoderOutputs)
        self.model.compile(optimizer='rmsprop',
                           loss='sparse_categorical_crossentropy')
        self.model.summary()
        es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                           patience=2)
        # Teacher forcing: decoder input is y[:, :-1], target is y[:, 1:].
        self.history = self.model.fit(
            [xTrain, yTrain[:, :-1]],
            yTrain.reshape(yTrain.shape[0], yTrain.shape[1], 1)[:, 1:],
            epochs=5, callbacks=[es], batch_size=256,
            validation_data=([xVal, yVal[:, :-1]],
                             yVal.reshape(yVal.shape[0], yVal.shape[1],
                                          1)[:, 1:]))
        self.model.save('textSumamrizationModel.h5')
        self.drawModelFromTraining()
def prepare_model(data):
    """Tokenize the data, build and train a seq2seq-with-attention
    summarizer, then assemble the inference encoder/decoder models.

    NOTE(review): this function builds `encoder_model` and `decoder_model`
    but returns nothing, so both are discarded — confirm whether a return
    statement is missing.
    """
    max_len_text = 80
    max_len_summary = 10
    x_tr, x_val, y_tr, y_val = train_test_split(data['cleaned_text'],
                                                data['cleaned_summary'],
                                                test_size=0.1,
                                                random_state=0,
                                                shuffle=True)

    #prepare a tokenizer for reviews on training data
    x_tokenizer = Tokenizer()
    x_tokenizer.fit_on_texts(list(x_tr))
    #convert text sequences into integer sequences
    x_tr = x_tokenizer.texts_to_sequences(x_tr)
    x_val = x_tokenizer.texts_to_sequences(x_val)
    #padding zero upto maximum length
    x_tr = pad_sequences(x_tr, maxlen=max_len_text, padding='post')
    x_val = pad_sequences(x_val, maxlen=max_len_text, padding='post')
    x_voc_size = len(x_tokenizer.word_index) + 1

    #preparing a tokenizer for summary on training data
    y_tokenizer = Tokenizer()
    y_tokenizer.fit_on_texts(list(y_tr))
    #convert summary sequences into integer sequences
    y_tr = y_tokenizer.texts_to_sequences(y_tr)
    y_val = y_tokenizer.texts_to_sequences(y_val)
    #padding zero upto maximum length
    y_tr = pad_sequences(y_tr, maxlen=max_len_summary, padding='post')
    y_val = pad_sequences(y_val, maxlen=max_len_summary, padding='post')
    y_voc_size = len(y_tokenizer.word_index) + 1

    K.clear_session()
    latent_dim = 500

    # Encoder
    encoder_inputs = Input(shape=(max_len_text, ))
    enc_emb = Embedding(x_voc_size, latent_dim,
                        trainable=True)(encoder_inputs)
    #LSTM 1
    encoder_lstm1 = LSTM(latent_dim, return_sequences=True,
                         return_state=True)
    encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)
    #LSTM 2
    encoder_lstm2 = LSTM(latent_dim, return_sequences=True,
                         return_state=True)
    encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)
    #LSTM 3
    encoder_lstm3 = LSTM(latent_dim, return_state=True,
                         return_sequences=True)
    encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2)

    # Set up the decoder.
    decoder_inputs = Input(shape=(None, ))
    dec_emb_layer = Embedding(y_voc_size, latent_dim, trainable=True)
    dec_emb = dec_emb_layer(decoder_inputs)
    #LSTM using encoder_states as initial state
    decoder_lstm = LSTM(latent_dim, return_sequences=True,
                        return_state=True)
    decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm(
        dec_emb, initial_state=[state_h, state_c])

    #Attention Layer
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])

    # Concat attention output and decoder LSTM output
    decoder_concat_input = Concatenate(
        axis=-1, name='concat_layer')([decoder_outputs, attn_out])

    #Dense layer
    decoder_dense = TimeDistributed(Dense(y_voc_size, activation='softmax'))
    decoder_outputs = decoder_dense(decoder_concat_input)

    # Define the model
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    model.summary()
    model.compile(optimizer='rmsprop',
                  loss='sparse_categorical_crossentropy')
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)
    # Teacher forcing: decoder input is y[:, :-1], target is y[:, 1:].
    history = model.fit([x_tr, y_tr[:, :-1]],
                        y_tr.reshape(y_tr.shape[0], y_tr.shape[1],
                                     1)[:, 1:],
                        epochs=10, callbacks=[es], batch_size=512,
                        validation_data=([x_val, y_val[:, :-1]],
                                         y_val.reshape(y_val.shape[0],
                                                       y_val.shape[1],
                                                       1)[:, 1:]))
    diagnostic_plot(history)

    # NOTE(review): these lookup tables are built but never used or
    # returned here — presumably needed by a sequence-decoding routine.
    reverse_target_word_index = y_tokenizer.index_word
    reverse_source_word_index = x_tokenizer.index_word
    target_word_index = y_tokenizer.word_index

    # encoder inference
    encoder_model = Model(inputs=encoder_inputs,
                          outputs=[encoder_outputs, state_h, state_c])

    # decoder inference
    # Below tensors will hold the states of the previous time step
    decoder_state_input_h = Input(shape=(latent_dim, ))
    decoder_state_input_c = Input(shape=(latent_dim, ))
    decoder_hidden_state_input = Input(shape=(max_len_text, latent_dim))

    # Get the embeddings of the decoder sequence
    dec_emb2 = dec_emb_layer(decoder_inputs)
    # To predict the next word in the sequence, set the initial states to
    # the states from the previous time step
    decoder_outputs2, state_h2, state_c2 = decoder_lstm(
        dec_emb2,
        initial_state=[decoder_state_input_h, decoder_state_input_c])

    #attention inference
    attn_out_inf, attn_states_inf = attn_layer(
        [decoder_hidden_state_input, decoder_outputs2])
    decoder_inf_concat = Concatenate(
        axis=-1, name='concat')([decoder_outputs2, attn_out_inf])

    # A dense softmax layer to generate prob dist. over the target vocabulary
    decoder_outputs2 = decoder_dense(decoder_inf_concat)

    # Final decoder model
    decoder_model = Model([decoder_inputs] + [
        decoder_hidden_state_input, decoder_state_input_h,
        decoder_state_input_c
    ], [decoder_outputs2] + [state_h2, state_c2])
def generator_model(category, content, title, embedding_dim, hidden_size):
    """Train a seq2seq-with-attention headline generator for `category`.

    Loads the pickled vocabulary dictionaries, converts `content`/`title`
    into padded index sequences, builds a 3-layer LSTM encoder + attention
    decoder, trains it with teacher forcing and saves the weights.
    """
    # Load the word<->index dictionaries for this category.
    with open('word_dict/content_ix_to_word_' + category + '.pkl',
              'rb') as f:
        content_ix_to_word = pickle.load(f)
    with open('word_dict/content_word_to_ix_' + category + '.pkl',
              'rb') as f:
        content_word_to_ix = pickle.load(f)
    with open('word_dict/title_ix_to_word_' + category + '.pkl', 'rb') as f:
        title_ix_to_word = pickle.load(f)
    with open('word_dict/title_word_to_ix_' + category + '.pkl', 'rb') as f:
        title_word_to_ix = pickle.load(f)

    pad_num = 0  # padding index
    oov_num = 1  # out-of-vocabulary index
    src_vocab = len(content_ix_to_word)
    tar_vocab = len(title_ix_to_word)

    index_title = change_word_to_index(title, title_word_to_ix, oov_num)
    index_content = change_word_to_index(content, content_word_to_ix,
                                         oov_num)
    content_len = max([len(x) - 1 for x in index_content])
    title_len = max([len(x) - 1 for x in index_title])
    input_idx = seq_padding(index_content, content_len, pad_num, True)
    target_idx = seq_padding(index_title, title_len, pad_num, False)

    temp = pd.DataFrame(input_idx).to_numpy()
    input_data = np.array([s[:-1] for s in temp])
    temp = pd.DataFrame(target_idx).to_numpy()
    target_data = np.array([s[:-1] for s in temp])

    xTrain, xTest, yTrain, yTest = train_test_split(input_data,
                                                    target_data,
                                                    test_size=0.2,
                                                    random_state=777,
                                                    shuffle=True)

    ###### Model definition
    # Encoder
    encoder_inputs = Input(shape=(content_len, ))
    # Encoder embedding layer
    enc_emb = Embedding(src_vocab, embedding_dim)(encoder_inputs)
    # Encoder LSTM 1
    encoder_lstm1 = LSTM(hidden_size, return_sequences=True,
                         return_state=True, dropout=0.4,
                         recurrent_dropout=0.4)
    encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)
    # Encoder LSTM 2
    encoder_lstm2 = LSTM(hidden_size, return_sequences=True,
                         return_state=True, dropout=0.4,
                         recurrent_dropout=0.4)
    encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)
    # Encoder LSTM 3
    encoder_lstm3 = LSTM(hidden_size, return_state=True,
                         return_sequences=True, dropout=0.4,
                         recurrent_dropout=0.4)
    encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2)

    # Decoder
    decoder_inputs = Input(shape=(None, ))
    # FIX: the decoder consumes TITLE indices, so its embedding must be
    # sized by the title vocabulary (tar_vocab), not the content vocabulary
    # (src_vocab) — title indices can exceed src_vocab.
    dec_emb_layer = Embedding(tar_vocab, embedding_dim)
    dec_emb = dec_emb_layer(decoder_inputs)
    # Decoder LSTM, seeded with the final encoder states.
    decoder_lstm = LSTM(hidden_size, return_sequences=True,
                        return_state=True, dropout=0.4,
                        recurrent_dropout=0.2)
    decoder_outputs, _, _ = decoder_lstm(dec_emb,
                                         initial_state=[state_h, state_c])

    # NOTE(review): fetching attention.py at call time overwrites any local
    # copy on every invocation and requires network access — consider
    # vendoring the file instead.
    import urllib.request
    urllib.request.urlretrieve(
        "https://raw.githubusercontent.com/thushv89/attention_keras/master/layers/attention.py",
        filename="attention.py")
    from attention import AttentionLayer

    # Attention layer
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])
    # Concatenate attention output with the decoder hidden states.
    decoder_concat_input = Concatenate(
        axis=-1, name='concat_layer')([decoder_outputs, attn_out])

    # Output layer over the title vocabulary.
    # FIX: removed the dead first Dense/Model definitions that were
    # immediately overwritten by these attention-augmented versions.
    decoder_softmax_layer = Dense(tar_vocab, activation='softmax')
    decoder_softmax_outputs = decoder_softmax_layer(decoder_concat_input)

    # Define the model
    model = Model([encoder_inputs, decoder_inputs],
                  decoder_softmax_outputs)
    model.compile(optimizer='rmsprop',
                  loss='sparse_categorical_crossentropy')
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                       patience=5)
    # Teacher forcing: decoder input is y[:, :-1], target is y[:, 1:].
    model.fit([xTrain, yTrain[:, :-1]],
              yTrain.reshape(yTrain.shape[0], yTrain.shape[1], 1)[:, 1:],
              epochs=50, callbacks=[es, TQDMCallback()], batch_size=128,
              validation_data=([xTest, yTest[:, :-1]],
                               yTest.reshape(yTest.shape[0],
                                             yTest.shape[1], 1)[:, 1:]))
    model.save_weights('weights/' + category + '_checkpoint')
# Keep only the final column of the targets as the label.
y_train = y_train.take([-1], axis=-1)
y_val = y_val.take([-1], axis=-1)

# Fail fast on NaNs before building the graph.
assert not np.any(np.isnan(X_train))
assert not np.any(np.isnan(X_val))
assert not np.any(np.isnan(X_covars_val))
assert not np.any(np.isnan(X_covars_train))
assert not np.any(np.isnan(y_val))
assert not np.any(np.isnan(y_train))

encoder_input = Input(shape=(X_train.shape[1], X_train.shape[2]),
                      name='encoder_input')
decoder_input = Input(shape=(X_covars_train.shape[1],
                             X_covars_train.shape[2]),
                      name='decoder_input')
encoderLSTM = LSTM(units=latent_dim, return_state=True,
                   return_sequences=True, name='enc_LSTM',
                   dropout=dropout_rate)
attention1 = AttentionLayer()
decoderLSTM = LSTM(units=latent_dim, return_state=True,
                   return_sequences=True, name='dec_LSTM',
                   dropout=dropout_rate)
dense_output = TimeDistributed(Dense(1),
                               name='time_distirbuted_dense_output')
gaussian_layer = GaussianLayer(1)

## building model
# FIX: call each LSTM once and slice the result; the original invoked each
# layer twice on the same inputs just to separate outputs from states,
# adding duplicate nodes to the graph.
_enc = encoderLSTM(encoder_input)
encoder_out, encoder_states = _enc[0], _enc[1:]
_dec = decoderLSTM(decoder_input, initial_state=encoder_states)
decoder_out, decoder_states = _dec[0], _dec[1:]
# explicitly define tensor shapes as TensorShape([Dimension(128), Dimension(12), Dimension(10)]) and TensorShape([Dimension(128), Dimension(8), Dimension(10)])
attn_out, attn_states = attention1([encoder_out, decoder_out])
decoder_concat_input = Concatenate(axis=-1,
                                   name='concat_layer')([decoder_out,
                                                         attn_out])
def test_model(model, attention=False):
    """Run greedy-decoding inference for a trained seq2seq `model` over the
    test set and log test accuracy to wandb.

    Rebuilds separate step-wise encoder/decoder models from the trained
    model's internal layers (looked up by Keras auto-generated layer names
    and by position), then decodes every test row one character at a time.

    :param model: trained Keras seq2seq model (built with or without attention)
    :param attention: must match how `model` was built
    :return: (test_accuracy, source_strings, target_strings, predicted_strings)

    NOTE(review): relies on module-level globals `wandb`, `config_best`,
    `config_best_attention2`, `dataBase`, `np`, and on the exact layer
    naming/ordering of the training script — verify against the trainer.
    """
    if attention == False:
        wandb.init(config=config_best, project="CS6910-Assignment-3", entity="rahulsundar")
        config = wandb.config
        # Run name encodes the full hyper-parameter configuration.
        wandb.run.name = (
            "Inference_" + str(config.cell_type) + dataBase.source_lang
            + str(config.numEncoders) + "_" + dataBase.target_lang + "_"
            + str(config.numDecoders) + "_" + config.optimiser + "_"
            + str(config.epochs) + "_" + str(config.dropout) + "_"
            + str(config.batch_size) + "_" + str(config.latentDim)
        )
        wandb.run.save()
        if config.cell_type == "LSTM":
            encoder_inputs = model.input[0]
            # The last encoder layer's Keras auto-name is "lstm" for a single
            # layer, else "lstm_<numEncoders-1>".
            if config.numEncoders == 1:
                encoder_outputs, state_h_enc, state_c_enc = model.get_layer(name="lstm").output
            else:
                encoder_outputs, state_h_enc, state_c_enc = model.get_layer(name="lstm_" + str(config.numEncoders - 1)).output
            encoder_states = [state_h_enc, state_c_enc]
            encoder_model = Model(encoder_inputs, encoder_states)
            decoder_inputs = model.input[1]
            # Fresh Inputs carry the previous step's decoder states.
            decoder_state_input_h = Input(shape=(config.latentDim,), name="input_3")
            decoder_state_input_c = Input(shape=(config.latentDim,), name="input_4")
            decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
            # Positional lookups: [-3] decoder LSTM, [-2]/[-1] the dense stack.
            decoder_lstm = model.layers[-3]
            decoder_outputs, state_h_dec, state_c_dec = decoder_lstm(
                decoder_inputs, initial_state=decoder_states_inputs
            )
            decoder_states = [state_h_dec, state_c_dec]
            decoder_dense = model.layers[-2]
            decoder_outputs = decoder_dense(decoder_outputs)
            decoder_dense = model.layers[-1]
            decoder_outputs = decoder_dense(decoder_outputs)
            decoder_model = Model(
                [decoder_inputs] + decoder_states_inputs,
                [decoder_outputs] + decoder_states
            )
        elif config.cell_type == "GRU" or config.cell_type == "RNN":
            encoder_inputs = model.input[0]
            # GRU/SimpleRNN cells return a single state tensor.
            if config.cell_type == "GRU":
                if config.numEncoders == 1:
                    encoder_outputs, state = model.get_layer(name="gru").output
                else:
                    encoder_outputs, state = model.get_layer(name="gru_" + str(config.numEncoders - 1)).output
            else:
                if config.numEncoders == 1:
                    encoder_outputs, state = model.get_layer(name="simple_rnn").output
                else:
                    encoder_outputs, state = model.get_layer(name="simple_rnn_" + str(config.numEncoders - 1)).output
            encoder_states = [state]
            encoder_model = Model(encoder_inputs, encoder_states)
            decoder_inputs = model.input[1]
            decoder_state = Input(shape=(config.latentDim,), name="input_3")
            decoder_states_inputs = [decoder_state]
            decoder_gru = model.layers[-3]
            (decoder_outputs, state,) = decoder_gru(decoder_inputs, initial_state=decoder_states_inputs)
            decoder_states = [state]
            decoder_dense = model.layers[-2]
            decoder_outputs = decoder_dense(decoder_outputs)
            decoder_dense = model.layers[-1]
            decoder_outputs = decoder_dense(decoder_outputs)
            decoder_model = Model(
                [decoder_inputs] + decoder_states_inputs,
                [decoder_outputs] + decoder_states
            )

        def decode_sequence(input_seq):
            """Greedy-decode one source sequence into a target string."""
            # Encode the input as state vectors.
            states_value = encoder_model.predict(input_seq)
            # Generate empty target sequence of length 1.
            target_seq = np.zeros((1, 1, len(dataBase.target_char2int)))
            # Populate the first character of target sequence with the start character.
            target_seq[0, 0, dataBase.target_char2int["\n"]] = 1.0
            # Sampling loop for a batch of sequences
            # (to simplify, here we assume a batch of size 1).
            stop_condition = False
            decoded_sentence = ""
            while not stop_condition:
                if config.cell_type == "LSTM":
                    output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
                elif config.cell_type == "RNN" or config.cell_type == "GRU":
                    # NOTE(review): 256 is hard-coded here; the attention branch
                    # uses config.latentDim — this breaks for latentDim != 256.
                    states_value = states_value[0].reshape((1, 256))
                    output_tokens, h = decoder_model.predict([target_seq] + [states_value])
                # Sample a token (greedy argmax over the vocabulary).
                sampled_token_index = np.argmax(output_tokens[0, -1, :])
                sampled_char = dataBase.target_int2char[sampled_token_index]
                decoded_sentence += sampled_char
                # Exit condition: either hit max length
                # or find stop character.
                if sampled_char == "\n" or len(decoded_sentence) > 25:
                    stop_condition = True
                # Update the target sequence (of length 1).
                target_seq = np.zeros((1, 1, len(dataBase.target_char2int)))
                target_seq[0, 0, sampled_token_index] = 1.0
                # Update states
                if config.cell_type == "LSTM":
                    states_value = [h, c]
                elif config.cell_type == "RNN" or config.cell_type == "GRU":
                    states_value = [h]
            return decoded_sentence

        # Exact-match accuracy over the whole test set.
        acc = 0
        sourcelang = []
        predictions = []
        original = []
        for i, row in dataBase.test.iterrows():
            input_seq = dataBase.test_encoder_input[i : i + 1]
            decoded_sentence = decode_sequence(input_seq)
            og_tokens = [dataBase.target_char2int[x] for x in row["tgt"]]
            predicted_tokens = [dataBase.target_char2int[x] for x in decoded_sentence.rstrip("\n")]
            # if decoded_sentence == row['tgt']:
            #     acc += 1
            sourcelang.append(row['src'])
            original.append(row['tgt'])
            predictions.append(decoded_sentence)
            if og_tokens == predicted_tokens:
                acc += 1
            if i % 100 == 0:
                print(f"Finished {i} examples")
                print(f"Source: {row['src']}")
                print(f"Original: {row['tgt']}")
                print(f"Predicted: {decoded_sentence}")
                print(f"Accuracy: {acc / (i+1)}")
                print(og_tokens)
                print(predicted_tokens)
        print(f'Test Accuracy: {acc}')
        wandb.log({'test_accuracy': acc / len(dataBase.test)})
        wandb.finish()
        return acc / len(dataBase.test), sourcelang, original, predictions
    elif attention == True:
        wandb.init(config=config_best_attention2, project="CS6910-Assignment-3", entity="rahulsundar")
        config = wandb.config
        wandb.run.name = (
            "Inference_WithAttn_" + str(config.cell_type) + dataBase.source_lang
            + str(config.numEncoders) + "_" + dataBase.target_lang + "_"
            + str(config.numDecoders) + "_" + config.optimiser + "_"
            + str(config.epochs) + "_" + str(config.dropout) + "_"
            + str(config.batch_size) + "_" + str(config.latentDim)
        )
        wandb.run.save()
        if config.cell_type == "LSTM":
            encoder_inputs = model.input[0]
            if config.numEncoders == 1:
                encoder_outputs, state_h_enc, state_c_enc = model.get_layer(name="lstm").output
            else:
                encoder_outputs, state_h_enc, state_c_enc = model.get_layer(name="lstm_" + str(config.numEncoders - 1)).output
            # Attention attends over the FIRST encoder layer's sequence output.
            encoder_first_outputs, _, _ = model.get_layer(name="lstm").output
            encoder_states = [state_h_enc, state_c_enc]
            # NOTE(review): this encoder model outputs only [h, c] (2 tensors),
            # but decode_sequence below unpacks THREE outputs
            # (`encoder_first_outputs, _, states_value = ...`) — the LSTM
            # attention path looks inconsistent; compare with the GRU/RNN
            # branch which outputs [first_out, out] + states.  Confirm.
            encoder_model = Model(encoder_inputs, encoder_states)
            decoder_inputs = model.input[1]
            decoder_state_input_h = Input(shape=(config.latentDim,), name="input_3")
            decoder_state_input_c = Input(shape=(config.latentDim,), name="input_4")
            # Full encoder sequence fed to the attention layer at each step.
            decoder_hidden_state = Input(shape=(None, config["latentDim"]), name="input_5")
            decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
            #decoder_lstm = model.layers[-3]
            decoder_lstm = model.get_layer(name="lstm_" + str(config.numEncoders + config.numDecoders - 1))
            decoder_outputs, state_h_dec, state_c_dec = decoder_lstm(
                decoder_inputs, initial_state=decoder_states_inputs
            )
            decoder_states = [state_h_dec, state_c_dec]
            attention_layer = model.get_layer(name="attention_layer")  #AttentionLayer(name='attention_layer')
            attention_out, attention_states = attention_layer([encoder_first_outputs, decoder_outputs])
            decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_out])
            decoder_dense = model.layers[-2]
            decoder_time = TimeDistributed(decoder_dense)
            hidden_outputs = decoder_time(decoder_concat_input)
            decoder_dense = model.layers[-1]
            decoder_outputs = decoder_dense(hidden_outputs)
            # NOTE(review): the inputs list nests decoder_states_inputs
            # ([x] + [y, [h, c]]) instead of flattening it — relies on Keras
            # flattening nested structures; verify this builds.
            decoder_model = Model(
                inputs=[decoder_inputs] + [decoder_hidden_state, decoder_states_inputs],
                outputs=[decoder_outputs] + decoder_states
            )
            #decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states )
        elif config.cell_type == "GRU" or config.cell_type == "RNN":
            encoder_inputs = model.input[0]
            if config.cell_type == "GRU":
                if config.numEncoders == 1:
                    encoder_outputs, state = model.get_layer(name="gru").output
                else:
                    encoder_outputs, state = model.get_layer(name="gru_" + str(config.numEncoders - 1)).output
                encoder_first_outputs, _ = model.get_layer(name="gru").output
            else:
                if config.numEncoders == 1:
                    encoder_outputs, state = model.get_layer(name="simple_rnn").output
                else:
                    encoder_outputs, state = model.get_layer(name="simple_rnn_" + str(config.numEncoders - 1)).output
                encoder_first_outputs, _ = model.get_layer(name="simple_rnn").output
            encoder_states = [state]
            encoder_model = Model(encoder_inputs, outputs=[encoder_first_outputs, encoder_outputs] + encoder_states)
            decoder_inputs = model.input[1]
            decoder_state = Input(shape=(config.latentDim,), name="input_3")
            decoder_hidden_state = Input(shape=(None, config["latentDim"]), name="input_4")
            decoder_states_inputs = [decoder_state]
            if config.cell_type == "GRU":
                decoder_gru = model.get_layer(name="gru_" + str(config.numEncoders + config.numDecoders - 1))  #model.layers[-3]
                (decoder_outputs, state) = decoder_gru(decoder_inputs, initial_state=decoder_states_inputs)
                decoder_states = [state]
            else:
                decoder_gru = model.get_layer(name="simple_rnn_" + str(config.numEncoders + config.numDecoders - 1))  #model.layers[-3]
                (decoder_outputs, state) = decoder_gru(decoder_inputs, initial_state=decoder_states_inputs)
                decoder_states = [state]
            # NOTE(review): here a FRESH (untrained) AttentionLayer is created
            # instead of reusing the trained one via model.get_layer, unlike
            # the LSTM branch above — confirm this is intentional.
            attention_layer = AttentionLayer(name='attention_layer')
            #decoder_outputs_att = decoder_ouputs
            attention_out, attention_states = attention_layer([decoder_hidden_state, decoder_outputs])
            decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_out])
            decoder_dense = model.layers[-2]
            decoder_time = TimeDistributed(decoder_dense)
            hidden_outputs = decoder_time(decoder_concat_input)
            decoder_dense = model.layers[-1]
            decoder_outputs = decoder_dense(hidden_outputs)
            decoder_model = Model(
                inputs=[decoder_inputs] + [decoder_hidden_state, decoder_states_inputs],
                outputs=[decoder_outputs] + decoder_states
            )

        def decode_sequence(input_seq):
            """Greedy-decode one source sequence (attention variant)."""
            # Encode the input as state vectors.
            encoder_first_outputs, _, states_value = encoder_model.predict(input_seq)
            # Generate empty target sequence of length 1.
            target_seq = np.zeros((1, 1, len(dataBase.target_char2int)))
            # Populate the first character of target sequence with the start character.
            target_seq[0, 0, dataBase.target_char2int["\n"]] = 1.0
            # Sampling loop for a batch of sequences
            # (to simplify, here we assume a batch of size 1).
            stop_condition = False
            decoded_sentence = ""
            attention_weights = []
            while not stop_condition:
                if config.cell_type == "LSTM":
                    output_tokens, h, c = decoder_model.predict([target_seq, encoder_first_outputs] + states_value)
                elif config.cell_type == "RNN" or config.cell_type == "GRU":
                    states_value = states_value[0].reshape((1, config.latentDim))
                    output_tokens, h = decoder_model.predict([target_seq] + [encoder_first_outputs] + [states_value])
                #dec_ind = np.argmax(output_tokens, axis=-1)[0, 0]
                #attention_weights.append((dec_ind, attn_states))
                # Sample a token
                sampled_token_index = np.argmax(output_tokens[0, -1, :])
                sampled_char = dataBase.target_int2char[sampled_token_index]
                decoded_sentence += sampled_char
                # Exit condition: either hit max length
                # or find stop character.
                if sampled_char == "\n" or len(decoded_sentence) > 25:
                    stop_condition = True
                # Update the target sequence (of length 1).
                target_seq = np.zeros((1, 1, len(dataBase.target_char2int)))
                target_seq[0, 0, sampled_token_index] = 1.0
                # Update states
                if config.cell_type == "LSTM":
                    states_value = [h, c]
                elif config.cell_type == "RNN" or config.cell_type == "GRU":
                    states_value = [h]
            # NOTE(review): returns ONLY the string, but the caller below
            # unpacks TWO values (`decoded_sentence, attention_weights = ...`)
            # — this will mis-unpack/raise; attention_weights is also never
            # appended to (the appends are commented out).  Needs fixing.
            return decoded_sentence  #, attention_weights

        acc = 0
        sourcelang = []
        predictions = []
        original = []
        #attention_weights_test = []
        for i, row in dataBase.test.iterrows():
            input_seq = dataBase.test_encoder_input[i : i + 1]
            decoded_sentence, attention_weights = decode_sequence(input_seq)
            og_tokens = [dataBase.target_char2int[x] for x in row["tgt"]]
            predicted_tokens = [dataBase.target_char2int[x] for x in decoded_sentence.rstrip("\n")]
            # if decoded_sentence == row['tgt']:
            #     acc += 1
            sourcelang.append(row['src'])
            original.append(row['tgt'])
            predictions.append(decoded_sentence)
            #attention_weights_test.append(attention_weights)
            if og_tokens == predicted_tokens:
                acc += 1
            if i % 100 == 0:
                print(f"Finished {i} examples")
                print(f"Source: {row['src']}")
                print(f"Original: {row['tgt']}")
                print(f"Predicted: {decoded_sentence}")
                print(f"Accuracy: {acc / (i+1)}")
                print(og_tokens)
                print(predicted_tokens)
        print(f'Test Accuracy: {acc}')
        wandb.log({'test_accuracy': acc / len(dataBase.test)})
        wandb.finish()
        return acc / len(dataBase.test), sourcelang, original, predictions  #, attention_weights_test
def model_create(category, embedding_dim, hidden_size):
    """Build a seq2seq summarization model with attention for `category`.

    The category string selects preset vocabulary sizes and sequence lengths.
    '사건_사고' uses a 3-layer encoder; the other categories use 4 layers.

    :param category: one of '날씨', '사건_사고', '뇌물수수'
    :param embedding_dim: embedding dimension (encoder and decoder)
    :param hidden_size: LSTM hidden state size
    :return: (model, outputs) — the compiled training model, and the list of
        layers/tensors needed later to assemble the inference models
    :raises KeyError: if `category` is not one of the preset keys
    """
    # {category: [tar_vocab, src_vocab]}
    word_len_dict = {
        '날씨': [696, 4100],
        '사건_사고': [2999, 11623],
        '뇌물수수': [2668, 16511]
    }
    tar_vocab, src_vocab = word_len_dict[category]
    # {category: [content_len, title_len]}
    content_len_dict = {
        '날씨': [976, 12],
        '사건_사고': [1134, 13],
        '뇌물수수': [1417, 17]
    }
    content_len, title_len = content_len_dict[category]

    ###### Model definition
    # Encoder
    encoder_inputs = Input(shape=(content_len, ))
    enc_emb = Embedding(src_vocab, embedding_dim)(encoder_inputs)
    # Encoder LSTM 1
    encoder_lstm1 = LSTM(hidden_size, return_sequences=True, return_state=True,
                         dropout=0.4, recurrent_dropout=0.4)
    encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)
    # Encoder LSTM 2
    encoder_lstm2 = LSTM(hidden_size, return_sequences=True, return_state=True,
                         dropout=0.4, recurrent_dropout=0.4)
    encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)
    # Encoder LSTM 3
    encoder_lstm3 = LSTM(hidden_size, return_state=True, return_sequences=True,
                         dropout=0.4, recurrent_dropout=0.4)
    if category == '사건_사고':
        # 3-layer encoder: LSTM 3 produces the final outputs/states.
        encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2)
    else:
        # BUGFIX: original read `encoder_outpus3, ... = encoder_lstm3(endcoder_output2)`
        # — `endcoder_output2` was a NameError, and `encoder_output3` used by
        # LSTM 4 below was never assigned because of the `encoder_outpus3` typo.
        encoder_output3, state_h3, state_c3 = encoder_lstm3(encoder_output2)
        # Encoder LSTM 4 (final layer for 4-layer categories)
        encoder_lstm4 = LSTM(hidden_size, return_state=True, return_sequences=True,
                             dropout=0.4, recurrent_dropout=0.4)
        encoder_outputs, state_h, state_c = encoder_lstm4(encoder_output3)

    # Decoder
    decoder_inputs = Input(shape=(None, ))
    # Decoder embedding layer (kept as a named layer for inference reuse).
    # NOTE(review): sized with src_vocab, as in the original — presumably the
    # target tokenizer shares ids with the source; confirm against the trainer.
    dec_emb_layer = Embedding(src_vocab, embedding_dim)
    dec_emb = dec_emb_layer(decoder_inputs)
    # Decoder LSTM, seeded with the encoder's final states.
    decoder_lstm = LSTM(hidden_size, return_sequences=True, return_state=True,
                        dropout=0.4, recurrent_dropout=0.2)
    decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=[state_h, state_c])

    # (The original built and immediately discarded an attention-free Dense/
    # Model pair here; removed as dead code.)

    # Fetch the AttentionLayer implementation at runtime.
    import urllib.request
    urllib.request.urlretrieve(
        "https://raw.githubusercontent.com/thushv89/attention_keras/master/layers/attention.py",
        filename="attention.py")
    from attention import AttentionLayer

    # Attention over the encoder outputs for each decoder step.
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])

    # Concatenate attention context with the decoder hidden states.
    decoder_concat_input = Concatenate(
        axis=-1, name='concat_layer')([decoder_outputs, attn_out])

    # Output layer over the target vocabulary.
    decoder_softmax_layer = Dense(tar_vocab, activation='softmax')
    decoder_softmax_outputs = decoder_softmax_layer(decoder_concat_input)

    # Model definition
    model = Model([encoder_inputs, decoder_inputs], decoder_softmax_outputs)

    if category == '날씨':
        model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')
    elif category == '사건_사고':
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
    else:
        # BUGFIX: original did `from tensorflow.keras.optimizers import *` and
        # then referenced `optimizers.Adam` — a NameError, since the star
        # import does not bind the module name.  Import Adam directly.
        from tensorflow.keras.optimizers import Adam
        adam = Adam(lr=0.01, beta_1=0.8, beta_2=0.999, epsilon=None,
                    decay=1e-5, amsgrad=False)
        model.compile(optimizer=adam, loss='sparse_categorical_crossentropy')

    # Pieces needed to wire up the inference-time encoder/decoder models.
    outputs = [
        encoder_inputs, encoder_outputs, state_h, state_c,
        decoder_inputs, dec_emb_layer, decoder_lstm, decoder_softmax_layer
    ]
    return model, outputs
def define_nmt(hidden_size, batch_size, eng_timesteps, eng_vocab_size,
               ger_timesteps, ger_vocab_size):
    """Build an English->German NMT model with attention.

    Returns three models sharing weights: the full training model, plus
    separate encoder and single-step decoder models for inference.
    """
    emb_dim = 100

    # Training-time inputs (fixed batch shape).
    if batch_size:
        enc_in = Input(batch_shape=(batch_size, eng_timesteps),
                       name='encoder_inputs')
        dec_in = Input(batch_shape=(batch_size, ger_timesteps - 1),
                       name='decoder_inputs')

    # Embeddings, shared between training and inference graphs.
    enc_embed = Embedding(input_dim=eng_vocab_size, output_dim=emb_dim)
    dec_embed = Embedding(input_dim=ger_vocab_size, output_dim=emb_dim)

    # Bidirectional encoder GRU: full sequence plus final fwd/back states.
    enc_gru = Bidirectional(
        GRU(hidden_size, return_sequences=True, return_state=True,
            name='encoder_gru'),
        name='bidirectional_encoder')
    enc_seq, enc_fwd, enc_back = enc_gru(enc_embed(enc_in))

    # Bidirectional decoder GRU, seeded with the encoder's final states.
    dec_gru = Bidirectional(
        GRU(hidden_size, return_sequences=True, return_state=True,
            name='decoder_gru'),
        name='bidirectional_decoder')
    dec_seq, dec_fwd, dec_back = dec_gru(dec_embed(dec_in),
                                         initial_state=[enc_fwd, enc_back])

    # Attention over the encoder sequence, concatenated onto decoder outputs.
    attn_layer = AttentionLayer(name='attention_layer')
    attn_seq, attn_state = attn_layer([enc_seq, dec_seq])
    concat_seq = Concatenate(axis=-1, name='concat_layer')([dec_seq, attn_seq])

    # Per-timestep softmax over the German vocabulary.
    dense = Dense(ger_vocab_size, activation='softmax', name='softmax_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    preds = dense_time(concat_seq)

    full_model = Model(inputs=[enc_in, dec_in], outputs=preds)
    full_model.compile(optimizer='adam', loss='categorical_crossentropy')
    full_model.summary()

    """ Inference model """
    batch_size = 1

    """ Encoder (Inference) model """
    enc_inf_in = Input(batch_shape=(batch_size, eng_timesteps),
                       name='encoder_inf_inputs')
    enc_inf_seq, enc_inf_fwd, enc_inf_back = enc_gru(enc_embed(enc_inf_in))
    encoder_model = Model(inputs=enc_inf_in,
                          outputs=[enc_inf_seq, enc_inf_fwd, enc_inf_back])

    """ Decoder (Inference) model: one word per call, states fed back in """
    dec_inf_in = Input(batch_shape=(batch_size, 1), name='decoder_word_inputs')
    enc_inf_states_in = Input(
        batch_shape=(batch_size, eng_timesteps, 2 * hidden_size),
        name='encoder_inf_states')
    dec_init_fwd = Input(batch_shape=(batch_size, hidden_size),
                         name='decoder_fwd_init')
    dec_init_back = Input(batch_shape=(batch_size, hidden_size),
                          name='decoder_back_init')

    dec_inf_seq, dec_inf_fwd, dec_inf_back = dec_gru(
        dec_embed(dec_inf_in), initial_state=[dec_init_fwd, dec_init_back])
    attn_inf_seq, attn_inf_state = attn_layer([enc_inf_states_in, dec_inf_seq])
    dec_inf_concat = Concatenate(axis=-1,
                                 name='concat')([dec_inf_seq, attn_inf_seq])
    dec_inf_pred = TimeDistributed(dense)(dec_inf_concat)

    decoder_model = Model(
        inputs=[enc_inf_states_in, dec_init_fwd, dec_init_back, dec_inf_in],
        outputs=[dec_inf_pred, attn_inf_state, dec_inf_fwd, dec_inf_back])

    return full_model, encoder_model, decoder_model
def build_attention_model(self): if self.cell_type == "RNN": # encoder encoder_inputs = Input(shape=(None, len(self.srcChar2Int))) encoder_outputs = encoder_inputs for i in range(1, self.numEncoders + 1): encoder = SimpleRNN( self.latentDim, return_state=True, return_sequences=True, dropout=self.dropout, ) encoder_outputs, state = encoder(encoder_inputs) if i == 1: encoder_first_outputs= encoder_outputs encoder_states = [state] # decoder decoder_inputs = Input(shape=(None, len(self.tgtChar2Int))) decoder_outputs = decoder_inputs for i in range(1, self.numDecoders + 1): decoder = SimpleRNN( self.latentDim, return_sequences=True, return_state=True, dropout=self.dropout, ) decoder_outputs, _ = decoder(decoder_inputs, initial_state=encoder_states) if i == self.numDecoders: decoder_first_outputs = decoder_outputs attention_layer = AttentionLayer(name='attention_layer') attention_out, attention_states = attention_layer([encoder_first_outputs, decoder_first_outputs]) decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_out]) # dense hidden = Dense(self.hidden, activation="relu") hidden_time = TimeDistributed(hidden, name='time_distributed_layer') hidden_outputs = hidden(decoder_concat_input) decoder_dense = Dense(len(self.tgtChar2Int), activation="softmax") decoder_outputs = decoder_dense(hidden_outputs) model = Model([encoder_inputs, decoder_inputs], decoder_outputs) return model elif self.cell_type == "LSTM": # encoder encoder_inputs = Input(shape=(None, len(self.srcChar2Int))) encoder_outputs = encoder_inputs for i in range(1, self.numEncoders + 1): encoder = LSTM( self.latentDim, return_state=True, return_sequences=True, dropout=self.dropout, ) encoder_outputs, state_h, state_c = encoder(encoder_outputs) if i == 1: encoder_first_outputs= encoder_outputs encoder_states = [state_h, state_c] # decoder decoder_inputs = Input(shape=(None, len(self.tgtChar2Int))) decoder_outputs = decoder_inputs for i in range(1, self.numDecoders + 
1): decoder = LSTM( self.latentDim, return_state=True, return_sequences=True, dropout=self.dropout, ) decoder_outputs, _, _ = decoder( decoder_outputs, initial_state=encoder_states ) if i == self.numDecoders: decoder_first_outputs = decoder_outputs attention_layer = AttentionLayer(name='attention_layer') attention_out, attention_states = attention_layer([encoder_first_outputs, decoder_first_outputs]) decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_out]) # dense hidden = Dense(self.hidden, activation="relu") hidden_time = TimeDistributed(hidden, name='time_distributed_layer') hidden_outputs = hidden(decoder_concat_input) decoder_dense = Dense(len(self.tgtChar2Int), activation="softmax") decoder_outputs = decoder_dense(hidden_outputs) model = Model([encoder_inputs, decoder_inputs], decoder_outputs) return model elif self.cell_type == "GRU": # encoder encoder_inputs = Input(shape=(None, len(self.srcChar2Int))) encoder_outputs = encoder_inputs for i in range(1, self.numEncoders + 1): encoder = GRU( self.latentDim, return_state=True, return_sequences=True, dropout=self.dropout, ) encoder_outputs, state = encoder(encoder_inputs) if i == 1: encoder_first_outputs= encoder_outputs encoder_states = [state] # decoder decoder_inputs = Input(shape=(None, len(self.tgtChar2Int))) decoder_outputs = decoder_inputs for i in range(1, self.numDecoders + 1): decoder = GRU( self.latentDim, return_sequences=True, return_state=True, dropout=self.dropout, ) decoder_outputs, _ = decoder(decoder_inputs, initial_state=encoder_states) if i == self.numDecoders: decoder_first_outputs = decoder_outputs attention_layer = AttentionLayer(name='attention_layer') attention_out, attention_states = attention_layer([encoder_first_outputs, decoder_first_outputs]) decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_out]) # dense hidden = Dense(self.hidden, activation="relu") hidden_time = TimeDistributed(hidden, 
name='time_distributed_layer') hidden_outputs = hidden(decoder_concat_input) decoder_dense = Dense(len(self.tgtChar2Int), activation="softmax") decoder_outputs = decoder_dense(hidden_outputs) model = Model([encoder_inputs, decoder_inputs], decoder_outputs) return model
    def decode_layer(self):
        """Rebuild the attention summarization model, load trained weights and
        assemble the inference-time encoder/decoder models.

        Side effects: sets self.tar_word_to_index, self.tar_index_to_word,
        self.encoder_model and self.decoder_model.

        NOTE(review): assumes self.text_max_len, self.src_vocab,
        self.tar_vocab, self.embedding_dim, self.hidden_size and
        self.tar_tokenizer are initialized, and that
        'APP/Data/attention_best_model_v2.h5' matches this architecture.
        """
        # Encoder
        encoder_inputs = Input(shape=(self.text_max_len, ))

        # Encoder embedding layer
        enc_emb = Embedding(self.src_vocab, self.embedding_dim)(encoder_inputs)

        # Encoder LSTM 1
        encoder_lstm1 = LSTM(self.hidden_size, return_sequences=True,
                             return_state=True, dropout=0.4,
                             recurrent_dropout=0.4)
        encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)

        # Encoder LSTM 2
        encoder_lstm2 = LSTM(self.hidden_size, return_sequences=True,
                             return_state=True, dropout=0.4,
                             recurrent_dropout=0.4)
        encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)

        # Encoder LSTM 3 (final layer; its states seed the decoder)
        encoder_lstm3 = LSTM(self.hidden_size, return_state=True,
                             return_sequences=True, dropout=0.4,
                             recurrent_dropout=0.4)
        encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2)

        # Decoder
        decoder_inputs = Input(shape=(None, ))

        # Decoder embedding layer (kept as a layer object for inference reuse)
        dec_emb_layer = Embedding(self.tar_vocab, self.embedding_dim)
        dec_emb = dec_emb_layer(decoder_inputs)

        # Decoder LSTM, initialized with the encoder's final states
        decoder_lstm = LSTM(self.hidden_size, return_sequences=True,
                            return_state=True, dropout=0.4,
                            recurrent_dropout=0.2)
        decoder_outputs, _, _ = decoder_lstm(dec_emb,
                                             initial_state=[state_h, state_c])

        # Attention layer (attention function)
        attn_layer = AttentionLayer(name='attention_layer')
        attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])

        # Concatenate the attention result with the decoder hidden states
        decoder_concat_input = Concatenate(
            axis=-1, name='concat_layer')([decoder_outputs, attn_out])

        # Decoder output layer
        decoder_softmax_layer = Dense(self.tar_vocab, activation='softmax')
        decoder_softmax_outputs = decoder_softmax_layer(decoder_concat_input)

        # Model definition — must match the trained architecture exactly so
        # that load_weights succeeds.
        model = Model([encoder_inputs, decoder_inputs], decoder_softmax_outputs)
        model.summary()
        model.load_weights('APP/Data/attention_best_model_v2.h5')
        model.compile(optimizer='rmsprop',
                      loss='sparse_categorical_crossentropy')

        # seq2seq + attention summarization: tokenizer lookups
        self.tar_word_to_index = self.tar_tokenizer.word_index  # target word -> integer
        self.tar_index_to_word = self.tar_tokenizer.index_word  # integer -> target word

        # Inference encoder: returns the full sequence output plus final states
        self.encoder_model = Model(inputs=encoder_inputs,
                                   outputs=[encoder_outputs, state_h, state_c])

        # Inputs holding the previous timestep's decoder states
        decoder_state_input_h = Input(shape=(self.hidden_size, ))
        decoder_state_input_c = Input(shape=(self.hidden_size, ))

        dec_emb2 = dec_emb_layer(decoder_inputs)
        # Predict the next word using the previous timestep's states as the
        # initial state
        decoder_outputs2, state_h2, state_c2 = decoder_lstm(
            dec_emb2,
            initial_state=[decoder_state_input_h, decoder_state_input_c])

        # Attention function: attends over the (precomputed) encoder sequence
        decoder_hidden_state_input = Input(shape=(self.text_max_len,
                                                  self.hidden_size))
        attn_out_inf, attn_states_inf = attn_layer(
            [decoder_hidden_state_input, decoder_outputs2])
        decoder_inf_concat = Concatenate(
            axis=-1, name='concat')([decoder_outputs2, attn_out_inf])

        # Decoder output layer (reused trained softmax)
        decoder_outputs2 = decoder_softmax_layer(decoder_inf_concat)

        # Final inference decoder model
        self.decoder_model = Model([decoder_inputs] + [
            decoder_hidden_state_input, decoder_state_input_h,
            decoder_state_input_c
        ], [decoder_outputs2] + [state_h2, state_c2])
recurrent_dropout=0.4) encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2) decoder_inputs = Input(shape=(None, )) dec_emb_layer = Embedding(y_voc, embedding_dim, trainable=True) dec_emb = dec_emb_layer(decoder_inputs) decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True, dropout=0.4, recurrent_dropout=0.2) decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm( dec_emb, initial_state=[state_h, state_c]) attn_layer = AttentionLayer(name="attention_layer") attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs]) decoder_concat_input = Concatenate( axis=-1, name='concat_layer')([decoder_outputs, attn_out]) decoder_dense = TimeDistributed(Dense(y_voc, activation='softmax')) decoder_outputs = decoder_dense(decoder_concat_input) model = Model([encoder_inputs, decoder_inputs], decoder_outputs) model.summary() model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy') es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=2) history = model.fit([x_tr, y_tr[:, :-1]],
#LSTM 3 encoder_lstm3=LSTM(latent_dim, return_state=True, return_sequences=True) encoder_outputs, state_h, state_c= encoder_lstm3(encoder_output2) ''' # Set up the decoder. decoder_inputs = Input(shape=(None, )) dec_emb_layer = Embedding(y_voc_size, latent_dim, trainable=True) dec_emb = dec_emb_layer(decoder_inputs) #LSTM using encoder_states as initial state decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True) decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm( dec_emb, initial_state=[state_h, state_c]) #Attention Layer attn_out, attn_states = AttentionLayer(name='attention_layer')( [encoder_output, decoder_outputs]) # Concat attention output and decoder LSTM output decoder_concat_input = Concatenate( axis=-1, name='concat_layer')([decoder_outputs, attn_out]) #Dense layer decoder_dense = TimeDistributed(Dense(y_voc_size, activation='softmax')) decoder_outputs = decoder_dense(decoder_concat_input) # Define the model model = Model([encoder_inputs, decoder_inputs], decoder_outputs) model.summary() model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')
decoder_inputs = Input(shape=(None, )) #embedding layer dec_emb_layer = Embedding(y_voc, embedding_dim, trainable=True) dec_emb = dec_emb_layer(decoder_inputs) decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True, dropout=0.4, recurrent_dropout=0.2) decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm( dec_emb, initial_state=[state_h, state_c]) # Attention layer attn_layer = AttentionLayer(name='attention_layer') attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs]) # Concat attention input and decoder LSTM output decoder_concat_input = Concatenate( axis=-1, name='concat_layer')([decoder_outputs, attn_out]) #dense layer decoder_dense = TimeDistributed(Dense(y_voc, activation='softmax')) decoder_outputs = decoder_dense(decoder_concat_input) # Define the model model = Model([encoder_inputs, decoder_inputs], decoder_outputs) model.summary()
def create_model(latent_dim, bidirectional, attention=False):
    """Build encoder, decoder and combined seq2seq models.

    Generalized: `attention` is now a real (backward-compatible) parameter.
    The original hard-coded `attention = False`, which made its attention
    branch dead code — and that branch was also broken: it computed the
    attention/concat tensor but then applied the dense layer to the plain
    decoder outputs, and `encoder_outputs` was never an input of the decoder
    model (a disconnected graph).  With the default `attention=False` this
    function builds exactly the original graph.

    :param latent_dim: LSTM state size
    :param bidirectional: use a bidirectional encoder (doubles state width)
    :param attention: wire an AttentionLayer over the encoder sequence
    :return: (enc_model, dec_model, model) — encoder, decoder and the
        combined training model (compiled)
    """
    ## Create encoder over 14-dimensional one-hot input
    encoder_inputs = Input(shape=(None, 14))
    if bidirectional:
        # return_sequences is only needed when attention consumes the full
        # encoder sequence; with attention=False this matches the original
        # (sequences not returned).
        encoder = Bidirectional(LSTM(latent_dim, return_sequences=attention,
                                     return_state=True))
        encoder_outputs, forward_h, forward_c, backward_h, backward_c = encoder(
            encoder_inputs)
        state_h = Concatenate()([forward_h, backward_h])
        state_c = Concatenate()([forward_c, backward_c])
        print('state_h.size', k.shape(state_h))
        print('state_c.size', k.shape(state_c))
    else:
        encoder = LSTM(latent_dim, return_sequences=True, return_state=True)
        encoder_outputs, state_h, state_c = encoder(encoder_inputs)
    encoder_states = [state_h, state_c]

    ## Keep encoder as separate model; with attention it must also expose the
    ## full sequence outputs for the attention layer.
    if attention:
        enc_model = Model(encoder_inputs, [encoder_outputs] + encoder_states,
                          name='encoder_model')
    else:
        enc_model = Model(encoder_inputs, encoder_states, name='encoder_model')

    ## Create decoder over 38-dimensional one-hot input; a bidirectional
    ## encoder doubles the state width the decoder must accept.
    decoder_input = Input(shape=(None, 38))
    dec_latent = 2 * latent_dim if bidirectional else latent_dim
    in_h_state = Input(shape=(dec_latent, ))
    in_e_state = Input(shape=(dec_latent, ))
    decoder_lstm = LSTM(dec_latent, return_sequences=True, return_state=True)
    decoder_outputs, dec_h_state, dec_c_state = decoder_lstm(
        decoder_input, initial_state=[in_h_state, in_e_state])

    if attention:
        # The encoder sequence enters the decoder model through its own Input
        # so dec_model stays a self-contained graph.
        enc_seq_input = Input(shape=(None, dec_latent))
        attn_layer = AttentionLayer(name='attention_layer')
        attn_out, attn_states = attn_layer([enc_seq_input, decoder_outputs])
        dense_in = Concatenate(axis=-1,
                               name='concat_layer')([decoder_outputs, attn_out])
    else:
        dense_in = decoder_outputs

    decoder_dense = Dense(38, activation='softmax')
    decoder_pred = decoder_dense(dense_in)

    ## Keep decoder as separate model
    if attention:
        dec_model = Model([decoder_input, enc_seq_input, in_h_state, in_e_state],
                          [decoder_pred, dec_h_state, dec_c_state],
                          name='decoder_model')
        enc_out_seq, enc_h, enc_c = enc_model(encoder_inputs)
        combined_out = dec_model([decoder_input, enc_out_seq, enc_h, enc_c])[0]
    else:
        dec_model = Model([decoder_input, in_h_state, in_e_state],
                          [decoder_pred, dec_h_state, dec_c_state],
                          name='decoder_model')
        ## Output is the 0th output of dec_model (not the states) when applied
        ## on decoder input with encoded states
        combined_out = dec_model([decoder_input] + enc_model(encoder_inputs))[0]

    ## Combined encoder and decoder model
    model = Model([encoder_inputs, decoder_input], combined_out,
                  name='combined_model')
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    ## Return all 3 models
    model.summary()
    return enc_model, dec_model, model