Exemple #1
0
def build_model3():
    """Build a CNN + attention-pooling + BiLSTM binary classifier.

    Input is a (5000, 4) sequence (presumably one-hot encoded — TODO
    confirm); output is a single sigmoid probability.  Depends on the
    module-level globals ``filter_length``, ``lstm_units`` and
    ``danse_units`` being defined elsewhere in this file.

    Returns:
        An uncompiled Keras ``Model``.
    """
    input_layer = Input(shape=(5000, 4))

    # Two stacked 1-D convolutions, each L2-regularised and batch-normed.
    conv_layer1 = Conv1D(filters=64,
                         kernel_size=20,
                         padding='same',
                         activation='relu',
                         strides=1,
                         kernel_regularizer=regularizers.l2(1e-5),
                         bias_regularizer=regularizers.l2(1e-5),
                         name='cnn1')(input_layer)

    bn1 = BatchNormalization(name='bn1')(conv_layer1)

    conv_layer2 = Conv1D(filters=128,
                         kernel_size=20,
                         padding='same',
                         activation='relu',
                         strides=1,
                         kernel_regularizer=regularizers.l2(1e-5),
                         bias_regularizer=regularizers.l2(1e-5),
                         name='cnn2')(bn1)

    bn2 = BatchNormalization(name='bn2')(conv_layer2)

    # Split the time axis into windows of filter_length/2 steps so the
    # attention layer can pool within each window.
    reshape = Reshape(
        (int(5000 * 2 / filter_length), int(filter_length / 2), 128))(bn2)
    attention_pooling = TimeDistributed(AttentionLayer(),
                                        name='attentionPooling')(reshape)

    bn3 = BatchNormalization(name='bn3')(attention_pooling)

    bilstm_layer = Bidirectional(LSTM(units=lstm_units,
                                      return_sequences=True,
                                      kernel_regularizer=regularizers.l2(1e-5),
                                      bias_regularizer=regularizers.l2(1e-5)),
                                 name='bilstm')(bn3)

    bn4 = BatchNormalization(name='bn4')(bilstm_layer)

    flatten = Flatten()(bn4)

    dense_layer = Dense(units=danse_units,
                        kernel_regularizer=regularizers.l2(1e-5),
                        bias_regularizer=regularizers.l2(1e-5),
                        activation='relu',
                        name='dense')(flatten)

    bn5 = BatchNormalization(name='bn5')(dense_layer)

    # Single-unit sigmoid head for binary classification.
    output_layer = Dense(units=1,
                         kernel_regularizer=regularizers.l2(1e-5),
                         bias_regularizer=regularizers.l2(1e-5),
                         activation='sigmoid',
                         name='classify')(bn5)

    # BUG FIX: Keras removed the ``input=``/``output=`` keyword aliases;
    # the functional API takes ``inputs=``/``outputs=``.
    model = Model(inputs=input_layer, outputs=output_layer)

    return model
def model(max_features,maxlen,attention=True):
    """Build a Bi-GRU binary classifier, with or without attention.

    You must supply ``max_features`` and ``maxlen``; when ``attention``
    is False the GRU outputs are pooled with GlobalAvgPool1D instead of
    the AttentionLayer.

    :param max_features: vocabulary size for the embedding layer.
    :param maxlen: length of the integer input sequences.
    :param attention: add an AttentionLayer on top of the Bi-GRU.
    :return: a compiled Keras model (binary cross-entropy, rmsprop).
    """
    emb_layer = Embedding(input_dim=max_features,
                          output_dim=128,
                          input_length=maxlen,
                          trainable=True)

    seq_input = Input(shape=(maxlen,), dtype='int32')
    embedded = emb_layer(seq_input)
    gru_out = Bidirectional(GRU(100, return_sequences=True))(embedded)

    # Pooling head: attention or plain global averaging.
    if attention:
        pooled = AttentionLayer()(gru_out)
    else:
        pooled = GlobalAvgPool1D()(gru_out)
    preds = Dense(1, activation='sigmoid')(pooled)

    net = Model(seq_input, preds)
    net.compile(loss='binary_crossentropy',
                optimizer='rmsprop',
                metrics=['acc'])
    net.summary()
    return net
Exemple #3
0
def build_model2():
    """Build a CNN + max-pooling + BiLSTM + attention binary classifier.

    Input is a (5000, 4) sequence; output is a single sigmoid
    probability.  Depends on the module-level globals ``filter_length``,
    ``lstm_units`` and ``danse_units`` being defined elsewhere in this
    file.

    Returns:
        An uncompiled Keras ``Model``.
    """
    input_layer = Input(shape=(5000, 4))

    # Two stacked 1-D convolutions, each L2-regularised and batch-normed.
    conv_layer1 = Conv1D(filters=64,
                         kernel_size=20,
                         padding='same',
                         activation='relu',
                         strides=1,
                         kernel_regularizer=regularizers.l2(1e-5),
                         bias_regularizer=regularizers.l2(1e-5),
                         name='cnn1')(input_layer)

    bn1 = BatchNormalization(name='bn1')(conv_layer1)

    conv_layer2 = Conv1D(filters=128,
                         kernel_size=20,
                         padding='same',
                         activation='relu',
                         strides=1,
                         kernel_regularizer=regularizers.l2(1e-5),
                         bias_regularizer=regularizers.l2(1e-5),
                         name='cnn2')(bn1)

    bn2 = BatchNormalization(name='bn2')(conv_layer2)

    # Downsample the time axis by a factor of filter_length/2.
    max_pool_layer = MaxPooling1D(pool_size=int(filter_length / 2),
                                  strides=int(filter_length / 2),
                                  padding='same')(bn2)

    bn3 = BatchNormalization(name='bn3')(max_pool_layer)

    bilstm_layer = Bidirectional(LSTM(units=lstm_units,
                                      return_sequences=True,
                                      kernel_regularizer=regularizers.l2(1e-5),
                                      bias_regularizer=regularizers.l2(1e-5)),
                                 name='lstm')(bn3)

    bn4 = BatchNormalization(name='bn4')(bilstm_layer)

    # Attention reduces the sequence to a fixed-size representation.
    attention_layer = AttentionLayer(name='attention')(bn4)

    bn5 = BatchNormalization(name='bn5')(attention_layer)

    dense_layer = Dense(units=danse_units,
                        kernel_regularizer=regularizers.l2(1e-5),
                        bias_regularizer=regularizers.l2(1e-5),
                        activation='relu',
                        name='dense')(bn5)

    bn6 = BatchNormalization(name='bn6')(dense_layer)

    # Single-unit sigmoid head for binary classification.
    output_layer = Dense(units=1,
                         kernel_regularizer=regularizers.l2(1e-5),
                         bias_regularizer=regularizers.l2(1e-5),
                         activation='sigmoid',
                         name='classify')(bn6)

    # BUG FIX: Keras removed the ``input=``/``output=`` keyword aliases;
    # the functional API takes ``inputs=``/``outputs=``.
    model = Model(inputs=input_layer, outputs=output_layer)

    return model
Exemple #4
0
    def __init__(self,
                 args,
                 rnn_type,
                 vocab_size,
                 embedding_dim,
                 hidden_size,
                 num_layers,
                 dropout=0.5,
                 bidirectional=False,
                 pretrained_embedding=None):
        """Word-level RNN document model with an attention head.

        ``rnn_type`` names any ``torch.nn`` recurrent class (e.g. 'GRU',
        'LSTM').  Separate linear decoders are kept for the uni- and
        bidirectional cases.  ``pretrained_embedding`` is accepted but
        not read here (init_weights may consume it — TODO confirm).
        """
        super(RNNModel, self).__init__()
        # Token embedding lookup table.
        self.encoder = nn.Embedding(vocab_size, embedding_dim)
        # Resolve the recurrent cell class by name from torch.nn.
        rnn_cls = getattr(nn, rnn_type)
        self.rnn = rnn_cls(embedding_dim,
                           hidden_size,
                           num_layers,
                           bias=False,
                           dropout=dropout,
                           bidirectional=bidirectional)
        # Scoring heads: one for unidirectional, one for bidirectional output.
        self.decoder = nn.Linear(hidden_size, 1)
        self.decoder_bi = nn.Linear(hidden_size * 2, 1)
        self.AttentionLayer = AttentionLayer(args, hidden_size)

        # Document-level projection followed by a scalar score.
        self.auglinear = nn.Linear(hidden_size * 2, args.document_hidden_size)
        self.decoder_ = nn.Linear(args.document_hidden_size, 1)

        self.init_weights()

        # Bookkeeping for forward().
        self.args, self.rnn_type = args, rnn_type
        self.bidirectional = bidirectional
        self.hidden_size, self.num_layers = hidden_size, num_layers
Exemple #5
0
    def __init__(self, config, tok2i, sampler, encoder):
        """LSTM token-sequence decoder, optionally with attention.

        Args:
            config: dict of hyper-parameters and flags (dims, layer
                counts, 'model_type', 'share_inout_emb', ...).
            tok2i: token -> index map; must contain '<s>' and '<end>'.
            sampler: sampling strategy object used during decoding.
            encoder: the paired encoder module (ONLSTMEncoder).
        """
        super(LSTMDecoder, self).__init__()
        self.fc_dim = config['fc_dim']  # fc?
        self.dec_lstm_dim = config['dec_lstm_dim']
        self.dec_n_layers = config['dec_n_layers']  # decoder layers number
        self.n_classes = config['n_classes']  # number of token classes
        self.word_emb_dim = config[
            'word_emb_dim']  # dimension of word embedding
        self.device = config['device']
        self.longest_label = config['longest_label']
        self.model_type = config['model_type']
        self.aux_end = config.get('aux_end', False)
        self.encoder = encoder  # encoder is ONLSTMEncoder

        # -- Decoder
        self.dec_lstm_input_dim = config.get('dec_lstm_input_dim',
                                             self.word_emb_dim)
        self.dec_lstm = nn.LSTM(self.dec_lstm_input_dim,
                                self.dec_lstm_dim,
                                self.dec_n_layers,
                                batch_first=True)  # use torch implemented LSTM
        self.dec_emb = nn.Embedding(self.n_classes, self.word_emb_dim)
        if config['nograd_emb']:
            # Freeze the decoder embedding matrix.
            self.dec_emb.weight.requires_grad = False
        self.dropout = nn.Dropout(p=config['dropout'])

        # Layers for mapping LSTM output to scores
        self.o2emb = nn.Linear(self.dec_lstm_dim, self.word_emb_dim)
        # Optionally use the (|V| x d_emb) matrix from the embedding layer here.
        if config['share_inout_emb']:  # in bagorder, default is True
            # NOTE(review): uniform_(0.01) samples from [0.01, 1); a
            # symmetric range like uniform_(-0.01, 0.01) may have been
            # intended — confirm before changing.
            self.out_bias = nn.Parameter(
                torch.zeros(self.n_classes).uniform_(0.01))
            self.emb2score = lambda x: F.linear(
                x, self.dec_emb.weight, self.out_bias
            )  # emb2score(x) = x*dec_emb.weight + out_bias
        else:
            self.emb2score = nn.Linear(self.word_emb_dim, self.n_classes)

        # Start-of-sequence token index kept as a (non-parameter) buffer.
        self.register_buffer('START', torch.LongTensor([tok2i['<s>']]))
        self.sampler = sampler
        self.end = tok2i['<end>']

        if self.aux_end:
            # Auxiliary stop head: scalar stop probability from LSTM output.
            self.o2stop = nn.Sequential(
                nn.Linear(self.dec_lstm_dim, self.word_emb_dim), nn.ReLU(),
                self.dropout, nn.Linear(self.word_emb_dim, 1), nn.Sigmoid())

        if self.model_type == 'translation':
            # Project encoder state into the decoder's initial hidden state.
            self.enc_to_h0 = nn.Linear(
                config['enc_lstm_dim'] * config['num_dir_enc'],
                self.dec_n_layers * self.dec_lstm_dim)
            self.attention = AttentionLayer(
                input_dim=self.dec_lstm_dim,
                hidden_size=self.dec_lstm_dim,
                bidirectional=config['num_dir_enc'] == 2)
            # The attention decode path is deliberately overridden on the
            # next line; both assignments are kept by the original author.
            self.decode = self.forward_decode_attention
            self.decode = self.forward_decode  # temporarily use this
            # Tie decoder embeddings to the encoder's.
            self.dec_emb.weight = self.encoder.emb.weight
        else:
            self.decode = self.forward_decode  # in bagorder, decode is forward_decode
Exemple #6
0
    def __init__(self,
                 last_stride,
                 block,
                 layers,
                 baseWidth=26,
                 scale=4,
                 num_classes=1000,
                 with_attention=False):
        """Res2Net backbone with a deep (3-conv) stem and optional
        spatial attention modules.

        Args:
            last_stride: stride of the final stage (layer4).
            block: residual block class; must expose ``expansion``.
            layers: per-stage block counts, e.g. [3, 4, 6, 3].
            baseWidth: Res2Net base width (read by _make_layer — TODO
                confirm).
            scale: number of scale branches (read by _make_layer).
            num_classes: classifier output size.
            with_attention: create AttentionLayer(256, 16) and
                AttentionLayer(512, 32) modules (wiring happens in
                forward — TODO confirm).
        """
        self.inplanes = 64
        super(Res2Net, self).__init__()
        self.baseWidth = baseWidth
        self.scale = scale
        # Deep stem: three 3x3 convs in place of a single large conv.
        self.conv1 = nn.Sequential(nn.Conv2d(3, 32, 3, 2, 1, bias=False),
                                   nn.BatchNorm2d(32), nn.ReLU(inplace=True),
                                   nn.Conv2d(32, 32, 3, 1, 1, bias=False),
                                   nn.BatchNorm2d(32), nn.ReLU(inplace=True),
                                   nn.Conv2d(32, 64, 3, 1, 1, bias=False))
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Four residual stages; channel width doubles each stage.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block,
                                       512,
                                       layers[3],
                                       stride=last_stride)

        self.with_attention = with_attention
        if self.with_attention:
            # spatial attention part
            self.attention1 = AttentionLayer(256, 16)
            self.attention2 = AttentionLayer(512, 32)

        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.last_linear = nn.Linear(512 * block.expansion, num_classes)

        # Kaiming init for convolutions, constant init for batch norms.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
Exemple #7
0
 def load_model(self, filepath: str):
     """Load a saved Keras model that contains a custom AttentionLayer.

     Args:
         filepath: path to the saved model file.

     Returns:
         The loaded model (also stored on ``self.model``), or ``None``
         when loading produced a falsy result.
     """
     # BUG FIX: ``custom_objects`` must map the layer name to the class
     # itself; the original passed ``AttentionLayer(Layer)``, i.e. an
     # *instance* constructed with the ``Layer`` class as its argument.
     model = load_model(
         filepath, custom_objects={"AttentionLayer": AttentionLayer})
     if model:
         self.model = model
         return model
     return None
Exemple #8
0
def decoder(ENCODER_OUTPUTS, STATE_H, STATE_C,STATE_H2, STATE_C2,ENCODER_INPUTS):
    '''Create the training decoder layers and the inference decoder model.

    Uses the module-level globals Y_VOC, EMBEDDING_DIM, LATENT_DIM and
    MAX_TEXT_LEN.  STATE_H2/STATE_C2 are accepted for interface
    compatibility but are no longer used (see BUG FIX below).

    Returns:
        (DECODER_INPUTS, DECODER_OUTPUTS, DEC_EMB_LAYER, DECODER_LSTM,
        DECODER_MODEL)
    '''
    # Set up the decoder, using `encoder_states` as initial state.
    DECODER_INPUTS = Input(shape=(None,))

    # Shared embedding layer for decoder tokens.
    DEC_EMB_LAYER = Embedding(Y_VOC, EMBEDDING_DIM, trainable=True)
    DEC_EMB = DEC_EMB_LAYER(DECODER_INPUTS)

    DECODER_LSTM = LSTM(LATENT_DIM, return_sequences=True, return_state=True,
                        dropout=0.4, recurrent_dropout=0.2)
    # return_state on an LSTM yields (outputs, state_h, state_c).
    DECODER_OUTPUTS, DECODER_STATE_H, DECODER_STATE_C = DECODER_LSTM(
        DEC_EMB,
        initial_state=[STATE_H, STATE_C])

    # Attention layer over encoder outputs, queried by decoder outputs.
    ATTN_LAYER = AttentionLayer(name='attention_layer')
    ATTN_OUT, ATTN_STATES = ATTN_LAYER([ENCODER_OUTPUTS, DECODER_OUTPUTS])

    # Concat attention context and decoder LSTM output.
    DECODER_CONCAT_INPUT = Concatenate(axis=-1, name='concat_layer')(
        [DECODER_OUTPUTS, ATTN_OUT])

    # Per-timestep softmax over the target vocabulary.
    DECODER_DENSE = TimeDistributed(Dense(Y_VOC, activation='softmax'))
    DECODER_OUTPUTS = DECODER_DENSE(DECODER_CONCAT_INPUT)

    # ---- Inference graph: previous-step states become model inputs.
    DECODER_STATE_INPUT_H = Input(shape=(LATENT_DIM,))
    DECODER_STATE_INPUT_C = Input(shape=(LATENT_DIM,))
    DECODER_HIDDEN_STATE_INPUT = Input(shape=(MAX_TEXT_LEN,
                                              LATENT_DIM))

    # Get the embeddings of the decoder sequence
    DEC_EMB2 = DEC_EMB_LAYER(DECODER_INPUTS)
    # BUG FIX: keep *both* fresh states from this call.  The original
    # bound the new hidden state to a throwaway name and later emitted
    # the training-time argument tensor STATE_H2 as a model output,
    # which does not belong to the inference graph.
    DECODER_OUTPUTS2, STATE_H2_INF, STATE_C2_INF = DECODER_LSTM(
        DEC_EMB2,
        initial_state=[DECODER_STATE_INPUT_H, DECODER_STATE_INPUT_C])

    # attention inference
    ATTN_OUT_INF, ATTN_STATES_INF = ATTN_LAYER([DECODER_HIDDEN_STATE_INPUT,
                                                DECODER_OUTPUTS2])
    DECODER_INF_CONCAT = Concatenate(axis=-1, name='concat')([DECODER_OUTPUTS2,
                                                              ATTN_OUT_INF])

    # A dense softmax layer to generate prob dist. over the target vocabulary
    DECODER_OUTPUTS2 = DECODER_DENSE(DECODER_INF_CONCAT)

    # Final decoder model, now returning its own fresh states.
    DECODER_MODEL = Model(
        [DECODER_INPUTS] + [DECODER_HIDDEN_STATE_INPUT, DECODER_STATE_INPUT_H,
                            DECODER_STATE_INPUT_C],
        [DECODER_OUTPUTS2] + [STATE_H2_INF, STATE_C2_INF])

    return DECODER_INPUTS, DECODER_OUTPUTS,DEC_EMB_LAYER,DECODER_LSTM,DECODER_MODEL
Exemple #9
0
    def create_model_base(self, embeddings_matrix):
        """Creates the base hierarchical attention network with multiclass
        or binary tuning. If doing non-binary classification, set
        self.num_classes to number of classes.

        Args:
            embeddings_matrix: pretrained embedding weights shaped
                (len(self.word_index) + 1, ATTENTION_DIM).

        Returns:
            A compiled Keras model over integer inputs of shape
            (MAX_SENTENCE_COUNT, MAX_SENTENCE_LENGTH).
        """
        # Word-level encoder: embedding -> Bi-GRU -> attention.
        embedding_layer = Embedding(
            len(self.word_index) + 1,
            ATTENTION_DIM,
            weights=[embeddings_matrix],
            input_length=MAX_SENTENCE_LENGTH,
            trainable=True,
            mask_zero=True,
        )
        sentence_input = Input(shape=(MAX_SENTENCE_LENGTH, ), dtype="int32")
        embedded_sequences = embedding_layer(sentence_input)
        l_lstm = Bidirectional(GRU(ATTENTION_DIM,
                                   return_sequences=True))(embedded_sequences)
        l_att = AttentionLayer(ATTENTION_DIM)(l_lstm)
        sentEncoder = Model(sentence_input, l_att)

        # Document-level encoder: the sentence encoder applied per
        # sentence, followed by a sentence-level Bi-GRU + attention.
        input_layer = Input(shape=(MAX_SENTENCE_COUNT, MAX_SENTENCE_LENGTH),
                            dtype="int32")
        layer_encoder = TimeDistributed(sentEncoder)(input_layer)
        l_lstm_sent = Bidirectional(GRU(ATTENTION_DIM,
                                        return_sequences=True))(layer_encoder)
        l_att_sent = AttentionLayer(ATTENTION_DIM)(l_lstm_sent)
        if self.num_classes > 2:
            # multi-class classifier
            preds = Dense(self.num_classes, activation="softmax")(l_att_sent)
            # BUG FIX: removed the stray ``model = Model(input_layer)``
            # call that preceded this line — Model() requires outputs,
            # and its result was immediately overwritten anyway.
            model = Model(input_layer, preds)
            model.compile(loss="categorical_crossentropy",
                          optimizer="adadelta",
                          metrics=metrics)
        else:
            # binary classifier
            preds = Dense(2, activation="softmax")(l_att_sent)
            model = Model(input_layer, preds)
            model.compile(loss="binary_crossentropy",
                          optimizer="adadelta",
                          metrics=metrics)
        return model
Exemple #10
0
    def __init__(self,
                 vocab_size,
                 embedding_size,
                 hidden_size,
                 rnncell='GRU',
                 num_layers=1,
                 max_unroll=40,
                 dropout=0.0,
                 word_drop=0.0,
                 batch_first=True,
                 sample=False,
                 temperature=1.0,
                 use_attention=True,
                 attn_size=128,
                 sos_id=2,
                 eos_id=3,
                 use_input_feed=True,
                 use_kb=False,
                 is_mutlitask=False,
                 kb_size=None,
                 celeb_vec_size=None,
                 state_size=None):
        """GRU decoder with attention and assorted task flags.

        Note: despite the ``rnncell`` parameter, the recurrent core
        constructed below is always ``nn.GRU`` — the argument value is
        never read in this constructor.
        """
        super(DecoderRNN, self).__init__()

        # Plain bookkeeping of hyper-parameters for use in forward().
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.sos_id = sos_id
        self.eos_id = eos_id
        self.num_layers = num_layers
        self.dropout = dropout
        self.temperature = temperature
        self.word_drop = word_drop
        self.max_unroll = max_unroll
        self.sample = sample
        self.is_mutlitask = is_mutlitask
        self.use_kb = use_kb
        self.state_size = state_size
        self.kb_size = kb_size
        # self.beam_size = beam_size
        self.attn_size = attn_size
        self.celeb_vec_size = celeb_vec_size
        self.use_input_feed = use_input_feed
        self.embedding = nn.Embedding(vocab_size, self.embedding_size)
        # _input_size() presumably widens the GRU input for input feeding
        # and/or KB vectors — TODO confirm against its definition.
        self.rnncell = nn.GRU(self._input_size(),
                              self.hidden_size,
                              num_layers=num_layers,
                              batch_first=batch_first,
                              dropout=dropout)
        self.use_attention = use_attention
        self.attention = AttentionLayer(self.attn_size)
        self.out = nn.Linear(hidden_size, vocab_size)
        # NOTE(review): nn.Softmax() without dim= is deprecated in torch;
        # confirm the intended axis before adding one.
        self.softmax = nn.Softmax()
        self.sigmoid = nn.Sigmoid()
 def __init__(self, args, vocab_size, pretrained=None):
     """Plain recurrent sentence classifier (3-way output) with an
     attention head; the RNN flavour comes from ``args.rnn_type``."""
     super(BasicRNN, self).__init__()
     self.args = args
     self.vocab_size = vocab_size
     # Token embedding lookup.
     self.encoder = nn.Embedding(self.vocab_size, self.args.embedding_size)
     # Recurrent core resolved by name ('GRU', 'LSTM', 'RNN', ...).
     rnn_cls = getattr(nn, self.args.rnn_type)
     self.rnn = rnn_cls(self.args.embedding_size,
                        self.args.rnn_size,
                        self.args.rnn_layers,
                        bias=False)
     # 3-class scoring head plus softmax.
     self.decoder = nn.Linear(self.args.rnn_size, 3)
     self.softmax = nn.Softmax()
     self.AttentionLayer = AttentionLayer(self.args, self.args.rnn_size)
     self.init_weights(pretrained=pretrained)
     print("Initialized {} model".format(self.args.rnn_type))
Exemple #12
0
def get_model(hidden_size, batch_size, en_timesteps, en_vsize, fr_timesteps, fr_vsize):
	"""Build a prunable Bi-GRU seq2seq model plus its inference models.

	Args:
		hidden_size: encoder GRU width (decoder uses 2 * hidden_size).
		batch_size: fixed batch size for training inputs, or falsy for a
			variable batch size; inference models always use batch 1.
		en_timesteps, en_vsize: source sequence length / vocabulary size.
		fr_timesteps, fr_vsize: target sequence length / vocabulary size.

	Returns:
		(full_model, encoder_model, decoder_model): the training model and
		the two inference models sharing the same layers and weights.
	"""
	# Define an input sequence and process it.
	if batch_size:
		encoder_inputs = Input(batch_shape=(batch_size, en_timesteps, en_vsize), name="encoder_inputs")
		decoder_inputs = Input(batch_shape=(batch_size, fr_timesteps, fr_vsize), name="decoder_inputs")
	else:
		encoder_inputs = Input(shape=(en_timesteps, en_vsize), name="encoder_inputs")
		decoder_inputs = Input(shape=(fr_timesteps, fr_vsize), name="decoder_inputs")

	# Encoder GRU (bidirectional, returns both directions' final states).
	encoder_gru = layers.Bidirectional(prunable_layers.PrunableGRU(hidden_size, return_sequences=True, return_state=True, name="encoder_gru"), name="bidirectional_encoder")
	encoder_out, encoder_fwd_state, encoder_back_state = encoder_gru(encoder_inputs)

	# Set up the decoder GRU, using the concatenated encoder states as initial state.
	decoder_gru = prunable_layers.PrunableGRU(hidden_size * 2, return_sequences=True, return_state=True, name="decoder_gru")
	decoder_out, decoder_state = decoder_gru(decoder_inputs, initial_state=layers.Concatenate(axis=-1)([encoder_fwd_state, encoder_back_state]))

	# Attention layer
	attn_layer = AttentionLayer(name='attention_layer')
	attn_out, attn_states = attn_layer([encoder_out, decoder_out])

	# Concat attention input and decoder GRU output
	decoder_concat_input = layers.Concatenate(axis=-1, name='concat_layer')([decoder_out, attn_out])

	# Dense layer (per-timestep softmax over the target vocabulary)
	dense = prunable_layers.PrunableDense(fr_vsize, activation='softmax', name='softmax_layer')
	dense_time = layers.TimeDistributed(dense, name='time_distributed_layer')
	decoder_pred = dense_time(decoder_concat_input)

	# Full model
	full_model = models.Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_pred)

	""" Inference model """
	# Inference decodes one step at a time with a fixed batch of 1.
	batch_size = 1

	""" Encoder (Inference) model """
	encoder_inf_inputs = Input(batch_shape=(batch_size, en_timesteps, en_vsize), name='encoder_inf_inputs')
	encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state = encoder_gru(encoder_inf_inputs)
	encoder_model = models.Model(inputs=encoder_inf_inputs, outputs=[encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state])

	""" Decoder (Inference) model """
	decoder_inf_inputs = Input(batch_shape=(batch_size, 1, fr_vsize), name='decoder_word_inputs')
	encoder_inf_states = Input(batch_shape=(batch_size, en_timesteps, 2*hidden_size), name='encoder_inf_states')
	decoder_init_state = Input(batch_shape=(batch_size, 2*hidden_size), name='decoder_init')

	decoder_inf_out, decoder_inf_state = decoder_gru(decoder_inf_inputs, initial_state=decoder_init_state)
	attn_inf_out, attn_inf_states = attn_layer([encoder_inf_states, decoder_inf_out])
	decoder_inf_concat = layers.Concatenate(axis=-1, name='concat')([decoder_inf_out, attn_inf_out])
	decoder_inf_pred = layers.TimeDistributed(dense)(decoder_inf_concat)
	decoder_model = models.Model(inputs=[encoder_inf_states, decoder_init_state, decoder_inf_inputs], outputs=[decoder_inf_pred, attn_inf_states, decoder_inf_state])

	return full_model, encoder_model, decoder_model
Exemple #13
0
  def __init__(self, feat_dim, vocab_size, embed_size, n_attention_stacks, hidden_dim_img):
    """Stacked Attention Network: an image encoder and a question
    encoder feed a stack of attention layers, followed by a linear
    answer predictor over the vocabulary."""
    super(SAN, self).__init__()
    self.feat_dim = feat_dim
    self.vocab_size = vocab_size
    self.embed_size = embed_size

    self.img_enc = ImageEncoder(feat_dim)
    self.ques_enc = QuestionEncoder(vocab_size, embed_size, feat_dim)
    # One AttentionLayer per stack; applied sequentially in forward().
    attention_stack = [
        AttentionLayer(hidden_dim_img, feat_dim, feat_dim)
        for _ in range(n_attention_stacks)
    ]
    self.att = nn.ModuleList(attention_stack)

    self.pred = nn.Linear(feat_dim, vocab_size)
Exemple #14
0
def test(model, output, hidden_size, content_len):
    """Assemble inference-time encoder and decoder models from the
    layers/tensors produced during training.

    Args:
        model: trained model (not read here — kept for interface parity).
        output: sequence of [encoder_inputs, encoder_outputs, state_h,
            state_c, decoder_inputs, dec_emb_layer, decoder_lstm,
            decoder_softmax_layer] from the training build.
        hidden_size: LSTM hidden dimension.
        content_len: source sequence length attended over.

    Returns:
        (encoder_model, decoder_model) for step-by-step decoding.
    """
    encoder_inputs = output[0]
    encoder_outputs = output[1]
    state_h = output[2]
    state_c = output[3]
    decoder_inputs = output[4]
    dec_emb_layer = output[5]
    decoder_lstm = output[6]
    decoder_softmax_layer = output[7]

    # Build the encoder inference model.
    encoder_model = Model(inputs=encoder_inputs,
                          outputs=[encoder_outputs, state_h, state_c])

    # Tensors that hold the decoder states from the previous time step.
    decoder_state_input_h = Input(shape=(hidden_size, ))
    decoder_state_input_c = Input(shape=(hidden_size, ))

    dec_emb2 = dec_emb_layer(decoder_inputs)
    # To predict the next word, the previous step's states are used as the
    # initial state; decode_sequence() (defined elsewhere) drives the loop.
    # Unlike during training, the hidden/cell states state_h2/state_c2
    # returned by the LSTM are kept rather than discarded.
    decoder_outputs2, state_h2, state_c2 = decoder_lstm(
        dec_emb2, initial_state=[decoder_state_input_h, decoder_state_input_c])

    # Attention over the encoder outputs.
    decoder_hidden_state_input = Input(shape=(content_len, hidden_size))
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out_inf, attn_states_inf = attn_layer(
        [decoder_hidden_state_input, decoder_outputs2])
    decoder_inf_concat = Concatenate(
        axis=-1, name='concat')([decoder_outputs2, attn_out_inf])

    # Decoder output (softmax) layer.
    decoder_outputs2 = decoder_softmax_layer(decoder_inf_concat)

    # Final decoder inference model.
    decoder_model = Model([decoder_inputs] + [
        decoder_hidden_state_input, decoder_state_input_h,
        decoder_state_input_c
    ], [decoder_outputs2] + [state_h2, state_c2])

    return encoder_model, decoder_model
Exemple #15
0
 def defineModel(latent_dim, max_length_source, src_vocab, trg_vocab):
     """Build a 3-layer-LSTM encoder / 1-layer-LSTM decoder seq2seq
     model with attention over the encoder outputs.

     Args:
         latent_dim: hidden/embedding dimensionality for all layers.
         max_length_source: source sequence length.
         src_vocab: source vocabulary size.
         trg_vocab: target vocabulary size.

     Returns:
         An uncompiled Keras model taking [encoder_inputs,
         decoder_inputs] and producing per-step target-vocab softmaxes.
     """
     # Encoder
     encoder_inputs = Input(shape=(max_length_source, ))
     enc_emb = Embedding(src_vocab, latent_dim,
                         trainable=True)(encoder_inputs)
     # LSTM 1
     encoder_lstm1 = LSTM(latent_dim,
                          return_sequences=True,
                          return_state=True)
     encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)
     # LSTM 2
     encoder_lstm2 = LSTM(latent_dim,
                          return_sequences=True,
                          return_state=True)
     encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)
     # LSTM 3 — only the final layer's states seed the decoder.
     encoder_lstm3 = LSTM(latent_dim,
                          return_state=True,
                          return_sequences=True)
     encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2)
     # Set up the decoder.
     decoder_inputs = Input(shape=(None, ))
     dec_emb_layer = Embedding(trg_vocab, latent_dim, trainable=True)
     dec_emb = dec_emb_layer(decoder_inputs)
     # Decoder LSTM using encoder_states as initial state.
     decoder_lstm = LSTM(latent_dim,
                         return_sequences=True,
                         return_state=True)
     decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm(
         dec_emb, initial_state=[state_h, state_c])
     # Attention layer over encoder outputs, queried by decoder outputs.
     attn_layer = AttentionLayer(name='attention_layer')
     attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])
     # Concat attention output and decoder LSTM output
     decoder_concat_input = Concatenate(
         axis=-1, name='concat_layer')([decoder_outputs, attn_out])
     # Dense layer: per-timestep softmax over the target vocabulary.
     decoder_dense = TimeDistributed(Dense(trg_vocab, activation='softmax'))
     decoder_outputs = decoder_dense(decoder_concat_input)
     # Define the model
     model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
     return model
Exemple #16
0
    def __init__(self, vocab_size, paragraph_window_size, summary_window_size):
        """Keras LSTM seq2seq summarizer: three stacked encoder LSTMs,
        one decoder LSTM, attention over encoder outputs, and a softmax
        output layer over the vocabulary."""
        super(LSTM_Seq2Seq, self).__init__()
        self.paragraph_window_size = paragraph_window_size
        self.summary_window_size = summary_window_size
        self.vocab_size = vocab_size
        self.batch_size = 100
        self.embedding_size = 15
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.009)

        # Separate embeddings for source paragraphs and target summaries.
        self.paragraph_embedding1 = Embedding(
            self.vocab_size,
            self.embedding_size,
            input_length=self.paragraph_window_size)
        self.summary_embedding1 = Embedding(
            self.vocab_size,
            self.embedding_size,
            input_length=self.summary_window_size)

        # All recurrent blocks share the same configuration.
        def _lstm():
            return LSTM(80,
                        activation='relu',
                        return_state=True,
                        return_sequences=True)

        self.encoder = _lstm()
        self.encoder1 = _lstm()
        self.encoder2 = _lstm()
        self.decoder = _lstm()

        self.attn_layer = AttentionLayer(name='attention_layer')

        self.outputs = Dense(vocab_size, activation='softmax')
Exemple #17
0
def decoder(encoded_input, f, n_hidden, config):
    """Stepwise pointer-style decoding loop: repeatedly feed the LSTM
    output through attention and a Decode layer to pick one position
    per step.

    Args:
        encoded_input: encoder output tensor attended over at each step.
        f: tensor fed to the Simple layer to form the first LSTM input.
        n_hidden: width of the Simple input layer.
        config: supplies batch_size, nCells and nMuts
            (number of steps = nCells * nMuts).

    Returns:
        (poss, log_s): concatenated chosen positions and their
        log-softmax scores across all decode steps.
    """
    Layer1 = Simple(n_hidden, name='rand_inp')
    first_input_ = Layer1(f)
    # stateful=True: the LSTM carries its state across the calls below.
    lstm = LSTM(8,
                kernel_initializer=glorot_uniform(),
                return_sequences=True,
                stateful=True,
                name='lstm')
    positions = []
    log_softmax = []
    Layer2 = Decode(config.batch_size, name='decode_layer')
    attn_lyr = AttentionLayer(name='attention_layer')
    i = first_input_
    # One decode step per (cell, mutation) pair; each step's Decode output
    # becomes the next step's LSTM input.
    for step in range(config.nCells * config.nMuts):
        output = lstm(i)
        attn_out, attn_stat = attn_lyr([encoded_input, output])
        i, position, log_soft1 = Layer2([encoded_input, attn_out])
        positions.append(position)
        log_softmax.append(log_soft1)

    poss = Concatenate(axis=-1, trainable=True, name='poss')(positions)
    log_s = Concatenate(axis=-1, trainable=True, name='log_s')(log_softmax)
    return poss, log_s
    def getModel(self, xTrain, yTrain, xVal, yVal, xVocabSize, yVocabSize, maxTextLen):
        """Load a cached summarization model from disk, or build, train
        and save an LSTM encoder/decoder with attention.

        Args:
            xTrain, yTrain, xVal, yVal: padded integer sequences; target
                sequences are teacher-forced (shifted by one) during fit.
            xVocabSize, yVocabSize: source/target vocabulary sizes.
            maxTextLen: source sequence length.
        """
        # NOTE(review): the filename spells "Sumamrization" — kept as-is
        # because the same (typo'd) path is used for both load and save.
        my_file = Path("./textSumamrizationModel.h5")
        if my_file.is_file(): 
            self.model = keras.models.load_model('./textSumamrizationModel.h5', custom_objects={'AttentionLayer': AttentionLayer})

        else:
            # Encoder: embedding -> single LSTM returning sequence + states.
            self.encoderInput = Input(shape=(maxTextLen,))
            embL = Embedding(xVocabSize, 200,trainable=True)(self.encoderInput)
            encoderLSTM = LSTM(300, return_state=True, return_sequences=True,dropout=0.4,recurrent_dropout=0.4)
            self.encoderOutput, self.stateH, self.stateC= encoderLSTM(embL)

            # Decoder
            self.decoderInput = Input(shape=(None,))
            self.decL = Embedding(yVocabSize, 200,trainable=True)
            decEmb = self.decL(self.decoderInput)

            self.decoderLstm = LSTM(300, return_sequences=True, return_state=True,dropout=0.4,recurrent_dropout=0.2)
            decoderOutputs,decoderFwdState, decoderBackState = self.decoderLstm(decEmb,initial_state=[self.stateH, self.stateC])

            #Attention layer
            self.attnL = AttentionLayer(name='attention_layer')
            attnO, attnS = self.attnL([self.encoderOutput, decoderOutputs])
            decoderCInput = Concatenate(axis=-1, name='concat_layer')([decoderOutputs, attnO])
            #dense layer
            self.decoderDense =  TimeDistributed(Dense(yVocabSize, activation='softmax'))
            decoderOutputs = self.decoderDense(decoderCInput)

            # Define the model 
            self.model = Model([self.encoderInput, self.decoderInput], decoderOutputs)
            self.model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')
            self.model.summary()
            # Stop early once validation loss stops improving.
            es = EarlyStopping(monitor='val_loss', mode='min', verbose = 1, patience = 2)

            # Teacher forcing: decoder input is y[:, :-1], target is y[:, 1:].
            self.history =  self.model.fit([xTrain,yTrain[:,:-1]], yTrain.reshape(yTrain.shape[0],yTrain.shape[1], 1)[:,1:] ,epochs= 5,callbacks=[es],batch_size=256, validation_data=([xVal,yVal[:,:-1]], yVal.reshape(yVal.shape[0],yVal.shape[1], 1)[:,1:]))
            
            self.model.save('textSumamrizationModel.h5')
            self.drawModelFromTraining()
def prepare_model(data):
    """Build and train a seq2seq-with-attention text summarization model.

    Splits `data` into train/validation sets, tokenizes and pads the text
    and summary columns, trains a 3-layer-encoder / 1-layer-decoder LSTM
    model with attention (teacher forcing), and assembles the separate
    encoder/decoder models needed for step-by-step inference.

    Args:
        data: DataFrame-like with 'cleaned_text' and 'cleaned_summary'
            columns of pre-cleaned strings.

    Returns:
        (model, encoder_model, decoder_model, x_tokenizer, y_tokenizer):
        the trained training-time model, the two inference sub-models and
        both fitted tokenizers.  (Previously the function returned None,
        discarding everything it built.)
    """
    max_len_text = 80
    max_len_summary = 10
    x_tr, x_val, y_tr, y_val = train_test_split(data['cleaned_text'],
                                                data['cleaned_summary'],
                                                test_size=0.1,
                                                random_state=0,
                                                shuffle=True)
    # Tokenizer for the source texts, fitted on training data only.
    x_tokenizer = Tokenizer()
    x_tokenizer.fit_on_texts(list(x_tr))
    # Convert text sequences into integer sequences.
    x_tr = x_tokenizer.texts_to_sequences(x_tr)
    x_val = x_tokenizer.texts_to_sequences(x_val)
    # Zero-pad up to the maximum text length.
    x_tr = pad_sequences(x_tr, maxlen=max_len_text, padding='post')
    x_val = pad_sequences(x_val, maxlen=max_len_text, padding='post')
    x_voc_size = len(x_tokenizer.word_index) + 1  # +1 for padding index 0

    # Tokenizer for the target summaries, fitted on training data only.
    y_tokenizer = Tokenizer()
    y_tokenizer.fit_on_texts(list(y_tr))
    y_tr = y_tokenizer.texts_to_sequences(y_tr)
    y_val = y_tokenizer.texts_to_sequences(y_val)
    y_tr = pad_sequences(y_tr, maxlen=max_len_summary, padding='post')
    y_val = pad_sequences(y_val, maxlen=max_len_summary, padding='post')
    y_voc_size = len(y_tokenizer.word_index) + 1

    K.clear_session()
    latent_dim = 500

    # ----- Encoder: embedding followed by a 3-layer stacked LSTM -----
    encoder_inputs = Input(shape=(max_len_text, ))
    enc_emb = Embedding(x_voc_size, latent_dim, trainable=True)(encoder_inputs)

    encoder_lstm1 = LSTM(latent_dim, return_sequences=True, return_state=True)
    encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)

    encoder_lstm2 = LSTM(latent_dim, return_sequences=True, return_state=True)
    encoder_output2, enc2_state_h, enc2_state_c = encoder_lstm2(encoder_output1)

    # Only the final layer's states seed the decoder.
    encoder_lstm3 = LSTM(latent_dim, return_state=True, return_sequences=True)
    encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2)

    # ----- Decoder, initialised with the final encoder states -----
    decoder_inputs = Input(shape=(None, ))
    dec_emb_layer = Embedding(y_voc_size, latent_dim, trainable=True)
    dec_emb = dec_emb_layer(decoder_inputs)

    decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
    decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm(
        dec_emb, initial_state=[state_h, state_c])

    # Attention over the encoder outputs.
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])

    # Concatenate the attention context with the decoder LSTM output.
    decoder_concat_input = Concatenate(
        axis=-1, name='concat_layer')([decoder_outputs, attn_out])

    # Per-timestep softmax over the target vocabulary.
    decoder_dense = TimeDistributed(Dense(y_voc_size, activation='softmax'))
    decoder_outputs = decoder_dense(decoder_concat_input)

    # Training model: teacher forcing (decoder sees y shifted by one step).
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    model.summary()

    model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)

    history = model.fit([x_tr, y_tr[:, :-1]],
                        y_tr.reshape(y_tr.shape[0], y_tr.shape[1], 1)[:, 1:],
                        epochs=10,
                        callbacks=[es],
                        batch_size=512,
                        validation_data=([x_val, y_val[:, :-1]],
                                         y_val.reshape(y_val.shape[0],
                                                       y_val.shape[1], 1)[:,
                                                                          1:]))

    diagnostic_plot(history)

    # Index/word lookup tables for decoding predictions back to text.
    reverse_target_word_index = y_tokenizer.index_word
    reverse_source_word_index = x_tokenizer.index_word
    target_word_index = y_tokenizer.word_index

    # ----- Inference-time encoder -----
    encoder_model = Model(inputs=encoder_inputs,
                          outputs=[encoder_outputs, state_h, state_c])

    # ----- Inference-time decoder -----
    # These inputs hold the states carried over from the previous time step.
    decoder_state_input_h = Input(shape=(latent_dim, ))
    decoder_state_input_c = Input(shape=(latent_dim, ))
    decoder_hidden_state_input = Input(shape=(max_len_text, latent_dim))

    # Get the embeddings of the decoder sequence.
    dec_emb2 = dec_emb_layer(decoder_inputs)

    # Fresh names for the inference-time states (the original reused
    # state_h2/state_c2, clobbering the encoder LSTM2 states).
    decoder_outputs2, dec_state_h, dec_state_c = decoder_lstm(
        dec_emb2, initial_state=[decoder_state_input_h, decoder_state_input_c])

    # Attention at inference, over externally supplied encoder outputs.
    attn_out_inf, attn_states_inf = attn_layer(
        [decoder_hidden_state_input, decoder_outputs2])
    decoder_inf_concat = Concatenate(
        axis=-1, name='concat')([decoder_outputs2, attn_out_inf])

    # Softmax over the target vocabulary, sharing the trained dense layer.
    decoder_outputs2 = decoder_dense(decoder_inf_concat)

    # Final decoder model.
    decoder_model = Model([decoder_inputs] + [
        decoder_hidden_state_input, decoder_state_input_h,
        decoder_state_input_c
    ], [decoder_outputs2] + [dec_state_h, dec_state_c])

    # Return everything a caller needs for inference (was previously lost).
    return model, encoder_model, decoder_model, x_tokenizer, y_tokenizer
Exemple #20
0
def generator_model(category, content, title, embedding_dim, hidden_size):
    """Train a seq2seq title generator with attention for one news category.

    Loads the pickled word dictionaries for `category`, converts `content`
    (source articles) and `title` (target headlines) into padded index
    sequences, trains a 3-layer-encoder / 1-layer-decoder LSTM model with
    attention, and saves the trained weights to
    'weights/<category>_checkpoint'.

    Args:
        category: category key used in the word-dict filenames / checkpoint.
        content: tokenized article bodies (source sequences).
        title: tokenized titles (target sequences).
        embedding_dim: size of the embedding vectors.
        hidden_size: LSTM hidden-state size.
    """
    with open('word_dict/content_ix_to_word_' + category + '.pkl', 'rb') as f:
        content_ix_to_word = pickle.load(f)
    with open('word_dict/content_word_to_ix_' + category + '.pkl', 'rb') as f:
        content_word_to_ix = pickle.load(f)
    with open('word_dict/title_ix_to_word_' + category + '.pkl', 'rb') as f:
        title_ix_to_word = pickle.load(f)
    with open('word_dict/title_word_to_ix_' + category + '.pkl', 'rb') as f:
        title_word_to_ix = pickle.load(f)

    pad_num = 0  # index reserved for padding
    oov_num = 1  # index reserved for out-of-vocabulary words

    src_vocab = len(content_ix_to_word)
    tar_vocab = len(title_ix_to_word)

    index_title = change_word_to_index(title, title_word_to_ix, oov_num)
    index_content = change_word_to_index(content, content_word_to_ix, oov_num)

    # Longest sequence minus one (the final position is dropped below).
    content_len = max([len(x) - 1 for x in index_content])
    title_len = max([len(x) - 1 for x in index_title])

    input_idx = seq_padding(index_content, content_len, pad_num, True)
    target_idx = seq_padding(index_title, title_len, pad_num, False)

    # Drop the final position of every padded sequence.
    temp = pd.DataFrame(input_idx).to_numpy()
    input_data = np.array([s[:-1] for s in temp])

    temp = pd.DataFrame(target_idx).to_numpy()
    target_data = np.array([s[:-1] for s in temp])

    xTrain, xTest, yTrain, yTest = train_test_split(input_data,
                                                    target_data,
                                                    test_size=0.2,
                                                    random_state=777,
                                                    shuffle=True)

    # ----- Model definition -----
    # Encoder
    encoder_inputs = Input(shape=(content_len, ))

    # Encoder embedding layer
    enc_emb = Embedding(src_vocab, embedding_dim)(encoder_inputs)

    # Encoder LSTM 1
    encoder_lstm1 = LSTM(hidden_size,
                         return_sequences=True,
                         return_state=True,
                         dropout=0.4,
                         recurrent_dropout=0.4)
    encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)

    # Encoder LSTM 2
    encoder_lstm2 = LSTM(hidden_size,
                         return_sequences=True,
                         return_state=True,
                         dropout=0.4,
                         recurrent_dropout=0.4)
    encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)

    # Encoder LSTM 3 (its final states seed the decoder)
    encoder_lstm3 = LSTM(hidden_size,
                         return_state=True,
                         return_sequences=True,
                         dropout=0.4,
                         recurrent_dropout=0.4)
    encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2)

    # Decoder
    decoder_inputs = Input(shape=(None, ))

    # Decoder embedding layer.
    # NOTE(review): input_dim is src_vocab although the decoder consumes
    # title indices (tar_vocab). Kept as-is because the inference side
    # rebuilds the same architecture to load these weights; if
    # tar_vocab > src_vocab this can index out of range — fix both together.
    dec_emb_layer = Embedding(src_vocab, embedding_dim)
    dec_emb = dec_emb_layer(decoder_inputs)

    # Decoder LSTM, initialised with the final encoder states.
    decoder_lstm = LSTM(hidden_size,
                        return_sequences=True,
                        return_state=True,
                        dropout=0.4,
                        recurrent_dropout=0.2)
    decoder_outputs, _, _ = decoder_lstm(dec_emb,
                                         initial_state=[state_h, state_c])

    # Fetch the attention layer implementation.
    # NOTE(review): downloading code at train time is fragile; consider
    # vendoring attention.py instead.
    import urllib.request
    urllib.request.urlretrieve(
        "https://raw.githubusercontent.com/thushv89/attention_keras/master/layers/attention.py",
        filename="attention.py")
    from attention import AttentionLayer

    # Attention layer
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])

    # Concatenate the attention context with the decoder hidden states.
    decoder_concat_input = Concatenate(
        axis=-1, name='concat_layer')([decoder_outputs, attn_out])

    # Output layer: per-timestep softmax over the target vocabulary.
    # (A pre-attention Dense/Model pair that was built and immediately
    # discarded has been removed.)
    decoder_softmax_layer = Dense(tar_vocab, activation='softmax')
    decoder_softmax_outputs = decoder_softmax_layer(decoder_concat_input)

    # Define the training model.
    model = Model([encoder_inputs, decoder_inputs], decoder_softmax_outputs)

    model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')

    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)

    # Teacher forcing: decoder input is y[:, :-1], target is y shifted by one.
    model.fit([xTrain, yTrain[:, :-1]],
              yTrain.reshape(yTrain.shape[0], yTrain.shape[1], 1)[:, 1:],
              epochs=50,
              callbacks=[es, TQDMCallback()],
              batch_size=128,
              validation_data=([xTest, yTest[:, :-1]],
                               yTest.reshape(yTest.shape[0], yTest.shape[1],
                                             1)[:, 1:]))

    model.save_weights('weights/' + category + '_checkpoint')
# Keep only the last feature column as the prediction target.
y_train = y_train.take([-1], axis=-1)
y_val = y_val.take([-1], axis=-1)

# Sanity-check that no NaNs slipped through preprocessing.
assert not np.any(np.isnan(X_train))
assert not np.any(np.isnan(X_val))
assert not np.any(np.isnan(X_covars_val))
assert not np.any(np.isnan(X_covars_train))
assert not np.any(np.isnan(y_val))
assert not np.any(np.isnan(y_train))


encoder_input = Input(shape=(X_train.shape[1], X_train.shape[2]), name='encoder_input')
decoder_input = Input(shape=(X_covars_train.shape[1], X_covars_train.shape[2]), name='decoder_input')

encoderLSTM = LSTM(units=latent_dim, return_state=True, return_sequences=True, name='enc_LSTM', dropout=dropout_rate)
attention1 = AttentionLayer()
decoderLSTM = LSTM(units=latent_dim, return_state=True, return_sequences=True, name='dec_LSTM', dropout=dropout_rate)
# NOTE(review): layer name keeps the original 'distirbuted' typo — renaming
# would break any load-weights-by-name of an existing checkpoint.
dense_output = TimeDistributed(Dense(1), name='time_distirbuted_dense_output')
gaussian_layer = GaussianLayer(1)

## building model
# Call each LSTM exactly once and unpack output vs. states (the original
# called encoderLSTM/decoderLSTM twice each, building duplicate graph nodes).
encoder_results = encoderLSTM(encoder_input)
encoder_out, encoder_states = encoder_results[0], encoder_results[1:]

decoder_results = decoderLSTM(decoder_input, initial_state=encoder_states)
decoder_out, decoder_states = decoder_results[0], decoder_results[1:]

# Attend over the encoder outputs from each decoder step.
attn_out, attn_states = attention1([encoder_out, decoder_out])

# Concatenate attention context with the decoder outputs.
decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_out, attn_out])
def test_model(
    model,
    attention = False
):
    """Run character-level inference on the test set and log accuracy.

    Rebuilds inference-time encoder/decoder models from the trained
    seq2seq `model` (LSTM, GRU or simple RNN cells, with or without
    attention), greedily decodes every row of ``dataBase.test``, and logs
    exact-match accuracy to wandb.

    Args:
        model: trained Keras seq2seq model to extract layers from.
        attention: whether `model` was trained with an attention layer.

    Returns:
        (accuracy, sources, originals, predictions) for the test set.
    """
    if attention == False:
        wandb.init(config=config_best,  project="CS6910-Assignment-3", entity="rahulsundar")
        config = wandb.config
        wandb.run.name = (
            "Inference_" 
            + str(config.cell_type)
            + dataBase.source_lang
            + str(config.numEncoders)
            + "_"
            + dataBase.target_lang
            + "_"
            + str(config.numDecoders)
            + "_"
            + config.optimiser
            + "_"
            + str(config.epochs)
            + "_"
            + str(config.dropout) 
            + "_"
            + str(config.batch_size)
            + "_"
            + str(config.latentDim)
        )
        wandb.run.save()


        if config.cell_type == "LSTM":
            encoder_inputs = model.input[0]
            
            # Keras auto-names stacked LSTMs "lstm", "lstm_1", ...; grab the
            # last encoder layer's outputs and states.
            if config.numEncoders == 1:
                encoder_outputs, state_h_enc, state_c_enc = model.get_layer(name = "lstm").output 
            else:           
                encoder_outputs, state_h_enc, state_c_enc = model.get_layer(name = "lstm_"+ str(config.numEncoders-1)).output

            encoder_states = [state_h_enc, state_c_enc]
            encoder_model = Model(encoder_inputs, encoder_states)

            decoder_inputs = model.input[1]
            decoder_state_input_h = Input(shape=(config.latentDim,), name="input_3")
            decoder_state_input_c = Input(shape=(config.latentDim,), name="input_4")
            decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
            decoder_lstm = model.layers[-3]
            decoder_outputs, state_h_dec, state_c_dec = decoder_lstm( decoder_inputs, initial_state=decoder_states_inputs )
            decoder_states = [state_h_dec, state_c_dec]
            # The trained model ends with two dense layers; apply both.
            decoder_dense = model.layers[-2]
            decoder_outputs = decoder_dense(decoder_outputs)
            
            decoder_dense = model.layers[-1]
            decoder_outputs = decoder_dense(decoder_outputs)
            decoder_model = Model(
                [decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states
            )
        elif config.cell_type == "GRU" or config.cell_type == "RNN":
            encoder_inputs = model.input[0]
            if config.cell_type == "GRU":
                if config.numEncoders == 1:
                    encoder_outputs, state = model.get_layer(name = "gru").output
                else:
                    encoder_outputs, state = model.get_layer(name = "gru_"+ str(config.numEncoders-1)).output
            else:
                if config.numEncoders == 1:
                    encoder_outputs, state = model.get_layer(name = "simple_rnn").output
                else:
                    encoder_outputs, state = model.get_layer(name = "simple_rnn_"+ str(config.numEncoders-1)).output

            encoder_states = [state]

            encoder_model = Model(encoder_inputs, encoder_states)

            decoder_inputs = model.input[1]

            decoder_state = Input(shape=(config.latentDim,), name="input_3")
            decoder_states_inputs = [decoder_state]

            decoder_gru = model.layers[-3]
            (decoder_outputs, state,) = decoder_gru(decoder_inputs, initial_state=decoder_states_inputs)
            decoder_states = [state]
            decoder_dense = model.layers[-2]
            decoder_outputs = decoder_dense(decoder_outputs)
            decoder_dense = model.layers[-1]
            decoder_outputs = decoder_dense(decoder_outputs)
            decoder_model = Model(
                [decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states
            )

        def decode_sequence(input_seq):
            """Greedily decode one input sequence, one character at a time."""
            # Encode the input as state vectors.
            states_value = encoder_model.predict(input_seq)

            # Generate empty target sequence of length 1.
            target_seq = np.zeros((1, 1, len(dataBase.target_char2int)))
            # Populate the first character of target sequence with the start character.
            target_seq[0, 0, dataBase.target_char2int["\n"]] = 1.0

            # Sampling loop for a batch of sequences
            # (to simplify, here we assume a batch of size 1).
            stop_condition = False
            decoded_sentence = ""
            while not stop_condition:
                if config.cell_type == "LSTM":
                    output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
                elif config.cell_type == "RNN" or config.cell_type == "GRU":
                    states_value = states_value[0].reshape((1, 256))
                    output_tokens, h = decoder_model.predict([target_seq] + [states_value])

                # Sample a token (greedy argmax over the final timestep).
                sampled_token_index = np.argmax(output_tokens[0, -1, :])
                sampled_char = dataBase.target_int2char[sampled_token_index]
                decoded_sentence += sampled_char

                # Exit condition: either hit max length
                # or find stop character.
                if sampled_char == "\n" or len(decoded_sentence) > 25:
                    stop_condition = True

                # Update the target sequence (of length 1).
                target_seq = np.zeros((1, 1, len(dataBase.target_char2int)))
                target_seq[0, 0, sampled_token_index] = 1.0

                # Update states
                if config.cell_type == "LSTM":
                    states_value = [h, c]
                elif config.cell_type == "RNN" or config.cell_type == "GRU":
                    states_value = [h]
            return decoded_sentence

        acc = 0
        sourcelang = []
        predictions = []
        original = []
        for i, row in dataBase.test.iterrows():
            input_seq = dataBase.test_encoder_input[i : i + 1]
            decoded_sentence = decode_sequence(input_seq)
            og_tokens = [dataBase.target_char2int[x] for x in row["tgt"]]
            predicted_tokens = [dataBase.target_char2int[x] for x in decoded_sentence.rstrip("\n")]
            sourcelang.append(row['src'])
            original.append(row['tgt'])
            predictions.append(decoded_sentence)

            # Exact-match on token sequences (trailing newline stripped).
            if og_tokens == predicted_tokens:
                acc += 1

            if i % 100 == 0:
                print(f"Finished {i} examples")
                print(f"Source: {row['src']}")
                print(f"Original: {row['tgt']}")
                print(f"Predicted: {decoded_sentence}")
                print(f"Accuracy: {acc / (i+1)}")
                print(og_tokens)
                print(predicted_tokens)
                

        print(f'Test Accuracy: {acc}')
        wandb.log({'test_accuracy': acc / len(dataBase.test)})
        wandb.finish()
        return acc / len(dataBase.test), sourcelang, original, predictions

    elif attention == True:
        wandb.init(config=config_best_attention2,  project="CS6910-Assignment-3", entity="rahulsundar")
        config = wandb.config
        wandb.run.name = (
            "Inference_WithAttn_" 
            + str(config.cell_type)
            + dataBase.source_lang
            + str(config.numEncoders)
            + "_"
            + dataBase.target_lang
            + "_"
            + str(config.numDecoders)
            + "_"
            + config.optimiser
            + "_"
            + str(config.epochs)
            + "_"
            + str(config.dropout) 
            + "_"
            + str(config.batch_size)
            + "_"
            + str(config.latentDim)
        )
        wandb.run.save()


        if config.cell_type == "LSTM":
            encoder_inputs = model.input[0]
            if config.numEncoders == 1:
                encoder_outputs, state_h_enc, state_c_enc = model.get_layer(name = "lstm").output 
            else:           
                encoder_outputs, state_h_enc, state_c_enc = model.get_layer(name = "lstm_"+ str(config.numEncoders-1)).output
            # First encoder layer's full output sequence feeds the attention.
            encoder_first_outputs, _, _ = model.get_layer(name = "lstm").output
            encoder_states = [state_h_enc, state_c_enc]
            # NOTE(review): this encoder_model outputs only the two states,
            # but decode_sequence below unpacks THREE predict() outputs
            # (encoder_first_outputs, _, states_value) — the LSTM+attention
            # path will fail at predict-time. Needs encoder_first_outputs in
            # the outputs list and matching state handling; confirm intent.
            encoder_model = Model(encoder_inputs, encoder_states)

            decoder_inputs = model.input[1]
            decoder_state_input_h = Input(shape=(config.latentDim,), name="input_3")
            decoder_state_input_c = Input(shape=(config.latentDim,), name="input_4")
            decoder_hidden_state = Input(shape=(None,config["latentDim"]), name = "input_5")
            decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
            decoder_lstm = model.get_layer(name = "lstm_"+ str(config.numEncoders + config.numDecoders -1))
            decoder_outputs, state_h_dec, state_c_dec = decoder_lstm( decoder_inputs, initial_state=decoder_states_inputs )
            decoder_states = [state_h_dec, state_c_dec]

            attention_layer = model.get_layer(name = "attention_layer")
            attention_out, attention_states = attention_layer([encoder_first_outputs, decoder_outputs])


            decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_out])
            
            decoder_dense = model.layers[-2]
            decoder_time = TimeDistributed(decoder_dense)
            hidden_outputs = decoder_time(decoder_concat_input)
            decoder_dense = model.layers[-1]
            decoder_outputs = decoder_dense(hidden_outputs)

            # NOTE(review): inputs nests a list ([decoder_hidden_state,
            # decoder_states_inputs]) — Keras expects a flat list; verify.
            decoder_model = Model(inputs = [decoder_inputs] + [decoder_hidden_state , decoder_states_inputs], outputs = [decoder_outputs] + decoder_states)
            
        elif config.cell_type == "GRU" or config.cell_type == "RNN":
            encoder_inputs = model.input[0]
            if config.cell_type == "GRU":
                if config.numEncoders == 1:
                    encoder_outputs, state = model.get_layer(name = "gru").output
                else:
                    encoder_outputs, state = model.get_layer(name = "gru_"+ str(config.numEncoders-1)).output
                encoder_first_outputs, _ = model.get_layer(name = "gru").output
            else:
                if config.numEncoders == 1:
                    encoder_outputs, state = model.get_layer(name = "simple_rnn").output
                else:
                    encoder_outputs, state = model.get_layer(name = "simple_rnn_"+ str(config.numEncoders-1)).output
                encoder_first_outputs, _ = model.get_layer(name = "simple_rnn").output
            encoder_states = [state]

            encoder_model = Model(encoder_inputs, outputs = [encoder_first_outputs, encoder_outputs] + encoder_states)

            decoder_inputs = model.input[1]

            decoder_state = Input(shape=(config.latentDim,), name="input_3")
            decoder_hidden_state = Input(shape=(None,config["latentDim"]), name = "input_4")
            decoder_states_inputs = [decoder_state]

            if config.cell_type == "GRU":
                decoder_gru = model.get_layer(name = "gru_"+ str(config.numEncoders + config.numDecoders -1))
                (decoder_outputs, state) = decoder_gru(decoder_inputs, initial_state=decoder_states_inputs)
                decoder_states = [state]

            else:
                decoder_gru = model.get_layer(name = "simple_rnn_"+ str(config.numEncoders + config.numDecoders -1))
                (decoder_outputs, state) = decoder_gru(decoder_inputs, initial_state=decoder_states_inputs)
                decoder_states = [state]

                    
            attention_layer = AttentionLayer(name='attention_layer')
            attention_out, attention_states = attention_layer([decoder_hidden_state, decoder_outputs])

            decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_out])

            decoder_dense = model.layers[-2]
            decoder_time = TimeDistributed(decoder_dense)
            hidden_outputs = decoder_time(decoder_concat_input)
            decoder_dense = model.layers[-1]
            decoder_outputs = decoder_dense(hidden_outputs)

            decoder_model = Model(inputs = [decoder_inputs] + [decoder_hidden_state , decoder_states_inputs], outputs = [decoder_outputs] + decoder_states)
            
        def decode_sequence(input_seq):
            """Greedily decode one input sequence, attending over encoder outputs."""
            # Encode the input as state vectors.
            encoder_first_outputs, _, states_value = encoder_model.predict(input_seq)

            # Generate empty target sequence of length 1.
            target_seq = np.zeros((1, 1, len(dataBase.target_char2int)))
            # Populate the first character of target sequence with the start character.
            target_seq[0, 0, dataBase.target_char2int["\n"]] = 1.0

            # Sampling loop for a batch of sequences
            # (to simplify, here we assume a batch of size 1).
            stop_condition = False
            decoded_sentence = ""
            attention_weights = []
            while not stop_condition:
                if config.cell_type == "LSTM":
                    output_tokens, h, c = decoder_model.predict([target_seq, encoder_first_outputs] + states_value)
                elif config.cell_type == "RNN" or config.cell_type == "GRU":
                    states_value = states_value[0].reshape((1, config.latentDim))
                    output_tokens, h = decoder_model.predict([target_seq] + [encoder_first_outputs] + [states_value])
                # Sample a token (greedy argmax over the final timestep).
                sampled_token_index = np.argmax(output_tokens[0, -1, :])
                sampled_char = dataBase.target_int2char[sampled_token_index]
                decoded_sentence += sampled_char

                # Exit condition: either hit max length
                # or find stop character.
                if sampled_char == "\n" or len(decoded_sentence) > 25:
                    stop_condition = True

                # Update the target sequence (of length 1).
                target_seq = np.zeros((1, 1, len(dataBase.target_char2int)))
                target_seq[0, 0, sampled_token_index] = 1.0

                # Update states
                if config.cell_type == "LSTM":
                    states_value = [h, c]
                elif config.cell_type == "RNN" or config.cell_type == "GRU":
                    states_value = [h]
            return decoded_sentence

        acc = 0
        sourcelang = []
        predictions = []
        original = []
        for i, row in dataBase.test.iterrows():
            input_seq = dataBase.test_encoder_input[i : i + 1]
            # Fixed: decode_sequence returns only the sentence (the
            # attention-weights return is disabled), so the original
            # two-target unpack raised ValueError on every row.
            decoded_sentence = decode_sequence(input_seq)
            og_tokens = [dataBase.target_char2int[x] for x in row["tgt"]]
            predicted_tokens = [dataBase.target_char2int[x] for x in decoded_sentence.rstrip("\n")]
            sourcelang.append(row['src'])
            original.append(row['tgt'])
            predictions.append(decoded_sentence)
            if og_tokens == predicted_tokens:
                acc += 1

            if i % 100 == 0:
                print(f"Finished {i} examples")
                print(f"Source: {row['src']}")
                print(f"Original: {row['tgt']}")
                print(f"Predicted: {decoded_sentence}")
                print(f"Accuracy: {acc / (i+1)}")
                print(og_tokens)
                print(predicted_tokens)
                

        print(f'Test Accuracy: {acc}')
        wandb.log({'test_accuracy': acc / len(dataBase.test)})
        wandb.finish()
        return acc / len(dataBase.test) , sourcelang, original, predictions
def model_create(category, embedding_dim, hidden_size):

    word_len_dict = {
        '날씨': [696, 4100],
        '사건_사고': [2999, 11623],
        '뇌물수수': [2668, 16511]
    }
    tar_vocab, src_vocab = word_len_dict[category]

    content_len_dict = {
        '날씨': [976, 12],
        '사건_사고': [1134, 13],
        '뇌물수수': [1417, 17]
    }
    content_len, title_len = content_len_dict[category]

    ###### 모델 설계
    # 인코더
    encoder_inputs = Input(shape=(content_len, ))

    # 인코더의 임베딩 층
    enc_emb = Embedding(src_vocab, embedding_dim)(encoder_inputs)

    # 인코더의 LSTM 1
    encoder_lstm1 = LSTM(hidden_size,
                         return_sequences=True,
                         return_state=True,
                         dropout=0.4,
                         recurrent_dropout=0.4)
    encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)

    # 인코더의 LSTM 2
    encoder_lstm2 = LSTM(hidden_size,
                         return_sequences=True,
                         return_state=True,
                         dropout=0.4,
                         recurrent_dropout=0.4)
    encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)

    # 인코더의 LSTM 3
    encoder_lstm3 = LSTM(hidden_size,
                         return_state=True,
                         return_sequences=True,
                         dropout=0.4,
                         recurrent_dropout=0.4)

    if category == '사건_사고':
        encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2)
    else:
        encoder_outpus3, state_h3, state_c3 = encoder_lstm3(endcoder_output2)

        # 인코더의 LSTM 4
        encoder_lstm4 = LSTM(hidden_size,
                             return_state=True,
                             return_sequences=True,
                             dropout=0.4,
                             recurrent_dropout=0.4)
        encoder_outputs, state_h, state_c = encoder_lstm4(encoder_output3)

    # 디코더
    decoder_inputs = Input(shape=(None, ))

    # 디코더의 임베딩 층
    dec_emb_layer = Embedding(src_vocab, embedding_dim)
    dec_emb = dec_emb_layer(decoder_inputs)

    # 디코더의 LSTM
    decoder_lstm = LSTM(hidden_size,
                        return_sequences=True,
                        return_state=True,
                        dropout=0.4,
                        recurrent_dropout=0.2)
    decoder_outputs, _, _ = decoder_lstm(dec_emb,
                                         initial_state=[state_h, state_c])

    # 디코더의 출력층
    decoder_softmax_layer = Dense(tar_vocab, activation='softmax')
    decoder_softmax_outputs = decoder_softmax_layer(decoder_outputs)

    # 모델 정의
    model = Model([encoder_inputs, decoder_inputs], decoder_softmax_outputs)

    import urllib.request
    urllib.request.urlretrieve(
        "https://raw.githubusercontent.com/thushv89/attention_keras/master/layers/attention.py",
        filename="attention.py")
    from attention import AttentionLayer

    # 어텐션 층(어텐션 함수)
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])

    # 어텐션의 결과와 디코더의 hidden state들을 연결
    decoder_concat_input = Concatenate(
        axis=-1, name='concat_layer')([decoder_outputs, attn_out])

    # 디코더의 출력층
    decoder_softmax_layer = Dense(tar_vocab, activation='softmax')
    decoder_softmax_outputs = decoder_softmax_layer(decoder_concat_input)

    # 모델 정의
    model = Model([encoder_inputs, decoder_inputs], decoder_softmax_outputs)

    if category == '날씨':
        model.compile(optimizer='rmsprop',
                      loss='sparse_categorical_crossentropy')
    elif category == '사건_사고':
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
    else:
        from tensorflow.keras.optimizers import *

        adam = optimizers.Adam(lr=0.01,
                               beta_1=0.8,
                               beta_2=0.999,
                               epsilon=None,
                               decay=1e-5,
                               amsgrad=False)
        model.compile(optimizer=adam, loss='sparse_categorical_crossentropy')

    outputs = [
        encoder_inputs, encoder_outputs, state_h, state_c, decoder_inputs,
        dec_emb_layer, decoder_lstm, decoder_softmax_layer
    ]

    return model, outputs
Exemple #24
0
def define_nmt(hidden_size, batch_size, eng_timesteps, eng_vocab_size,
               ger_timesteps, ger_vocab_size):
    """Build an English->German NMT model (bi-GRU encoder/decoder + attention).

    Returns a ``(training_model, encoder_model, decoder_model)`` triple.  All
    three models share the same embedding, GRU, attention and dense layers, so
    weights trained through the first are reused by the inference pair.

    NOTE(review): when ``batch_size`` is falsy the input placeholders are never
    created and the function raises a NameError -- preserved as-is.
    """
    emb_dim = 100

    # Training-time placeholders; a fixed batch size is baked into the shapes.
    if batch_size:
        encoder_inputs = Input(batch_shape=(batch_size, eng_timesteps),
                               name='encoder_inputs')
        decoder_inputs = Input(batch_shape=(batch_size, ger_timesteps - 1),
                               name='decoder_inputs')

    # --- Encoder: embedding + bidirectional GRU --------------------------
    src_embed = Embedding(input_dim=eng_vocab_size, output_dim=emb_dim)
    encoder_gru = Bidirectional(GRU(hidden_size,
                                    return_sequences=True,
                                    return_state=True,
                                    name='encoder_gru'),
                                name='bidirectional_encoder')
    enc_seq, enc_fwd_h, enc_back_h = encoder_gru(src_embed(encoder_inputs))

    # --- Decoder: embedding + bidirectional GRU seeded with encoder states
    tgt_embed = Embedding(input_dim=ger_vocab_size, output_dim=emb_dim)
    decoder_gru = Bidirectional(GRU(hidden_size,
                                    return_sequences=True,
                                    return_state=True,
                                    name='decoder_gru'),
                                name='bidirectional_decoder')
    dec_seq, dec_fwd_h, dec_back_h = decoder_gru(
        tgt_embed(decoder_inputs),
        initial_state=[enc_fwd_h, enc_back_h])

    # --- Attention over the encoder output sequence ----------------------
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([enc_seq, dec_seq])
    dec_concat = Concatenate(axis=-1, name='concat_layer')([dec_seq, attn_out])

    # --- Per-timestep softmax over the German vocabulary ------------------
    dense = Dense(ger_vocab_size, activation='softmax', name='softmax_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    train_preds = dense_time(dec_concat)

    full_model = Model(inputs=[encoder_inputs, decoder_inputs],
                       outputs=train_preds)
    full_model.compile(optimizer='adam', loss='categorical_crossentropy')
    full_model.summary()

    # ===== Inference models (batch of one) ===============================
    batch_size = 1

    # Encoder inference: source sentence -> (sequence outputs, final states).
    encoder_inf_inputs = Input(batch_shape=(batch_size, eng_timesteps),
                               name='encoder_inf_inputs')
    inf_enc_seq, inf_enc_fwd, inf_enc_back = encoder_gru(
        src_embed(encoder_inf_inputs))
    encoder_model = Model(inputs=encoder_inf_inputs,
                          outputs=[inf_enc_seq, inf_enc_fwd, inf_enc_back])

    # Decoder inference: one target token + carried-over states per step.
    decoder_inf_inputs = Input(batch_shape=(batch_size, 1),
                               name='decoder_word_inputs')
    encoder_inf_states = Input(batch_shape=(batch_size, eng_timesteps,
                                            2 * hidden_size),
                               name='encoder_inf_states')
    decoder_init_fwd_state = Input(batch_shape=(batch_size, hidden_size),
                                   name='decoder_fwd_init')
    decoder_init_back_state = Input(batch_shape=(batch_size, hidden_size),
                                    name='decoder_back_init')

    inf_dec_seq, inf_dec_fwd, inf_dec_back = decoder_gru(
        tgt_embed(decoder_inf_inputs),
        initial_state=[decoder_init_fwd_state, decoder_init_back_state])
    attn_inf_out, attn_inf_states = attn_layer(
        [encoder_inf_states, inf_dec_seq])
    inf_concat = Concatenate(axis=-1,
                             name='concat')([inf_dec_seq, attn_inf_out])
    decoder_inf_pred = TimeDistributed(dense)(inf_concat)
    decoder_model = Model(
        inputs=[encoder_inf_states, decoder_init_fwd_state,
                decoder_init_back_state, decoder_inf_inputs],
        outputs=[decoder_inf_pred, attn_inf_states,
                 inf_dec_fwd, inf_dec_back])

    return full_model, encoder_model, decoder_model
    def build_attention_model(self):
        """Build a stacked seq2seq model with attention.

        The encoder and decoder are stacks of ``self.numEncoders`` /
        ``self.numDecoders`` recurrent layers of the kind selected by
        ``self.cell_type`` ("RNN", "LSTM" or "GRU").  Attention is computed
        between the *first* encoder layer's output sequence and the *last*
        decoder layer's output sequence (preserved from the original LSTM
        branch -- TODO confirm the first/last pairing is intentional), the
        context is concatenated onto the decoder outputs, then passed through
        a ReLU hidden layer and a softmax over the target alphabet.

        Returns the ``Model`` mapping ``[encoder_inputs, decoder_inputs]``
        to per-timestep target-character distributions, or ``None`` for an
        unknown ``self.cell_type`` (the original if/elif chain fell through
        silently).

        Fixes over the previous version:
        * the RNN and GRU branches fed the raw inputs into every stacked
          layer (``encoder(encoder_inputs)``), so stacking had no effect;
          all cell types now chain each layer onto the previous one exactly
          as the LSTM branch already did;
        * an unused ``TimeDistributed`` wrapper around the hidden Dense
          layer was removed (Dense already applies per-timestep to 3-D
          input);
        * the three near-identical branches were collapsed into one path.
        """
        cell_classes = {"RNN": SimpleRNN, "LSTM": LSTM, "GRU": GRU}
        cell_cls = cell_classes.get(self.cell_type)
        if cell_cls is None:
            return None

        # ----- encoder stack ------------------------------------------------
        encoder_inputs = Input(shape=(None, len(self.srcChar2Int)))
        encoder_outputs = encoder_inputs
        encoder_first_outputs = None
        encoder_states = []
        for layer_idx in range(1, self.numEncoders + 1):
            cell = cell_cls(
                self.latentDim,
                return_state=True,
                return_sequences=True,
                dropout=self.dropout,
            )
            # LSTM returns (seq, h, c); SimpleRNN/GRU return (seq, state).
            result = cell(encoder_outputs)
            encoder_outputs, encoder_states = result[0], list(result[1:])
            if layer_idx == 1:
                # Attention attends over the first layer's outputs (kept
                # from the original implementation).
                encoder_first_outputs = encoder_outputs

        # ----- decoder stack ------------------------------------------------
        # Every decoder layer is seeded with the final encoder states,
        # mirroring the original behaviour.
        decoder_inputs = Input(shape=(None, len(self.tgtChar2Int)))
        decoder_outputs = decoder_inputs
        for _ in range(self.numDecoders):
            cell = cell_cls(
                self.latentDim,
                return_state=True,
                return_sequences=True,
                dropout=self.dropout,
            )
            decoder_outputs = cell(decoder_outputs,
                                   initial_state=encoder_states)[0]

        # ----- attention + output head --------------------------------------
        attention_layer = AttentionLayer(name='attention_layer')
        attention_out, _attention_states = attention_layer(
            [encoder_first_outputs, decoder_outputs])
        decoder_concat_input = Concatenate(
            axis=-1, name='concat_layer')([decoder_outputs, attention_out])

        # Dense hidden + softmax, both applied per timestep.
        hidden_outputs = Dense(self.hidden,
                               activation="relu")(decoder_concat_input)
        predictions = Dense(len(self.tgtChar2Int),
                            activation="softmax")(hidden_outputs)
        return Model([encoder_inputs, decoder_inputs], predictions)
Exemple #26
0
    def decode_layer(self):
        """Build the seq2seq-with-attention summarisation model, load its
        trained weights, and derive the encoder/decoder inference models.

        Side effects (all visible in the code below):
        * sets ``self.encoder_model`` and ``self.decoder_model``,
        * sets ``self.tar_word_to_index`` / ``self.tar_index_to_word`` from
          ``self.tar_tokenizer``,
        * reads weights from 'APP/Data/attention_best_model_v2.h5'.
        """
        # Encoder input: integer-encoded source text of fixed length.
        encoder_inputs = Input(shape=(self.text_max_len, ))

        # Encoder embedding layer.
        enc_emb = Embedding(self.src_vocab, self.embedding_dim)(encoder_inputs)

        # Encoder LSTM 1
        encoder_lstm1 = LSTM(self.hidden_size,
                             return_sequences=True,
                             return_state=True,
                             dropout=0.4,
                             recurrent_dropout=0.4)
        encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)

        # Encoder LSTM 2
        encoder_lstm2 = LSTM(self.hidden_size,
                             return_sequences=True,
                             return_state=True,
                             dropout=0.4,
                             recurrent_dropout=0.4)
        encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)

        # Encoder LSTM 3 -- its final states (state_h, state_c) seed the decoder.
        encoder_lstm3 = LSTM(self.hidden_size,
                             return_state=True,
                             return_sequences=True,
                             dropout=0.4,
                             recurrent_dropout=0.4)
        encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2)

        # Decoder input: integer-encoded summary tokens (variable length).
        decoder_inputs = Input(shape=(None, ))

        # Decoder embedding layer (reused below by the inference decoder).
        dec_emb_layer = Embedding(self.tar_vocab, self.embedding_dim)
        dec_emb = dec_emb_layer(decoder_inputs)

        # Decoder LSTM
        decoder_lstm = LSTM(self.hidden_size,
                            return_sequences=True,
                            return_state=True,
                            dropout=0.4,
                            recurrent_dropout=0.2)
        decoder_outputs, _, _ = decoder_lstm(dec_emb,
                                             initial_state=[state_h, state_c])

        # Attention layer (attention function)
        attn_layer = AttentionLayer(name='attention_layer')
        attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])

        # Concatenate the attention context with the decoder hidden states.
        decoder_concat_input = Concatenate(
            axis=-1, name='concat_layer')([decoder_outputs, attn_out])

        # Decoder output layer.
        decoder_softmax_layer = Dense(self.tar_vocab, activation='softmax')
        decoder_softmax_outputs = decoder_softmax_layer(decoder_concat_input)

        # Define the training-shaped model, then restore pre-trained weights.
        model = Model([encoder_inputs, decoder_inputs],
                      decoder_softmax_outputs)
        model.summary()
        model.load_weights('APP/Data/attention_best_model_v2.h5')
        # NOTE(review): compiling after load_weights is harmless here; the
        # model only serves as a weight container for the inference graphs.
        model.compile(optimizer='rmsprop',
                      loss='sparse_categorical_crossentropy')

        # Summarisation with seq2seq + attention.
        self.tar_word_to_index = self.tar_tokenizer.word_index  # summary vocab: word -> integer
        self.tar_index_to_word = self.tar_tokenizer.index_word  # summary vocab: integer -> word

        # Encoder (inference) model.
        self.encoder_model = Model(inputs=encoder_inputs,
                                   outputs=[encoder_outputs, state_h, state_c])

        # Tensors that carry the previous timestep's decoder states.
        decoder_state_input_h = Input(shape=(self.hidden_size, ))
        decoder_state_input_c = Input(shape=(self.hidden_size, ))

        dec_emb2 = dec_emb_layer(decoder_inputs)
        # Predict the next word using the previous timestep's states as the
        # initial state.  NOTE(review): this rebinds state_h2/state_c2,
        # shadowing the encoder LSTM-2 states above (unused afterwards).
        decoder_outputs2, state_h2, state_c2 = decoder_lstm(
            dec_emb2,
            initial_state=[decoder_state_input_h, decoder_state_input_c])

        # Attention function: encoder outputs are fed as an explicit input.
        decoder_hidden_state_input = Input(shape=(self.text_max_len,
                                                  self.hidden_size))
        attn_out_inf, attn_states_inf = attn_layer(
            [decoder_hidden_state_input, decoder_outputs2])
        decoder_inf_concat = Concatenate(
            axis=-1, name='concat')([decoder_outputs2, attn_out_inf])

        # Decoder output layer (shared with the training graph).
        decoder_outputs2 = decoder_softmax_layer(decoder_inf_concat)

        # Final decoder (inference) model.
        self.decoder_model = Model([decoder_inputs] + [
            decoder_hidden_state_input, decoder_state_input_h,
            decoder_state_input_c
        ], [decoder_outputs2] + [state_h2, state_c2])
Exemple #27
0
                     recurrent_dropout=0.4)
encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2)

decoder_inputs = Input(shape=(None, ))
dec_emb_layer = Embedding(y_voc, embedding_dim, trainable=True)
dec_emb = dec_emb_layer(decoder_inputs)

decoder_lstm = LSTM(latent_dim,
                    return_sequences=True,
                    return_state=True,
                    dropout=0.4,
                    recurrent_dropout=0.2)
decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm(
    dec_emb, initial_state=[state_h, state_c])

attn_layer = AttentionLayer(name="attention_layer")
attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])

decoder_concat_input = Concatenate(
    axis=-1, name='concat_layer')([decoder_outputs, attn_out])

decoder_dense = TimeDistributed(Dense(y_voc, activation='softmax'))
decoder_outputs = decoder_dense(decoder_concat_input)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.summary()

model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=2)

history = model.fit([x_tr, y_tr[:, :-1]],
Exemple #28
0
#LSTM 3 
encoder_lstm3=LSTM(latent_dim, return_state=True, return_sequences=True) 
encoder_outputs, state_h, state_c= encoder_lstm3(encoder_output2) 
'''

# Set up the decoder.
# NOTE(review): encoder_inputs, encoder_output, state_h and state_c are
# defined by encoder code earlier in this script (partly inside the
# triple-quoted block above) -- confirm they exist before running this part.
decoder_inputs = Input(shape=(None, ))
dec_emb_layer = Embedding(y_voc_size, latent_dim, trainable=True)
dec_emb = dec_emb_layer(decoder_inputs)
# LSTM seeded with the encoder's final states as initial state.
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
# NOTE(review): despite the names, a (unidirectional) LSTM returns
# (outputs, state_h, state_c), not forward/backward states; both state
# variables are unused anyway.
decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm(
    dec_emb, initial_state=[state_h, state_c])

# Attention layer over (encoder outputs, decoder outputs).
attn_out, attn_states = AttentionLayer(name='attention_layer')(
    [encoder_output, decoder_outputs])

# Concat attention output and decoder LSTM output.
decoder_concat_input = Concatenate(
    axis=-1, name='concat_layer')([decoder_outputs, attn_out])

# Dense layer: per-timestep softmax over the target vocabulary.
decoder_dense = TimeDistributed(Dense(y_voc_size, activation='softmax'))
decoder_outputs = decoder_dense(decoder_concat_input)

# Define the model.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.summary()

# Sparse loss: targets are integer-encoded tokens, not one-hot vectors.
model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')
# Second decoder definition (a separate snippet): rebinds decoder_inputs,
# dec_emb_layer, decoder_lstm, model, ... over the previous block's names.
# NOTE(review): y_voc, embedding_dim, latent_dim, state_h/state_c,
# encoder_outputs and encoder_inputs must be defined earlier -- not all are
# visible here; verify against the preceding encoder code.
decoder_inputs = Input(shape=(None, ))

# Embedding layer for target-summary tokens.
dec_emb_layer = Embedding(y_voc, embedding_dim, trainable=True)
dec_emb = dec_emb_layer(decoder_inputs)

decoder_lstm = LSTM(latent_dim,
                    return_sequences=True,
                    return_state=True,
                    dropout=0.4,
                    recurrent_dropout=0.2)
# NOTE(review): an LSTM returns (outputs, h, c); the "fwd/back" names are
# misleading and both state variables are unused.
decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm(
    dec_emb, initial_state=[state_h, state_c])

# Attention layer
attn_layer = AttentionLayer(name='attention_layer')
attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])

# Concat attention input and decoder LSTM output
decoder_concat_input = Concatenate(
    axis=-1, name='concat_layer')([decoder_outputs, attn_out])

# Dense layer: per-timestep softmax over the target vocabulary.
decoder_dense = TimeDistributed(Dense(y_voc, activation='softmax'))
decoder_outputs = decoder_dense(decoder_concat_input)

# Define the model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

model.summary()
Exemple #30
0
def create_model(latent_dim, bidirectional):
    """Build a seq2seq (encoder/decoder LSTM) model family.

    Parameters
    ----------
    latent_dim : int
        Size of the LSTM hidden state.  A bidirectional encoder produces
        ``2 * latent_dim``-wide states, so the decoder LSTM width is doubled
        to match.
    bidirectional : bool
        Use a bidirectional encoder LSTM when True.

    Returns
    -------
    (enc_model, dec_model, model)
        The standalone encoder, the standalone decoder, and the combined
        training model (compiled with Adam / categorical cross-entropy).

    The input alphabet is hard-coded to 14 features per timestep and the
    output alphabet to 38 (one-hot per timestep).

    Fix over the previous version: when attention is enabled, the computed
    ``decoder_concat_input`` was discarded and the dense layer was applied
    to the raw decoder outputs; the concatenated tensor now actually feeds
    the output layer.  The default path (attention off) is unchanged.
    """
    # Attention is scaffolded but disabled: enabling it also requires
    # feeding the encoder's sequence outputs into dec_model (see NOTE in
    # the attention branch below).
    attention = False

    ## Create encoder
    encoder_inputs = Input(shape=(None, 14))

    if bidirectional:
        encoder = Bidirectional(LSTM(latent_dim, return_state=True))
        encoder_outputs, forward_h, forward_c, backward_h, backward_c = encoder(
            encoder_inputs)
        # Concatenate the two directions' states so the decoder can be
        # initialised from a single (2 * latent_dim)-wide vector per state.
        state_h = Concatenate()([forward_h, backward_h])
        state_c = Concatenate()([forward_c, backward_c])
        print('state_h.size', k.shape(state_h))
        print('state_c.size', k.shape(state_c))
    else:
        encoder = LSTM(latent_dim, return_sequences=True, return_state=True)
        encoder_outputs, state_h, state_c = encoder(encoder_inputs)

    encoder_states = [state_h, state_c]
    ## Keep encoder as separate model
    enc_model = Model(encoder_inputs, encoder_states, name='encoder_model')

    ## Create decoder (width must match the encoder state width).
    decoder_input = Input(shape=(None, 38))
    decoder_dim = latent_dim * 2 if bidirectional else latent_dim
    in_h_state = Input(shape=(decoder_dim, ))
    in_e_state = Input(shape=(decoder_dim, ))
    decoder_lstm = LSTM(decoder_dim, return_sequences=True, return_state=True)
    decoder_outputs, dec_h_state, dec_c_state = decoder_lstm(
        decoder_input, initial_state=[in_h_state, in_e_state])

    if attention:
        # Attention over the encoder's per-timestep outputs.
        # NOTE(review): encoder_outputs belongs to the encoder graph, not to
        # dec_model's declared inputs, so building dec_model with this path
        # enabled would fail with a disconnected graph; the encoder outputs
        # would first have to become an explicit Input of dec_model.
        attn_layer = AttentionLayer(name='attention_layer')
        attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])
        # Feed the concatenated context into the output layer (previously
        # computed but unused).
        decoder_outputs = Concatenate(
            axis=-1, name='concat_layer')([decoder_outputs, attn_out])

    decoder_dense = Dense(38, activation='softmax')
    decoder_outputs = decoder_dense(decoder_outputs)
    ## Keep decoder as separate model
    dec_model = Model([decoder_input, in_h_state, in_e_state],
                      [decoder_outputs, dec_h_state, dec_c_state],
                      name='decoder_model')

    ## Combined model: run the encoder, feed its states plus the decoder
    ## input into dec_model, and keep only the prediction output (index 0,
    ## not the states).
    model = Model([encoder_inputs, decoder_input],
                  dec_model([decoder_input] + enc_model(encoder_inputs))[0],
                  name='combined_model')
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    ## Return all 3 models
    model.summary()
    return enc_model, dec_model, model