Exemple #1
0
# Sentence autoencoder: split the sentences, build batch generators, then
# define and compile an encoder/decoder model over embedded sentences.
print(embeddings.shape)

# Split the sentences into training data and test data.
train_size = 0.7
Xtrain, Xtest = train_test_split(sent_wids, train_size=train_size)
print("number of sentences: ", len(sent_wids))
print(Xtrain.shape, Xtest.shape)

# Define the training-data and test-data generators.
train_gen = sentence_generator(Xtrain, embeddings, BATCH_SIZE)
test_gen = sentence_generator(Xtest, embeddings, BATCH_SIZE)

# Define the autoencoder network.
# Encoder: a bidirectional LSTM whose two directions are summed
# (merge_mode="sum"); return_sequences is not set on the encoder LSTM.
inputs = Input(shape=(SEQUENCE_LEN, EMBED_SIZE), name="input")
encoded = Bidirectional(LSTM(LATENT_SIZE),
                        merge_mode="sum",
                        name="encoder_lstm")(inputs)
# Decoder: repeat the encoding SEQUENCE_LEN times and run a second
# bidirectional LSTM that returns a sequence with EMBED_SIZE units.
decoded = RepeatVector(SEQUENCE_LEN, name="repeater")(encoded)
decoded = Bidirectional(LSTM(EMBED_SIZE, return_sequences=True),
                        merge_mode="sum",
                        name="decoder_lstm")(decoded)

autoencoder = Model(inputs, decoded)

autoencoder.compile(optimizer="sgd", loss="mse")

# Training
# Whole batches per split; a trailing partial batch is dropped by the
# integer division.
num_train_steps = len(Xtrain) // BATCH_SIZE
num_test_steps = len(Xtest) // BATCH_SIZE
checkpoint = ModelCheckpoint(filepath=os.path.join(
    DATA_DIR, "sent-thoughts-autoencoder.h5"),
Exemple #2
0
    def Init(self, weightsfile=None, verbose=False):
        """
        Method initializes the model based on the specified
        parameters.

        Builds ``self.num_models`` recurrent encoders on top of an input of
        shape ``[maxlen, num_chars]``, merges them according to
        ``self.merge``, and adds dropout plus a softmax output over
        ``num_chars`` classes. The model is not compiled here.

        Input:
        weightsfile  -- File with weights (default is None).
        verbose      -- Flag for verbose mode, printing architecture (default = False).
        Return:
        Initialized model based on the specified parameters
        (also stored in ``self.model``).
        """
        # Load the variables
        num_chars, maxlen = self.Utils.NumChars(), self.Utils.MaxLen()
        l1, l2 = self.Layers[0], self.Layers[1]

        def recurrent(units, name, bidirectional, **unit_kwargs):
            # Build one recurrent layer from the configured unit
            # (e.g. CuDNNLSTM or CuDNNGRU), optionally wrapped in Bidirectional.
            if bidirectional:
                return Bidirectional(self.Unit(units, **unit_kwargs), name=name)
            return self.Unit(units, name=name, **unit_kwargs)

        # Define the model input
        comment_seq = Input(shape=[maxlen, num_chars], name="Input")

        # Scheme 1 (self.split == 1): one embedding layer per encoder.
        # Scheme 0 (otherwise): a single embedding layer shared by all encoders.
        per_model_embedding = self.split == 1
        shared_embedding = None
        if not per_model_embedding:
            shared_embedding = recurrent(l1, "Embedding", self.bilstm[0],
                                         return_sequences=True)(comment_seq)

        minimodels = []
        for idx in range(self.num_models):
            if per_model_embedding:
                # Layer names must be unique within a Keras model, so the
                # per-model embeddings get an index suffix. The plain name is
                # kept when there is only one model so existing single-model
                # setups keep their layer name.
                if self.num_models == 1:
                    embedding_name = "Embedding"
                else:
                    embedding_name = "Embedding_%s" % idx
                embedded = recurrent(l1, embedding_name, self.bilstm[0],
                                     return_sequences=True)(comment_seq)
            else:
                embedded = shared_embedding

            output_i = recurrent(l2, "Latent_%s" % idx,
                                 self.bilstm[1])(embedded)
            output_i = LayerNormalization(name="LayerNormm_%s" %
                                          idx)(output_i)
            minimodels.append(output_i)

        # Combine to a single tensor using the selected mode
        if len(minimodels) == 1:
            output = minimodels[0]
        elif self.merge == 1:
            output = average(minimodels)
        elif self.merge == 2:
            output = WeightedAverage()(minimodels)
        else:
            # Default is concatenate
            output = concatenate(minimodels)

        # Apply a dropout and compute the probabilities
        output = Dropout(self.Dropout)(output)
        output = Dense(num_chars, name="Output")(output)
        output = Activation("softmax")(output)

        # Assemble the model
        self.model = Model([comment_seq], output)
        if verbose:
            self.model.summary()

        # Load the weight if specified
        if weightsfile is not None:
            self.model.load_weights(weightsfile)

        # Done
        return self.model
Exemple #3
0
# Hyper-parameters for the IMDB sentiment model.
maxlen = 400
batch_size = 32
embedding_dims = 50
filters = 250
kernel_size = 3  # 3-gram
hidden_dims = 250
epochs = 5

# Prepare the data: load the IMDB reviews and pad both splits to `maxlen`.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
x_train, x_test = (sequence.pad_sequences(split, maxlen=maxlen)
                   for split in (x_train, x_test))

# Embedding -> bidirectional LSTM -> single sigmoid unit.
model = Sequential([
    Embedding(max_features, embedding_dims, input_length=maxlen),
    Bidirectional(LSTM(128, dropout=0.2)),
    Dense(1),
    Activation('sigmoid'),
])
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Train, holding out 30% of the training data for validation.
fit_options = dict(batch_size=batch_size,
                   epochs=epochs,
                   shuffle=True,
                   verbose=1,
                   validation_split=0.3)
model.fit(x_train, y_train, **fit_options)

# Evaluate on the held-out test split.
print('test set')
loss, accu = model.evaluate(x_test, y_test, batch_size=100, verbose=1)
print(loss, accu)
Exemple #4
0
                                padding='post')

# Build a Sequential encoder/decoder: embedded source tokens are encoded by
# stacked bidirectional RNNs, the final encoding is repeated MAX_LENGTH times,
# and stacked bidirectional decoders emit a softmax over VOCAB_ENG per step.
print("building model...\n")

model = Sequential()

# RNN cell classes used for the encoder and decoder stacks.
encRNN = GRU
decRNN = LSTM

# Encoder input: embedding with mask_zero=True, followed by dropout.
model.add(
    Embedding(VOCAB_KOR,
              EMBEDDING_SIZE,
              input_length=MAX_INPUT,
              mask_zero=True))
model.add(Dropout(DROPOUTRATE))
model.add(Bidirectional(encRNN(HIDDEN_SIZE, return_sequences=True)))
# model.add(Bidirectional(decRNN(HIDDEN_SIZE, return_sequences=True)))

# Disabled attention variant, kept for reference:
# from attention import Attention
# model.add(Bidirectional(encRNN(HIDDEN_SIZE, return_sequences=True)))
# model.add(Attention())

# Last encoder layer does not return sequences; its output is repeated
# MAX_LENGTH times to feed the decoder.
model.add(Bidirectional(encRNN(HIDDEN_SIZE)))
model.add(RepeatVector(MAX_LENGTH))

# Decoder: three stacked bidirectional layers, then a per-timestep Dense.
model.add(Bidirectional(decRNN(HIDDEN_SIZE, return_sequences=True)))
model.add(Bidirectional(decRNN(HIDDEN_SIZE, return_sequences=True)))
model.add(Bidirectional(decRNN(HIDDEN_SIZE, return_sequences=True)))
model.add(TimeDistributed(Dense(VOCAB_ENG)))
model.add(Activation('softmax'))
    def build(self):
        """Assemble the 3D-conv + bidirectional-GRU network and its CTC model.

        Every intermediate tensor is stored on ``self`` (``zero1``, ``conv1``,
        ..., ``y_pred``, ``loss_out``) and the final model, which takes the
        frames, labels and both length inputs and outputs ``self.loss_out``,
        is stored in ``self.model``.
        """
        channels_first = K.image_data_format() == 'channels_first'
        input_shape = (
            (self.img_c, self.frames_n, self.img_w, self.img_h)
            if channels_first else
            (self.frames_n, self.img_w, self.img_h, self.img_c))

        def conv_stage(stage, tensor, n_filters, kernel, conv_strides, pad):
            # One zero-pad -> Conv3D -> max-pool -> dropout stage.
            padded = ZeroPadding3D(padding=pad,
                                   name='zero%d' % stage)(tensor)
            convolved = Conv3D(n_filters, kernel,
                               strides=conv_strides,
                               activation='relu',
                               kernel_initializer='he_normal',
                               name='conv%d' % stage)(padded)
            pooled = MaxPooling3D(pool_size=(1, 2, 2),
                                  strides=(1, 2, 2),
                                  name='max%d' % stage)(convolved)
            dropped = Dropout(0.5)(pooled)
            return padded, convolved, pooled, dropped

        def bi_gru(layer_name, tensor):
            # Bidirectional GRU returning the full sequence, directions
            # concatenated.
            cell = GRU(256,
                       return_sequences=True,
                       kernel_initializer='Orthogonal',
                       name=layer_name)
            return Bidirectional(cell, merge_mode='concat')(tensor)

        self.input_data = Input(name='the_input',
                                shape=input_shape,
                                dtype='float32')

        # Three convolutional stages.
        (self.zero1, self.conv1,
         self.maxp1, self.drop1) = conv_stage(1, self.input_data, 32,
                                              (3, 5, 5), (1, 2, 2), (1, 2, 2))
        (self.zero2, self.conv2,
         self.maxp2, self.drop2) = conv_stage(2, self.drop1, 64,
                                              (3, 5, 5), (1, 1, 1), (1, 2, 2))
        (self.zero3, self.conv3,
         self.maxp3, self.drop3) = conv_stage(3, self.drop2, 96,
                                              (3, 3, 3), (1, 1, 1), (1, 1, 1))

        # Flatten each timestep before the recurrent layers.
        self.resh1 = TimeDistributed(Flatten())(self.drop3)

        self.gru_1 = bi_gru('gru1', self.resh1)
        self.gru_2 = bi_gru('gru2', self.gru_1)

        # transforms RNN output to character activations:
        self.dense1 = Dense(self.output_size,
                            kernel_initializer='he_normal',
                            name='dense1')(self.gru_2)
        self.y_pred = Activation('softmax', name='softmax')(self.dense1)

        # Extra inputs consumed by the CTC loss.
        self.labels = Input(name='the_labels',
                            shape=[self.absolute_max_string_len],
                            dtype='float32')
        self.input_length = Input(name='input_length',
                                  shape=[1],
                                  dtype='int64')
        self.label_length = Input(name='label_length',
                                  shape=[1],
                                  dtype='int64')

        ctc_args = [self.y_pred, self.labels,
                    self.input_length, self.label_length]
        self.loss_out = CTC('ctc', ctc_args)

        model_inputs = [self.input_data, self.labels,
                        self.input_length, self.label_length]
        self.model = Model(inputs=model_inputs, outputs=self.loss_out)
Exemple #6
0
                                monitor="val_loss")
# Callbacks: shrink the learning rate by 20% as soon as val_loss stops
# improving (patience=0), and stop training after 4 epochs without improvement.
reducer_lr2 = ReduceLROnPlateau(monitor='val_loss',
                                factor=0.8,
                                patience=0,
                                verbose=1,
                                mode='auto',
                                cooldown=0,
                                min_lr=1e-6)
earlyStopping2 = EarlyStopping(monitor='val_loss',
                               min_delta=0,
                               patience=4,
                               verbose=1)

# Model: 60 token ids -> embedding -> bidirectional LSTM -> 5-way softmax.
review_input = Input(shape=(60, ), dtype='int32')
embedded_sequence = embedding_layer(review_input)
# `dropout` / `recurrent_dropout` are the Keras 2 names of the Keras 1
# `dropout_W` / `dropout_U` arguments; the rest of this script already uses
# the Keras 2 API (`inputs=`, `outputs=`, `epochs=`).
bilstmLayer = Bidirectional(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
x = bilstmLayer(embedded_sequence)
denseLayer2 = Dense(5, activation='softmax')(x)

model2 = Model(inputs=[review_input], outputs=denseLayer2)
model2.compile(optimizer='rmsprop',
               loss='categorical_crossentropy',
               metrics=['accuracy'])
model2.summary()

#====================================================training====================================================

history2 = model2.fit(
    X_Train_encodedPadded_words,
    Y_train,
    epochs=epochs,
Exemple #7
0

# model setting

# Script entry point: build, compile and train a BiLSTM + attention
# classifier with 11 sigmoid outputs on top of a frozen embedding matrix.
if __name__ == '__main__':
    lstm_shape = 250
    

    # Define the model
    inp = Input(shape=(text_max_words,))
    # Embedding initialised from `embedding_matrix` and kept frozen
    # (trainable=False).
    emb = Embedding(input_dim=vocab_size, output_dim=EMB_DIM,
                trainable=False, weights=[embedding_matrix], input_length=text_max_words)(inp)
    # max_features = vocab_size, maxlen=text_max_words, embed_size=EMB_DIM
    # emb = Embedding(input_dim=max_features, input_length = maxlen, output_dim=embed_size)(inp)
    x = SpatialDropout1D(0.1)(emb)
    x = Bidirectional(LSTM(lstm_shape, return_sequences=True, dropout=0.3, recurrent_dropout=0.3))(x)
    # The Attention layer is called here as returning two tensors: the
    # attended representation and the attention values.
    x, attention = Attention()(x)
    x = Dense(11, activation="sigmoid")(x)

    model = Model(inputs=inp, outputs=x)
    model.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])

    attention_model = Model(inputs=inp, outputs=attention) # Model to print out the attention data
    model.summary()
    # verbose=?; validation_split has to be replaced by a separate validation file.
    
    # model.fit(X_t, y, validation_data=(x_val,y_val), epochs=3, verbose=1, batch_size=512)
    model.fit(X_t, y, validation_data=(x_val,y_val), epochs=50, verbose=1, batch_size=32)
# One integer-id input per candidate segmentation.
morph_seg = [
    Input(shape=(None, ), dtype='int32')
    for _ in range(number_of_segmentation)
]

# A single embedding layer is applied to every segmentation input.
morph_embedding = Embedding(input_dim=len(set(morphs)) + 1,
                            output_dim=50,
                            mask_zero=True,
                            name="embeddding")

embed_seg = [morph_embedding(seg_input) for seg_input in morph_seg]

# A single bidirectional LSTM (directions concatenated, full sequence
# returned) is likewise applied to every embedded segmentation.
biLSTM = Bidirectional(LSTM(200,
                            dropout=0.2,
                            recurrent_dropout=0.2,
                            return_sequences=True),
                       merge_mode='concat')

encoded_seg = [biLSTM(embedded) for embedded in embed_seg]
'''
attn_dense_seq = []
attention_morpheme_dense = TimeDistributed(Dense(200))
for i in range(number_of_segmentation):
    attn_dense_seq.append(attention_morpheme_dense(encoded_seg[i]))
'''

# Per-timestep tanh projection (no bias) used by the morpheme attention.
attention_morpheme_nonlinear = TimeDistributed(
    Dense(units=400, activation='tanh', use_bias=False))
Exemple #9
0
# NOTE(review): the labels say "test" but the training arrays are printed;
# confirm whether x_test / y_test were intended here.
print('x_test shape:', x_train.shape)
print('y_test shape:', y_train.shape)

# Training hyper-parameters; the time/feature sizes are read from x_train.
BATCH_SIZE = 16
EPOCHS = 200
TIME_STEPS = x_train.shape[1]
INPUT_SIZE = x_train.shape[2]
LR = 0.005
n_hidden = 10
opt = RMSprop(lr=LR,decay=0.0002)

# Build model BiLSTM: one bidirectional LSTM (directions summed), dropout,
# then a softmax over OUTPUT_SIZE classes.
print('Build model...')
tStart = time.time()
model = Sequential()
model.add(Bidirectional(LSTM(n_hidden,return_sequences=False,activation='tanh'),input_shape=x_train.shape[1:],merge_mode='sum'))
model.add(Dropout(0.2))
model.add(Dense(OUTPUT_SIZE, activation='softmax'))
model.summary()
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)

# Timestamp used to make the checkpoint name unique.
DateTime = time.strftime("%m%d_%H%M", time.localtime())
# checkpoint path
directory = './modelSave'
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(directory, exist_ok=True)
filepath="{0}/bci_{1}_{2}_{3}".format(directory, DateTime, subject, n_hidden)
Exemple #10
0
    elif s['model'] == 'CNN':

        # Two stacked width-3 convolutions over the input sequence.
        cnn = Conv1D(150, 3, padding='same', activation='relu')(input_vec)
        cnn = Conv1D(150, 3, padding='same', activation='relu')(cnn)

        # Global max pooling over the convolution output.
        pool = GlobalMaxPooling1D()(cnn)

        # Dense projection to s['hidden_dims'] units, followed by dropout.
        pool = Dense(s['hidden_dims'], activation='relu',
                     name='cnn_dense')(pool)
        pool = Dropout(0.2)(pool)

    elif s['model'] == 'LSTM-CNN':
        # Bidirectional LSTM returning the full sequence.
        lstm = Bidirectional(
            LSTM(s['lstm_h'],
                 implementation=2,
                 dropout=0.2,
                 recurrent_dropout=0.2,
                 return_sequences=True))(input_vec)
        # Per-timestep tanh projection, then a width-3 convolution.
        tanhl = TimeDistributed(Dense(20, activation="tanh"))(lstm)
        cnn = Conv1D(60, 3, padding='same', activation='tanh')(tanhl)
        # Global max pooling, dense projection and dropout.
        pool = GlobalMaxPooling1D()(cnn)
        pool = Dense(s['hidden_dims'], activation='tanh',
                     name='lc_dense')(pool)
        pool = Dropout(0.2)(pool)

    elif s['model'] == 'RCNN':
        l_context = Input(shape=(s['max_len'], ),
                          dtype='int32',
                          name='l_context')
        r_context = Input(shape=(s['max_len'], ),
                          dtype='int32',
Exemple #11
0
def make_model(num_tfs, num_bws, num_motifs, num_recurrent, num_dense,
               dropout_rate):
    """Build a two-input model that averages one shared layer stack.

    The same list of layers is applied (via ``get_output``) to a forward and
    a reverse input of shape ``(L, 4 + num_bws)`` and the two outputs are
    merged with mode ``'ave'``. ``L``, ``w``, ``w2`` and ``get_output`` are
    taken from the enclosing module.

    The sign of ``num_recurrent`` selects the middle of the stack:
    negative -> global max pooling; zero -> max pooling + flatten;
    otherwise -> max pooling, a bidirectional LSTM with ``num_recurrent``
    units, then flatten.

    Returns the (uncompiled) Keras model.
    """
    from keras import backend as K
    from keras.models import Model
    from keras.layers import Dense, Dropout, Activation, Flatten, Layer, merge, Input
    from keras.layers.convolutional import Convolution1D, MaxPooling1D
    from keras.layers.pooling import GlobalMaxPooling1D
    from keras.layers.recurrent import LSTM
    from keras.layers.wrappers import Bidirectional, TimeDistributed
    '''
    import tensorflow as tf
    config = tf.ConfigProto(device_count={'gpu':1})
    config.gpu_options.allow_growth=True
    session = tf.Session(config=config)
    '''
    n_channels = 4 + num_bws
    forward_input = Input(shape=(L, n_channels))
    reverse_input = Input(shape=(L, n_channels))

    # Head shared by every variant: motif convolution, light dropout and a
    # per-position dense layer.
    layer_stack = [
        Convolution1D(input_dim=n_channels,
                      nb_filter=num_motifs,
                      filter_length=w,
                      border_mode='valid',
                      activation='relu',
                      subsample_length=1),
        Dropout(0.1),
        TimeDistributed(Dense(num_motifs, activation='relu')),
    ]

    # Variant-specific middle section.
    if num_recurrent < 0:
        layer_stack.append(GlobalMaxPooling1D())
        layer_stack.append(Dropout(dropout_rate))
    else:
        layer_stack.append(MaxPooling1D(pool_length=w2, stride=w2))
        if num_recurrent != 0:
            layer_stack.append(
                Bidirectional(
                    LSTM(num_recurrent,
                         dropout_W=0.1,
                         dropout_U=0.1,
                         return_sequences=True)))
        layer_stack.append(Dropout(dropout_rate))
        layer_stack.append(Flatten())

    # Tail shared by every variant: dense layer, dropout, sigmoid outputs.
    layer_stack.extend([
        Dense(num_dense, activation='relu'),
        Dropout(dropout_rate),
        Dense(num_tfs, activation='sigmoid'),
    ])

    forward_output = get_output(forward_input, layer_stack)
    reverse_output = get_output(reverse_input, layer_stack)
    averaged_output = merge([forward_output, reverse_output], mode='ave')
    return Model(input=[forward_input, reverse_input], output=averaged_output)
Exemple #12
0
# Copy the word2vec vector of every known word into its row of the
# embedding matrix; rows of words missing from word2vec are left untouched.
for word, index in word2idx.items():
    try:
        vector = word2vec[word.lower()]
    except KeyError:
        # keep as zero (not ideal, but what else can we do?)
        continue
    embedding_weights[index, :] = vector


def _make_sentence_encoder():
    """Return a fresh embedding -> summed BiLSTM -> dropout encoder."""
    encoder = Sequential()
    encoder.add(
        Embedding(output_dim=WORD2VEC_EMBED_SIZE,
                  input_dim=vocab_size,
                  weights=[embedding_weights],
                  mask_zero=True))
    encoder.add(
        Bidirectional(LSTM(QA_EMBED_SIZE,
                           input_length=seq_maxlen,
                           return_sequences=False),
                      merge_mode="sum"))
    encoder.add(Dropout(0.3))
    return encoder


print("Building model...")
# Question and answer encoders have identical architecture but are built
# as two separate models.
qenc = _make_sentence_encoder()
aenc = _make_sentence_encoder()
Exemple #13
0
    # Read the first dataset of the training file into a numpy array.
    states_train = list(f_train.keys())[0]
    projed_rep_Ru_train = list(f_train[states_train])
    projed_rep_Ru_train = np.array(projed_rep_Ru_train)

    # Load Russian test projected data
    # NOTE(review): the test file is used as the validation set below.
    f_test = h5py.File("./output_adv_NoMT/projected_rep_%s_test_1k.hdf5"%lang, 'r')
    states_test = list(f_test.keys())[0]
    projed_rep_Ru_test = list(f_test[states_test])
    projed_rep_Ru_val = np.array(projed_rep_Ru_test)

    # Building model: one bidirectional LSTM over (150, 200) inputs followed
    # by a single sigmoid unit.
    myInput = Input(shape=(150,200))
    LSTM_Russian = Bidirectional(LSTM(100,return_sequences=False))(myInput)
    #LSTM_Russian=Bidirectional(LSTM(32, return_sequences=False))(LSTM_Russian)
    predictions = Dense(1, activation='sigmoid')(LSTM_Russian)
    model_Ru = Model(inputs=myInput, outputs=predictions)
    model_Ru.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    # summary() prints by itself and returns None, so it is not wrapped in print().
    model_Ru.summary()

    print(len(projed_rep_Ru_train))
    print(len(y_train_Ru))
    class_weight = {0: 1.,1: 1}
    early_stopping = EarlyStopping(monitor='val_loss', patience=3)
    # validation_data is passed as an (x, y) tuple, the documented form;
    # a list can be interpreted as multiple inputs by newer Keras versions.
    model_Ru.fit(projed_rep_Ru_train, y_train_Ru, epochs=10, batch_size=32,
        validation_data=(projed_rep_Ru_val, y_val_Ru),
        callbacks=[early_stopping],class_weight=class_weight)
    
    # Load Russian test projected data
from keras.callbacks import *
#checkpoint=ModelCheckpoint(filepath=destination+os.sep+"weights.FL.h5",monitor='val_loss',verbose=0,save_best_only=True,save_weights_only=False,mode='auto',period=1)
#earlystopping=EarlyStopping(monitor='val_loss',patience=3)
from keras.layers import LeakyReLU

# Token-id input fed through a frozen, pre-initialised embedding.
input_layer = Input(shape=(maxlen, ), dtype='int32')
embedding_layer = Embedding(len(word_index) + 1,
                            emb_dim,
                            weights=[embedding_matrix],
                            input_length=maxlen,
                            trainable=False)
emb_seq = embedding_layer(input_layer)

# First layer: bidirectional LSTM returning the full sequence, followed by
# a LeakyReLU activation.
x = Bidirectional(
    LSTM(51, return_sequences=True, dropout=0.2,
         recurrent_dropout=0.2))(emb_seq)
x = LeakyReLU()(x)

# Batch-normalise the sequence and keep only its last timestep.
last_state = Lambda(lambda x: x[:, -1, :])(BatchNormalization()(x))

# Global max pooling over the sequence, then batch normalisation.
y = GlobalMaxPool1D()(x)
y = BatchNormalization()(y)  # <- contains the most relevant features

import keras
# Concatenate the pooled features with the last-timestep summary.
merged = keras.layers.concatenate([y, last_state], axis=-1)

#Concatenate Summary and most relevant features
#import keras
#smerged=keras.layers.concatenate([x,batch_normalized_last_state], axis=-1)
                                 use_bias=True)(reshapedPoolForSentence)

    densePoolForSentence = Dropout(dr, name='DropDense' +
                                   str(i))(densePoolForSentence)
    maxPooledPerDoc.append(densePoolForSentence)

# Naive approach: average the per-sentence representations, flatten the
# result and classify it with a single sigmoid unit.
averaged = Average()(maxPooledPerDoc)
# The flattened size is computed once, up front. The original lambda read
# `averaged.shape` lazily, but `averaged` is rebound to the reshaped tensor
# on the next statement, so any later re-invocation of the layer would read
# the shape of the wrong tensor.
avgFlatDim = int(averaged.shape[1]) * int(averaged.shape[2])
averaged = Lambda(lambda x: K.reshape(x, shape=(-1, avgFlatDim)),
                  name='attend_output')(averaged)
out_avg = Dense(1, activation='sigmoid', use_bias=True)(averaged)

# Apply attention: concatenate the sentence representations along axis 1
# and run a bidirectional GRU over them.
mergedPoolPerDoc = Concatenate(axis=1)(maxPooledPerDoc)
biRnn_ = Bidirectional(GRU(dimGRU, return_sequences=True),
                       merge_mode='concat')(mergedPoolPerDoc)
# Reshape the GRU output to explicit static dimensions.
newShape = (-1, int(mergedPoolPerDoc.shape[1]), int(biRnn_.shape[2]))
biRnn = Lambda(lambda x: K.reshape(x, shape=newShape),
               name='biRnn_TF_Reminder1')(biRnn_)
#biRnn2 = Lambda(lambda x: K.reshape(x,shape=newShape), name ='biRnn_TF_Reminder2')(biRnn_[1])

#QIITA
#repeat_dec = TimeDistributed(RepeatVector(numSentencesPerDoc), name='repeat_')(biRnn)
#annotation_layer = TimeDistributed(Dense(CONTEXT_DIM))(biRnn)

#biRnn_cat = Concatenate(axis = 2)([biRnn1, biRnn2])

CONTEXT_DIM = 100

# Attention scores: tanh projection to CONTEXT_DIM, then one unnormalised
# score per position.
eij_ = Dense(CONTEXT_DIM, use_bias=True, activation='tanh')(biRnn)
eij = Dense(1, use_bias=False, name='attention_weights')(eij_)
Exemple #16
0
def train(cwsInfo, cwsData, modelPath, weightPath, w2vecPath):
    """Train a stacked bidirectional-LSTM tagger and save it to disk.

    Parameters:
        cwsInfo: ``((initProb, tranProb), (vocab, indexVocab))``; only
            ``vocab`` (a word -> index mapping) is used here.
        cwsData: ``(X, y)`` samples and integer tag labels.
        modelPath: file the model architecture (JSON) is written to.
        weightPath: file the trained weights are written to.
        w2vecPath: path of the Word2Vec model used to initialise the
            embedding layer.

    Returns:
        The trained Keras model.
    """
    (_initProb, _tranProb), (vocab, _indexVocab) = cwsInfo
    (X, y) = cwsData
    train_X, test_X, train_y, test_y = train_test_split(X,
                                                        y,
                                                        train_size=0.9,
                                                        random_state=1)

    # Convert the splits to numpy arrays.
    train_X = np.array(train_X)
    train_y = np.array(train_y)
    test_X = np.array(test_X)
    test_y = np.array(test_y)

    # One-hot encode the labels.
    outputDims = len(eletags.corpus_tags)
    Y_train = np_utils.to_categorical(train_y, outputDims)
    Y_test = np_utils.to_categorical(test_y, outputDims)

    batchSize = 128
    vocabSize = len(vocab) + 1
    maxlen = 7
    hiddenDims = 128

    # Initialise the embedding matrix from Word2Vec; words missing from the
    # Word2Vec model get an all-zero vector.
    w2vModel = Word2Vec.load(w2vecPath)
    embeddingDim = w2vModel.vector_size
    embeddingUnknown = np.zeros(embeddingDim)
    embeddingWeights = np.zeros((vocabSize + 1, embeddingDim))
    for word, index in vocab.items():
        if word in w2vModel:
            embeddingWeights[index, :] = w2vModel[word]
        else:
            embeddingWeights[index, :] = embeddingUnknown

    # LSTM model
    model = Sequential()
    # Input layer
    model.add(
        Embedding(input_dim=vocabSize + 1,
                  output_dim=embeddingDim,
                  input_length=maxlen,
                  mask_zero=True,
                  weights=[embeddingWeights]))
    # Hidden layers
    model.add(Bidirectional(LSTM(output_dim=hiddenDims,
                                 return_sequences=True)))
    model.add(
        Bidirectional(LSTM(output_dim=hiddenDims, return_sequences=False)))
    model.add(Dropout(0.5))  # dropout layer to reduce overfitting
    # Output layer
    model.add(Dense(outputDims))  # fully connected layer
    model.add(Activation('softmax'))  # softmax over the tag set
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=["accuracy", "precision", "recall", "fmeasure"])

    model.fit(train_X,
              Y_train,
              batch_size=batchSize,
              nb_epoch=10,
              verbose=1,
              validation_data=(test_X, Y_test))

    # Persist the architecture and the weights; the context manager closes
    # the file even if the write fails.
    with open(modelPath, 'w') as fd:
        fd.write(model.to_json())
    model.save_weights(weightPath)
    return model
                 True, t_vocabsize)

# 학습/테스트 셋 분할
# Split into training and test sets.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2,
                                                random_state=42)
print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape)

# Define the network: embedding -> bidirectional LSTM encoder -> repeat ->
# bidirectional LSTM decoder -> per-timestep softmax over t_vocabsize.
model = Sequential()
model.add(Embedding(s_vocabsize, EMBED_SIZE,
                    input_length=MAX_SEQLEN))
model.add(SpatialDropout1D(0.2))
# A plain LSTM or GRU encoder can be used here instead of the BiLSTM.
model.add(Bidirectional(LSTM(HIDDEN_SIZE, dropout=0.2, recurrent_dropout=0.2)))

model.add(RepeatVector(MAX_SEQLEN))
# Likewise, a plain LSTM or GRU decoder with return_sequences=True can be
# used instead of the BiLSTM.
model.add(Bidirectional(LSTM(HIDDEN_SIZE, return_sequences=True)))
model.add(TimeDistributed(Dense(t_vocabsize)))
model.add(Activation("softmax"))

model.compile(loss="categorical_crossentropy", optimizer="adam",
              metrics=["accuracy"])

# validation_data is passed as an (x, y) tuple, the documented form; a list
# can be interpreted as multiple inputs by newer Keras versions.
model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS,
          validation_data=(Xtest, Ytest))
Exemple #18
0
def build_network(input_shape, num_actions):
    """Build the recurrent value, policy and joint training networks.

    The input is reshaped to add a trailing channel axis, passed through
    three time-distributed convolutions and a dense layer, then through two
    separate LSTMs (value and action). Each stream is weighted by a
    softmax attention over its timesteps and summed over axis -2 before
    the final dense head.

    Parameters:
        input_shape: shape of one input sample; indices 0..2 are read.
        num_actions: number of units of the policy head.

    Returns:
        ``(value_network, policy_network, train_network, adventage)`` where
        ``adventage`` is the extra ``(1,)`` input of ``train_network``.
    """
    input_data = Input(shape = input_shape, name = "input")

    print('>>>> Defining Recurrent Modules...')
    input_data_expanded = Reshape((input_shape[0], input_shape[1], input_shape[2], 1), input_shape = input_shape) (input_data)

    # Convolutional feature extractor applied to every timestep.
    h1 = TimeDistributed(Convolution2D(32, 8, 8, subsample=(4, 4), activation = "relu"), \
        input_shape=(10, input_shape[0], input_shape[1], 1))(input_data_expanded)
    h2 = TimeDistributed(Convolution2D(64, 4, 4, subsample=(2, 2), activation = "relu"))(h1)
    h3 = TimeDistributed(Convolution2D(64, 3, 3, subsample=(1, 1), activation = "relu"))(h2)
    flatten_hidden = TimeDistributed(Flatten())(h3)
    hidden_input = TimeDistributed(Dense(512, activation = 'relu', name = 'flat_to_512')) (flatten_hidden)

    # Separate recurrent streams for the value and the action heads.
    # (A bidirectional variant used to sit behind an always-false
    # `if 1==0` guard and could never run; it has been removed.)
    value_hidden_out = LSTM(512, return_sequences=True, stateful=False, input_shape=(10, 512)) (hidden_input)
    action_hidden_out = LSTM(512, return_sequences=True, stateful=False, input_shape=(10, 512)) (hidden_input)

    value = TimeDistributed(Dense(1, name = "value"))(value_hidden_out)
    action = TimeDistributed(Dense(num_actions, name = "action"))(action_hidden_out)

    # Attention over timesteps for the value stream: a tanh score per step,
    # softmax-normalised, then broadcast back to the stream's width.
    attention_vs = TimeDistributed(Dense(1, activation='tanh'),name = "AVS")(value)
    attention_vs = Flatten()(attention_vs)
    attention_vs = Activation('softmax')(attention_vs)
    attention_vs = RepeatVector(1)(attention_vs)
    attention_vs = Permute([2, 1])(attention_vs)
    sent_representation_vs = merge([value, attention_vs], mode='mul',name = "Attention V")

    # Same attention scheme for the policy stream.
    attention_pol = TimeDistributed(Dense(1, activation='tanh'),name = "AAS")(action)
    attention_pol = Flatten()(attention_pol)
    attention_pol = Activation('softmax')(attention_pol)
    attention_pol = RepeatVector(num_actions)(attention_pol)
    attention_pol = Permute([2, 1])(attention_pol)
    sent_representation_policy =merge([action, attention_pol], mode='mul',name = "Attention P")

    # Sum the attention-weighted sequences over axis -2 and apply the heads.
    context_value = Lambda(lambda xin: K.sum(xin, axis=-2), output_shape=(1,))(sent_representation_vs)
    value = Dense(1, activation='linear', name='value')(context_value)
    context_policy = Lambda(lambda xin: K.sum(xin, axis=-2), output_shape=(num_actions,))(sent_representation_policy)
    policy =Dense(num_actions, activation='softmax')(context_policy)

    value_network = Model(input=input_data, output=value)
    policy_network = Model(input=input_data, output=policy)

    adventage = Input(shape=(1,))
    train_network = Model(input=[input_data, adventage], output=[value, policy])
    # summary() prints by itself and returns None, so it is not wrapped in print().
    train_network.summary()
    return value_network, policy_network, train_network, adventage
Exemple #19
0
# BERT layer
# NOTE(review): bert_input / bert_drpot are not used by the model assembled
# below; confirm whether they should be part of the final concatenation.
bert_input = Input(shape=(MAX_LENGTH,768), name='bert_input')
bert_drpot = Dropout(DROPOUTRATE, name='bert_drpot')(bert_input)

# ELMo layer
emlo_input = Input(shape=(MAX_LENGTH,), dtype=tf.int64, name='emlo_input')
emlo_embed = ELMoEmbedding(idx2word=idx2word,
                        output_mode="elmo", name='emlo_embedding', trainable=True)(emlo_input) # These two are interchangeable
#sentence_embedding = Embedding(len(idx2word), 1024, input_length=MAX_SEQUENCE_LENGTH, trainable=False)(sentence_input) # These two are interchangeable

# add auxiliary layer
auxiliary_input = Input(shape=(MAX_LENGTH,1), name='aux_input') #(None, 30, 1)

# merged layers : merge (concat, average...) word and pos > bi-LSTM > bi-LSTM
mrg_cncat = concatenate([emlo_embed, pos_drpot], axis=2)
mrg_lstml = Bidirectional(LSTM(HIDDEN_SIZE, return_sequences=True),
                          name='mrg_bidirectional_1')(mrg_cncat)

# extra LSTM layer, if wanted
# The second BiLSTM now consumes the dropout output; previously it read
# mrg_lstml directly, leaving the 'mrg_dropout' layer disconnected from the
# model.
mrg_drpot = Dropout(DROPOUTRATE, name='mrg_dropout')(mrg_lstml)
mrg_lstml = Bidirectional(LSTM(HIDDEN_SIZE, return_sequences=True),
                          name='mrg_bidirectional_2')(mrg_drpot)

# merge BLSTM layers and external layers
# mrg_cncat = concatenate([mrg_lstml, txt_drpot, npos_drpot, auxiliary_input], axis=2)
mrg_cncat = concatenate([mrg_lstml, txt_drpot, auxiliary_input], axis=2)
# final linear chain CRF layer
crf = CRF(NER_VOCAB, sparse_target=True)
mrg_chain = crf(mrg_cncat)

model = Model(inputs=[txt_input, emlo_input, pos_input, npos_input, auxiliary_input], outputs=mrg_chain)
# model = Model(inputs=[txt_input, emlo_input, pos_input,  auxiliary_input], outputs=mrg_chain)
# model = Model(inputs=[txt_input, emlo_input, pos_input,  auxiliary_input], outputs=mrg_chain)
Exemple #20
0
# Load the pre-computed embedding matrix and the two vocabulary pickles
# (word -> id and id -> word) that the rest of the script relies on.
embedding_matrix_flair = np.load('aes_score/prepare_data/fasttest-flair.npy')
word2idx, idx2word = load_file(
    'aes_score/prepare_data/dectection.version2.vocab.pkl'), load_file(
        'aes_score/prepare_data/dectection.version2.id.vocab.pkl')
MAX_VOCAB = len(word2idx)

# Tagger graph: frozen embedding -> dropout -> two stacked BiLSTMs -> CRF.
# The input length is left open (shape=(None,)); index 0 is masked.
txt_input = Input(shape=(None, ), name='txt_input')
# 4396 is the embedding width; it has to agree with the second dimension of
# embedding_matrix_flair, which supplies the (non-trainable) weights.
txt_embed = Embedding(MAX_VOCAB,
                      4396,
                      name='txt_embedding',
                      weights=[embedding_matrix_flair],
                      mask_zero=True,
                      trainable=False)(txt_input)
txt_drpot = Dropout(0.5, name='txt_dropout')(txt_embed)
lstm1 = Bidirectional(
    LSTM(256, dropout=0.2, recurrent_dropout=0.2,
         return_sequences=True))(txt_drpot)
lstm1 = Bidirectional(
    LSTM(256, dropout=0.2, recurrent_dropout=0.2,
         return_sequences=True))(lstm1)
# `tags` is defined outside this section.
# NOTE(review): the "+ 2" presumably reserves extra label ids (e.g. padding) - confirm.
crf = CRF(len(tags) + 2, sparse_target=True)
mrg_chain = crf(lstm1)
model = Model(inputs=[txt_input], outputs=mrg_chain)

# Restore the trained weights, then run one prediction on an all-zero batch.
# NOTE(review): presumably a warm-up call so the first real prediction is not
# slowed by graph construction; the 4396 here is a sequence length, not the
# embedding width - confirm the value is intentional.
model.load_weights('aes_score/prepare_data/model.best.h5')
model.predict(np.zeros((1, 4396)))


def sentence_input_index(sen_arry_token):
    text_x = index_sents(sen_arry_token, word2idx)
    X_test_sents = sequence.pad_sequences(text_x,
Exemple #21
0
            For Keras internal compatability checking
        """
        if self.return_probabilities:
            return (None, self.timesteps, self.timesteps)
        else:
            return (None, self.timesteps, self.output_dim)

    def get_config(self):
        """Return the configuration needed to rebuild this layer at load time.

        The parent class configuration is extended with this layer's own
        ``output_dim``, ``units`` and ``return_probabilities`` settings; if a
        key exists in both, the value set here wins.
        """
        own_settings = {
            'output_dim': self.output_dim,
            'units': self.units,
            'return_probabilities': self.return_probabilities,
        }
        merged = dict(super(AttentionDecoder, self).get_config())
        merged.update(own_settings)
        return merged


# quick smoke test: put the decoder behind a BiLSTM and print the summary
if __name__ == '__main__':
    from keras.models import Model
    from keras.layers import Input, LSTM
    from keras.layers.wrappers import Bidirectional

    source_seq = Input(shape=(100, 104), dtype='float32')
    bi_encoder = Bidirectional(LSTM(64, return_sequences=True),
                               merge_mode='concat')
    encoded_seq = bi_encoder(source_seq)
    decoded_seq = AttentionDecoder(32, 4)(encoded_seq)
    model = Model(inputs=source_seq, outputs=decoded_seq)
    model.summary()
Exemple #22
0
    def __init__(self, inputs=None, outputs=None,
                 N=None, M=None, unroll=False,
                 hdim=300, word2vec_dim=300, dropout_rate=0.2,
                 **kwargs):
        """Build the FastQA graph, or wrap an already-built one.

        When both ``inputs`` and ``outputs`` are given they are forwarded
        unchanged to the parent constructor and nothing else is built.
        Otherwise the network is assembled from two inputs, ``P`` with shape
        ``(N, W)`` and ``Q`` with shape ``(M, W)``, and produces two softmax
        outputs: answer start and answer end.

        Args:
            inputs: Existing input tensors (config-reload path), or None.
            outputs: Existing output tensors (config-reload path), or None.
            N: Number of passage positions; also used as a Python loop bound
                in ``wiq_feature`` and as the ``RepeatVector`` length.
            M: Number of question positions.
            unroll: Forwarded to the shared encoder LSTM.
            hdim: Stored in the local ``H``, which is not referenced again in
                this method.
            word2vec_dim: Word vector size ``W``; also the LSTM unit count and
                the width of the projection ``Dense`` layers.
            dropout_rate: ``dropout`` argument of the encoder LSTM.
            **kwargs: Forwarded to the parent constructor.
        """
        # Load model from config
        if inputs is not None and outputs is not None:
            super(FastQA, self).__init__(inputs=inputs,
                                         outputs=outputs,
                                         **kwargs)
            return

        '''Dimensions'''
        # B and H are assigned but not used below.
        B = None
        H = hdim
        W = word2vec_dim

        '''Inputs'''
        P = Input(shape=(N, W), name='P')
        Q = Input(shape=(M, W), name='Q')

        '''Word in question binary'''

        def wiq_feature(P, Q):
            '''
            Binary feature mentioned in the paper.
            For each word in passage returns if that word is present in question.

            NOTE(review): as written, a passage position is flagged only when
            the number of matching question positions equals M, i.e. when it
            matches every question position rather than at least one -
            confirm the intended condition.
            '''
            slice = []
            for i in range(N):
                # word_sim: True where all W components of passage word i
                # equal the question word at that position.
                word_sim = K.tf.equal(W, K.tf.reduce_sum(
                    K.tf.cast(K.tf.equal(K.tf.expand_dims(P[:, i, :], 1), Q), K.tf.int32), axis=2))
                question_sim = K.tf.equal(M, K.tf.reduce_sum(K.tf.cast(word_sim, K.tf.int32), axis=1))
                slice.append(K.tf.cast(question_sim, K.tf.float32))

            # Stack the per-position flags and add a trailing feature axis.
            wiqout = K.tf.expand_dims(K.tf.stack(slice, axis=1), 2)
            return wiqout

        '''Word in question soft alignment'''

        # Placeholder: not implemented and not called in this method.
        def wiq_feature_soft(P,Q):
            pass

        # wiq_p / wiq_q are built here but are not fed into the encoder: the
        # concatenations that would use them are commented out below.
        # NOTE(review): the wiq_q lambda reads the outer `Q` rather than its
        # own argument `q`.
        wiq_p = Lambda(lambda arg: wiq_feature(arg[0], arg[1]))([P, Q])
        wiq_q = Lambda(lambda q: K.tf.ones([K.tf.shape(Q)[0], M, 1], dtype=K.tf.float32))(Q)

        passage_input = P
        question_input = Q
        # passage_input = Lambda(lambda arg: concatenate([arg[0], arg[1]], axis=2))([P, wiq_p])
        # question_input = Lambda(lambda arg: concatenate([arg[0], arg[1]], axis=2))([Q, wiq_q])

        '''Encoding'''
        # A single BiLSTM instance is applied to both passage and question,
        # so the two encodings share recurrent weights.
        encoder = Bidirectional(LSTM(units=W,
                                     return_sequences=True,
                                     dropout=dropout_rate,
                                     unroll=unroll))

        # Each encoding is projected to width W by its own bias-free Dense
        # layer, initialised from two identity matrices placed side by side.
        # NOTE(review): `weights` receives one (W, 2W) array rather than a
        # list of arrays - confirm this matches what the installed Keras
        # version expects for a Dense kernel.
        passage_encoding = passage_input
        passage_encoding = encoder(passage_encoding)
        passage_encoding = TimeDistributed(
            Dense(W,
                  use_bias=False,
                  trainable=True,
                  weights=np.concatenate((np.eye(W), np.eye(W)), axis=1)))(passage_encoding)

        question_encoding = question_input
        question_encoding = encoder(question_encoding)
        question_encoding = TimeDistributed(
            Dense(W,
                  use_bias=False,
                  trainable=True,
                  weights=np.concatenate((np.eye(W), np.eye(W)), axis=1)))(question_encoding)

        '''Attention over question'''
        # compute the importance of each step
        question_attention_vector = TimeDistributed(Dense(1))(question_encoding)
        question_attention_vector = Lambda(lambda q: keras.activations.softmax(q, axis=1))(question_attention_vector)

        # apply the attention: weight each question step, sum over the step
        # axis, then repeat the summary N times so it lines up with the passage
        question_attention_vector = Lambda(lambda q: q[0] * q[1])([question_encoding, question_attention_vector])
        question_attention_vector = Lambda(lambda q: K.sum(q, axis=1))(question_attention_vector)
        question_attention_vector = RepeatVector(N)(question_attention_vector)

        '''Answer span prediction'''

        # Answer start prediction
        # Features per passage position: encoding, question summary, and
        # their element-wise product.
        answer_start = Lambda(lambda arg:
                              concatenate([arg[0], arg[1], arg[2]]))([
            passage_encoding,
            question_attention_vector,
            multiply([passage_encoding, question_attention_vector])])

        answer_start = TimeDistributed(Dense(W, activation='relu'))(answer_start)
        answer_start = TimeDistributed(Dense(1))(answer_start)
        answer_start = Flatten()(answer_start)
        answer_start = Activation('softmax')(answer_start)

        # Answer end prediction depends on the start prediction
        def s_answer_feature(x):
            # Index of the highest-scoring start position for each example.
            maxind = K.argmax(
                x,
                axis=1,
            )
            return maxind

        # Pick the passage encoding at the predicted start index of each
        # example, then repeat it N times.
        x = Lambda(lambda x: K.tf.cast(s_answer_feature(x), dtype=K.tf.int32))(answer_start)
        start_feature = Lambda(lambda arg: K.tf.gather_nd(arg[0], K.tf.stack(
            [K.tf.range(K.tf.shape(arg[1])[0]), K.tf.cast(arg[1], K.tf.int32)], axis=1)))([passage_encoding, x])
        start_feature = RepeatVector(N)(start_feature)

        # Answer end prediction
        # Same feature set as the start head, plus the start encoding and its
        # product with the passage encoding.
        answer_end = Lambda(lambda arg: concatenate([
            arg[0],
            arg[1],
            arg[2],
            multiply([arg[0], arg[1]]),
            multiply([arg[0], arg[2]])
        ]))([passage_encoding, question_attention_vector, start_feature])

        answer_end = TimeDistributed(Dense(W, activation='relu'))(answer_end)
        answer_end = TimeDistributed(Dense(1))(answer_end)
        answer_end = Flatten()(answer_end)
        answer_end = Activation('softmax')(answer_end)

        input_placeholders = [P, Q]
        inputs = input_placeholders
        outputs = [answer_start, answer_end]

        super(FastQA, self).__init__(inputs=inputs,
                                     outputs=outputs,
                                     **kwargs)
# Split the place features and one-hot labels with the train/test indices
# (train_ind, test_ind, feats, place_feats, labels_1hot come from above).
place_train = place_feats[train_ind, :]
labels_ind_train = labels_1hot[train_ind, :]

feats_test = feats[test_ind, :, :]
place_test = place_feats[test_ind, :]
labels_ind_test = labels_1hot[test_ind, :]

# Build network
# Pickle streams are binary, so the file must be opened in 'rb' mode: text
# mode fails under Python 3 and can corrupt the stream on platforms that
# translate line endings.
# NOTE: pickle.load executes arbitrary code from the file - only load weight
# files from a trusted source.
with open(pre_trained_weights_file, 'rb') as fid:
    pre_train_weights = pickle.load(fid)

# Two stacked BiLSTMs; the second returns only its final state.
seq_model = Sequential()
seq_model.add(
    Bidirectional(LSTM(LAYER_SIZE1,
                       dropout=0.2,
                       recurrent_dropout=0.2,
                       return_sequences=True),
                  input_shape=(frame_dim, vec_dim)))
seq_model.add(
    Bidirectional(
        LSTM(LAYER_SIZE2,
             dropout=0.2,
             recurrent_dropout=0.2,
             return_sequences=False)))
# Initialise both recurrent layers from the pre-trained weights.
seq_model.layers[0].set_weights(pre_train_weights[0])
seq_model.layers[1].set_weights(pre_train_weights[1])

# Wrap the sequential sub-model so it can be combined with a second input.
emb_input = Input(shape=(frame_dim, vec_dim))
lstm_sub = seq_model(emb_input)

place_input = Input(shape=(place_list_size, ))
Exemple #24
0
def create_model(X_vocab_len, X_max_len, y_vocab_len, y_max_len,
                 n_phonetic_features, y1, n1, y2, n2, y3, n3, y4, n4, y5, n5,
                 y6, n6, hidden_size, num_layers):
    """Build and compile the multi-output model.

    Two sub-networks are wired into one Model:

    * a convolutional feature branch over the current word (merged with the
      root word) and eight context words, followed by a bidirectional RNN,
      the phonetic features and two dense layers, ending in six softmax
      heads ``output1`` .. ``output6`` of sizes ``n1`` .. ``n6``;
    * an encoder/decoder over ``current_word`` / ``decoder_input`` with
      dot-product attention, ending in a per-timestep softmax of size
      ``X_vocab_len``.

    Besides its arguments the function reads the module-level names
    ``EMBEDDING_DIM``, ``no_filters``, ``rnn_output_size``, ``HIDDEN_DIM``
    and ``RNN``.

    Args:
        X_vocab_len: Vocabulary size for both embeddings and for the
            sequence output layer.
        X_max_len: Length of every word-level input.
        n_phonetic_features: Width of the phonetic feature input.
        n1, n2, n3, n4, n5, n6: Sizes of the six softmax heads.
        y_vocab_len, y_max_len, y1, y2, y3, y4, y5, y6, hidden_size,
        num_layers: Accepted but not referenced in the body.

    Returns:
        The compiled Model with 12 inputs and 7 outputs (the sequence
        output first, then ``output1`` .. ``output6``).
    """
    def smart_merge(vectors, **kwargs):
        # A single tensor is passed through; several tensors are merged.
        return vectors[0] if len(vectors) == 1 else merge(vectors, **kwargs)

    current_word = Input(shape=(X_max_len, ), dtype='float32',
                         name='input1')  # for encoder (shared)
    root_word = Input(shape=(X_max_len, ), dtype='float32', name='input2')
    decoder_input = Input(shape=(X_max_len, ), dtype='float32',
                          name='input3')  # for decoder -- attention
    right_word1 = Input(shape=(X_max_len, ), dtype='float32', name='input4')
    right_word2 = Input(shape=(X_max_len, ), dtype='float32', name='input5')
    right_word3 = Input(shape=(X_max_len, ), dtype='float32', name='input6')
    right_word4 = Input(shape=(X_max_len, ), dtype='float32', name='input7')
    left_word1 = Input(shape=(X_max_len, ), dtype='float32', name='input8')
    left_word2 = Input(shape=(X_max_len, ), dtype='float32', name='input9')
    left_word3 = Input(shape=(X_max_len, ), dtype='float32', name='input10')
    left_word4 = Input(shape=(X_max_len, ), dtype='float32', name='input11')
    phonetic_input = Input(shape=(n_phonetic_features, ),
                           dtype='float32',
                           name='input12')

    # One embedding layer shared by all ten word inputs of the feature
    # branch (no masking here).
    emb_layer1 = Embedding(X_vocab_len,
                           EMBEDDING_DIM,
                           input_length=X_max_len,
                           mask_zero=False,
                           name='Embedding')

    list_of_inputs = [
        current_word, root_word, right_word1, right_word2, right_word3,
        right_word4, left_word1, left_word2, left_word3, left_word4
    ]

    current_word_embedding, root_word_embedding, right_word_embedding1, right_word_embedding2,right_word_embedding3, right_word_embedding4, \
     left_word_embedding1, left_word_embedding2, left_word_embedding3, left_word_embedding4 = [emb_layer1(i) for i in list_of_inputs]

    # Debug output of the embedding tensor type.
    print("Typeeeee:: ", type(current_word_embedding))
    # NOTE(review): no `mode` is passed to merge here, so how the two
    # embeddings are combined depends on merge's default - confirm that it
    # matches the "concatenate" intent stated in the inline comment.
    current_word_embedding = smart_merge(
        [current_word_embedding,
         root_word_embedding])  # concatenate root word with current input
    list_of_embeddings1 = [current_word_embedding, right_word_embedding1, right_word_embedding2,right_word_embedding3, right_word_embedding4, \
     left_word_embedding1, left_word_embedding2, left_word_embedding3, left_word_embedding4]

    # list_of_embeddings = [smart_merge([i,root_word_embedding]) for i in list_of_embeddings] # concatenate root word with each of inputs
    # Regularise each of the nine embeddings: dropout, then Gaussian noise.
    list_of_embeddings = [
        Dropout(0.50, name='drop1_' + str(j))(i)
        for i, j in zip(list_of_embeddings1, range(len(list_of_embeddings1)))
    ]
    list_of_embeddings = [
        GaussianNoise(0.05, name='noise1_' + str(j))(i)
        for i, j in zip(list_of_embeddings, range(len(list_of_embeddings)))
    ]

    # Width-4 convolutions, one per embedding.
    conv4_curr, conv4_right1, conv4_right2, conv4_right3, conv4_right4, conv4_left1, conv4_left2, conv4_left3, conv4_left4 =\
      [Conv1D(filters=no_filters,
       kernel_size=4, padding='valid',activation='relu',
       strides=1, name='conv4_'+str(j))(i) for i,j in zip(list_of_embeddings, range(len(list_of_embeddings)))]

    conv4s = [
        conv4_curr, conv4_right1, conv4_right2, conv4_right3, conv4_right4,
        conv4_left1, conv4_left2, conv4_left3, conv4_left4
    ]
    maxPool4 = [
        MaxPooling1D(name='max4_' + str(j))(i)
        for i, j in zip(conv4s, range(len(conv4s)))
    ]
    avgPool4 = [
        AveragePooling1D(name='avg4_' + str(j))(i)
        for i, j in zip(conv4s, range(len(conv4s)))
    ]

    # Combine max- and average-pooled features per input (merge default mode).
    pool4_curr, pool4_right1, pool4_right2, pool4_right3, pool4_right4, pool4_left1, pool4_left2, pool4_left3, pool4_left4 = \
     [merge([i,j], name='merge_conv4_'+str(k)) for i,j,k in zip(maxPool4, avgPool4, range(len(maxPool4)))]

    # Width-5 convolutions over the same embeddings, pooled the same way.
    conv5_curr, conv5_right1, conv5_right2, conv5_right3, conv5_right4, conv5_left1, conv5_left2, conv5_left3, conv5_left4 = \
      [Conv1D(filters=no_filters,
       kernel_size=5,
       padding='valid',
       activation='relu',
       strides=1, name='conv5_'+str(j))(i) for i,j in zip(list_of_embeddings, range(len(list_of_embeddings)))]

    conv5s = [
        conv5_curr, conv5_right1, conv5_right2, conv5_right3, conv5_right4,
        conv5_left1, conv5_left2, conv5_left3, conv5_left4
    ]
    maxPool5 = [
        MaxPooling1D(name='max5_' + str(j))(i)
        for i, j in zip(conv5s, range(len(conv5s)))
    ]
    avgPool5 = [
        AveragePooling1D(name='avg5_' + str(j))(i)
        for i, j in zip(conv5s, range(len(conv5s)))
    ]

    pool5_curr, pool5_right1, pool5_right2, pool5_right3, pool5_right4, pool5_left1, pool5_left2, pool5_left3, pool5_left4 = \
     [merge([i,j], name='merge_conv5_'+str(k)) for i,j,k in zip(maxPool5, avgPool5, range(len(maxPool5)))]


    # All 18 pooled tensors (despite the name, these are the merged
    # max/avg results) are concatenated into one feature tensor.
    maxPools = [pool4_curr, pool4_right1, pool4_right2, pool4_right3, pool4_right4, \
     pool4_left1, pool4_left2, pool4_left3, pool4_left4, \
     pool5_curr, pool5_right1, pool5_right2, pool5_right3, pool5_right4, \
     pool5_left1, pool5_left2, pool5_left3, pool5_left4]

    concat = merge(maxPools, mode='concat', name='main_merge')
    # curr_vector_total = smart_merge([pool4_curr, pool5_curr], mode='concat')

    x = Dropout(0.15, name='drop_single1')(concat)

    x = Bidirectional(RNN(rnn_output_size, name='rnn_for_features'))(x)

    # Append the phonetic features to the recurrent summary.
    total_features = [x, phonetic_input]
    concat2 = merge(total_features, mode='concat', name='phonetic_merging')

    x = Dense(HIDDEN_DIM,
              activation='relu',
              kernel_initializer='he_normal',
              kernel_constraint=maxnorm(3),
              bias_constraint=maxnorm(3),
              name='dense1')(concat2)
    x = Dropout(0.15, name='drop_single2')(x)

    x = Dense(HIDDEN_DIM,
              kernel_initializer='he_normal',
              activation='tanh',
              kernel_constraint=maxnorm(3),
              bias_constraint=maxnorm(3),
              name='dense2')(x)
    x = Dropout(0.15, name='drop_single3')(x)

    # Six independent softmax heads on the shared dense representation.
    out1 = Dense(n1,
                 kernel_initializer='he_normal',
                 activation='softmax',
                 name='output1')(x)
    out2 = Dense(n2,
                 kernel_initializer='he_normal',
                 activation='softmax',
                 name='output2')(x)
    out3 = Dense(n3,
                 kernel_initializer='he_normal',
                 activation='softmax',
                 name='output3')(x)
    out4 = Dense(n4,
                 kernel_initializer='he_normal',
                 activation='softmax',
                 name='output4')(x)
    out5 = Dense(n5,
                 kernel_initializer='he_normal',
                 activation='softmax',
                 name='output5')(x)
    out6 = Dense(n6,
                 kernel_initializer='he_normal',
                 activation='softmax',
                 name='output6')(x)

    # Luong et al. 2015 attention model
    # A second, masked embedding is used for the encoder/decoder; it is
    # separate from the one used by the feature branch above.
    emb_layer = Embedding(X_vocab_len,
                          EMBEDDING_DIM,
                          input_length=X_max_len,
                          mask_zero=True,
                          name='Embedding_for_seq2seq')

    current_word_embedding = emb_layer(current_word)
    current_word_embedding = GaussianNoise(
        0.05, name='noise_seq2seq')(current_word_embedding)

    encoder, state = RNN(rnn_output_size,
                         return_sequences=True,
                         unroll=True,
                         return_state=True,
                         name='encoder')(current_word_embedding)
    # encoder_last is not used below.
    encoder_last = encoder[:, -1, :]

    # The decoder starts from the encoder's final state.
    decoder = emb_layer(decoder_input)
    decoder = GRU(rnn_output_size,
                  return_sequences=True,
                  unroll=True,
                  name='decoder')(decoder, initial_state=[state])

    # Attention scores: dot product of decoder and encoder states,
    # normalised with softmax.
    attention = dot([decoder, encoder], axes=[2, 2], name='dot')
    attention = Activation('softmax', name='attention')(attention)

    # Context: attention-weighted sum of encoder states, joined with the
    # decoder state.
    context = dot([attention, encoder], axes=[2, 1], name='dot2')
    decoder_combined_context = concatenate([context, decoder],
                                           name='concatenate')

    outputs = TimeDistributed(
        Dense(64, activation='tanh',
              name='TimeDistributed1'))(decoder_combined_context)
    outputs = TimeDistributed(
        Dense(X_vocab_len, activation='softmax',
              name='TimeDistributed2'))(outputs)


    all_inputs = [current_word, root_word, decoder_input, right_word1, right_word2, right_word3, right_word4, left_word1, \
       left_word2, left_word3, left_word4, phonetic_input]
    all_outputs = [outputs, out1, out2, out3, out4, out5, out6]

    model = Model(input=all_inputs, output=all_outputs)
    # `opt` is created but not used: compile() below is given Adadelta.
    opt = Adam()
    # All seven outputs share the same loss and carry equal weight.
    model.compile(optimizer=Adadelta(epsilon=1e-06),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'],
                  loss_weights=[1., 1., 1., 1., 1., 1., 1.])

    return model
Exemple #25
0
    def __init__(self,
                 dim,
                 batch_norm,
                 dropout,
                 rec_dropout,
                 task,
                 target_repl=False,
                 deep_supervision=False,
                 num_classes=1,
                 depth=1,
                 input_dim=76,
                 **kwargs):

        print "==> not used params in network class:", kwargs.keys()

        self.dim = dim
        self.batch_norm = batch_norm
        self.dropout = dropout
        self.rec_dropout = rec_dropout
        self.depth = depth

        if task in ['decomp', 'ihm', 'ph']:
            final_activation = 'sigmoid'
        elif task in ['los']:
            if num_classes == 1:
                final_activation = 'relu'
            else:
                final_activation = 'softmax'
        else:
            return ValueError("Wrong value for task")

        # Input layers and masking
        X = Input(shape=(None, input_dim), name='X')
        inputs = [X]
        mX = Masking()(X)

        if deep_supervision:
            M = Input(shape=(None, ), name='M')
            inputs.append(M)

        # Configurations
        is_bidirectional = True
        if deep_supervision:
            is_bidirectional = False

        # Main part of the network
        for i in range(depth - 1):
            num_units = dim
            if is_bidirectional:
                num_units = num_units // 2

            gru = GRU(units=num_units,
                      activation='tanh',
                      return_sequences=True,
                      recurrent_dropout=rec_dropout,
                      dropout=dropout)

            if is_bidirectional:
                mX = Bidirectional(gru)(mX)
            else:
                mX = gru(mX)

        # Output module of the network
        return_sequences = (target_repl or deep_supervision)
        L = GRU(units=dim,
                activation='tanh',
                return_sequences=return_sequences,
                dropout=dropout,
                recurrent_dropout=rec_dropout)(mX)

        if dropout > 0:
            L = Dropout(dropout)(L)

        if target_repl:
            y = TimeDistributed(Dense(num_classes,
                                      activation=final_activation),
                                name='seq')(L)
            y_last = LastTimestep(name='single')(y)
            outputs = [y_last, y]
        elif deep_supervision:
            y = TimeDistributed(Dense(num_classes,
                                      activation=final_activation))(L)
            y = ExtendMask()([y, M])  # this way we extend mask of y to M
            outputs = [y]
        else:
            y = Dense(num_classes, activation=final_activation)(L)
            outputs = [y]

        return super(Network, self).__init__(inputs=inputs, outputs=outputs)
Exemple #26
0
# Per-epoch validation loss is collected here; `loss_history`, used by the
# first callback below, is defined outside this section.
val_loss_history = []

# Callbacks that append the training / validation loss after every epoch.
save_loss_callback = LambdaCallback(
    on_epoch_end=lambda epoch, logs: loss_history.append(logs['loss']))
save_valloss_callback = LambdaCallback(
    on_epoch_end=lambda epoch, logs: val_loss_history.append(logs['val_loss']))
# Stop training once val_loss has not improved for 5 consecutive epochs.
monitor_loss = EarlyStopping(monitor='val_loss',
                             min_delta=0,
                             patience=5,
                             verbose=0,
                             mode='auto')

# Three stacked bidirectional GRUs, each followed by dropout, then a linear
# per-timestep output of width 5. The second and third GRU carry an L2
# kernel penalty.
model = Sequential()

model.add(
    Bidirectional(GRU(hidden_units, return_sequences=True),
                  input_shape=(n_chunks, chunk_size)))
model.add(Dropout(0.2))
model.add(
    Bidirectional(
        GRU(hidden_units,
            return_sequences=True,
            kernel_regularizer=regularizers.l2(0.01))))
model.add(Dropout(0.5))
model.add(
    Bidirectional(
        GRU(hidden_units,
            return_sequences=True,
            kernel_regularizer=regularizers.l2(0.01))))
model.add(Dropout(0.5))
model.add(TimeDistributed(Dense(5, activation='linear')))
#rmsprop = optimizers.RMSprop(lr=0.01, rho=0.9, epsilon=1e-08, decay=0.0)
def define_model(vocab_size, max_len):
    """Build, compile and summarise the story/question/answer model.

    (Check figure 6.3 in the memory.)

    The story and question lengths are taken from the first rows of the
    module-level ``XsTrain`` and ``XqTrain``; ``EMBEDDINGS_SIZE`` and
    ``LATENT_SIZE`` are module-level settings as well.

    Args:
        vocab_size: Vocabulary size for every embedding and for the
            softmax output layer.
        max_len: Length of the answer-sequence input.

    Returns:
        The compiled model taking ``[story, question, answers]`` inputs.
    """
    n_story_tokens = len(XsTrain[0])
    n_question_tokens = len(XqTrain[0])

    # context vector: the two memory inputs
    story_in = Input(shape=(n_story_tokens, ))
    question_in = Input(shape=(n_question_tokens, ))

    story_memory = Embedding(input_dim=vocab_size,
                             output_dim=EMBEDDINGS_SIZE,
                             input_length=n_story_tokens)(story_in)
    story_memory = Dropout(0.3)(story_memory)
    print('story encoder done!')

    # question encoder memory
    question_memory = Embedding(input_dim=vocab_size,
                                output_dim=EMBEDDINGS_SIZE,
                                input_length=n_question_tokens)(question_in)
    question_memory = Dropout(0.3)(question_memory)
    print('question encoder done!')

    # similarity between every story position and every question position
    similarity = dot([story_memory, question_memory], axes=[2, 2])

    # encode the story into the vector space of the question
    story_in_question_space = Embedding(
        input_dim=vocab_size,
        output_dim=n_question_tokens,
        input_length=n_story_tokens)(story_in)
    story_in_question_space = Dropout(0.3)(story_in_question_space)
    print('story encoder c done!')

    # combine the match and story vectors, question positions first
    combined = add([similarity, story_in_question_space])
    combined = Permute((2, 1))(combined)
    print('response done!')

    # combine the response and question vectors
    context = concatenate([combined, question_memory], axis=-1)

    context_summary = Bidirectional(LSTM(LATENT_SIZE),
                                    merge_mode="sum",
                                    name="encoder_lstm")(context)
    context_summary = Dropout(0.5)(context_summary)
    print('encoder done!')

    # sequence model over the answer input
    answers_in = Input(shape=(max_len, ))
    answer_emb = Embedding(vocab_size, EMBEDDINGS_SIZE,
                           mask_zero=True)(answers_in)
    answer_emb = Dropout(0.5)(answer_emb)
    answer_summary = Bidirectional(LSTM(LATENT_SIZE),
                                   merge_mode="sum",
                                   name="encoder2_lstm")(answer_emb)
    print('sequence model done!')

    # decoder model
    fused = add([context_summary, answer_summary])
    fused = Dense(LATENT_SIZE, activation='relu')(fused)
    word_probs = Dense(vocab_size, activation='softmax')(fused)
    print('output done!')

    # tie everything together
    model = Model(inputs=[story_in, question_in, answers_in],
                  outputs=word_probs)

    # compile model
    rmsprop = optimizers.RMSprop(lr=0.0001,
                                 rho=0.9,
                                 epsilon=None,
                                 decay=0)
    model.compile(loss='categorical_crossentropy',
                  optimizer=rmsprop,
                  metrics=['acc'])

    # summarize model
    model.summary()
    return model
# get the initial state of the RNG
# Saving the state and restoring it between the two shuffles makes both calls
# draw the same random sequence, so X_train and Y_train stay aligned
# (provided they have the same length along the first axis).
state = np.random.get_state()

np.random.shuffle(X_train)

np.random.set_state(state)

np.random.shuffle(Y_train)
#######################################################################################################################################
batch_size = 128
# NOTE(review): max_features is the second dimension of X_train and is used
# below both as the Embedding input_dim and as the input length - confirm that
# the vocabulary size really equals the sequence length.
max_features = X_train.shape[1]
#######################################################################################################################################
# Embedding -> BiLSTM (full sequence) -> Attention -> Dense -> 2-way softmax.
model = Sequential()
model.add(Embedding(max_features, 4, input_shape=[max_features]))
model.add(
    Bidirectional(
        LSTM(100, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)))
model.add(Attention(max_features))
model.add(Dense(2048, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))
# try using different optimizers and different optimizer configs
print(model.summary())
#######################################################################################################################################
# model.load_weights('model_wghts_lstm_1.h5')
# `f_1` is a metric defined outside this section.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy', f_1])

print('Train...')
model.fit(X_train,
          one_hot(Y_train),
    batch_size = 32
    embedding_dims = 300
    filters = 250
    kernel_size = 3
    hidden_dims = 250
    epochs = 5
    num_neurons = 10

    prep = Preprocessor('corpus_marked', 'vk_comment_model')
    x_train, y_train, x_test, y_test = prep.train_pipeline(maxlen, embedding_dims)

    # Build the bidirectional recurrent network
    model = Sequential()
    model.add(Bidirectional(
        SimpleRNN(
            num_neurons, return_sequences=True,
            input_shape=(maxlen, embedding_dims)
        ), input_shape=(maxlen, embedding_dims)))


    # Add a dropout layer
    model.add(Dropout(.2))

    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))

    # Compile our recurrent neural network
    model.compile('rmsprop', 'binary_crossentropy', metrics=['accuracy'])

    model.summary()
                print ("[OOV]", w)
        else:
            print ("[OOV]", w)
    #TODO normalize?
    init_emb_W = [init_emb_W]

################################
# Embedding layer initialised from init_emb_W; it is frozen when
# --static_emb is set and fine-tuned otherwise.
is_trainable = True
if args.static_emb:
    is_trainable = False
embedding = Embedding(len(idx2word), args.emb_size, input_length=max_seq_len, weights=init_emb_W, trainable=is_trainable)(seq_input)
embedding = Dropout(args.dropout)(embedding)

# [LSTM for slot]
# Per-timestep output (return_sequences=True); optionally bidirectional.
# NOTE(review): the layer name 'slot LSTM' contains a space - confirm the
# backend in use accepts such names.
if args.bi_direct:
    slot_lstm_out = Bidirectional(LSTM(args.emb_size, dropout=args.dropout, recurrent_dropout=args.dropout, return_sequences=True, name='slot LSTM', recurrent_regularizer=r_reg))(embedding)
else:
    slot_lstm_out = LSTM(args.emb_size, dropout=args.dropout, recurrent_dropout=args.dropout, return_sequences=True, name='slot LSTM', recurrent_regularizer=r_reg)(embedding)

# Optional batch normalisation of the slot LSTM output.
if args.batch_norm:
    slot_lstm_out = BatchNormalization()(slot_lstm_out)

# [LSTM for intent]
if args.attention:
    intent_lstm_out = LSTM(args.emb_size, dropout=args.dropout, recurrent_dropout=args.dropout, name='intent LSTM', return_sequences=True, recurrent_regularizer=r_reg)(slot_lstm_out)
    attn = TimeDistributed(Dense(1, activation=args.activation))(intent_lstm_out)
    attn = Flatten()(attn)
    attn = Activation('softmax')(attn)
    attn = RepeatVector(args.emb_size)(attn)
    attn = Permute([2, 1])(attn)