Example #1
def pretrained_embedding_layer(word_to_vec_map, source_vocab_to_int):
    """
    构造Embedding层并加载预训练好的词向量(这里我使用的是100维)

    @param word_to_vec_map: 单词到向量的映射
    @param word_to_index: 单词到数字编码的映射
    """

    vocab_len = len(source_vocab_to_int) + 1  # the Keras Embedding API requires input_dim = vocab size + 1
    emb_dim = word_to_vec_map["the"].shape[0]

    # initialize the embedding matrix
    emb_matrix = np.zeros((vocab_len, emb_dim))

    # fill the embedding matrix with the pretrained word vectors
    for word, index in source_vocab_to_int.items():
        word_vector = word_to_vec_map.get(word, np.zeros(emb_dim))
        emb_matrix[index, :] = word_vector

    # define the Embedding layer and mark its weights as non-trainable
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)

    # build the layer so its weights exist before they are set
    embedding_layer.build((None,))

    # load the pretrained embedding matrix into the layer
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
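A minimal usage sketch (not part of the example above) showing how pretrained_embedding_layer might be called; the 100-dimensional vectors and the tiny vocabulary are toy stand-ins for real pretrained embeddings:

import numpy as np
from keras.layers import Input
from keras.models import Model

word_to_vec_map = {w: np.random.rand(100) for w in ("the", "cat", "sat")}
source_vocab_to_int = {"the": 1, "cat": 2, "sat": 3}  # index 0 is left for padding

emb_layer = pretrained_embedding_layer(word_to_vec_map, source_vocab_to_int)

seq_input = Input(shape=(10,), dtype='int32')  # sequences of 10 word indices
seq_embedded = emb_layer(seq_input)            # -> (batch, 10, 100), weights frozen
model = Model(seq_input, seq_embedded)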
Example #2
    def build(self):
        assert self.config['question_len'] == self.config['answer_len']

        question = self.question
        answer = self.get_answer()

        # add embedding layers
        embedding = Embedding(self.config['n_words'], self.model_params.get('n_embed_dims', 100))
        question_embedding = embedding(question)
        answer_embedding = embedding(answer)

        # turn off layer updating
        embedding.params = []
        embedding.updates = []

        # dropout
        dropout = Dropout(0.25)
        question_dropout = dropout(question_embedding)
        answer_dropout = dropout(answer_embedding)

        # dense
        dense = TimeDistributed(Dense(self.model_params.get('n_hidden', 200), activation='tanh'))
        question_dense = dense(question_dropout)
        answer_dense = dense(answer_dropout)

        # regularization
        question_dense = ActivityRegularization(l2=0.0001)(question_dense)
        answer_dense = ActivityRegularization(l2=0.0001)(answer_dense)

        # dropout
        question_dropout = dropout(question_dense)
        answer_dropout = dropout(answer_dense)

        # cnn
        cnns = [Convolution1D(filter_length=filter_length,
                              nb_filter=self.model_params.get('nb_filters', 1000),
                              activation=self.model_params.get('conv_activation', 'relu'),
                              border_mode='same') for filter_length in [2, 3, 5, 7]]
        question_cnn = merge([cnn(question_dropout) for cnn in cnns], mode='concat')
        answer_cnn = merge([cnn(answer_dropout) for cnn in cnns], mode='concat')

        # regularization
        question_cnn = ActivityRegularization(l2=0.0001)(question_cnn)
        answer_cnn = ActivityRegularization(l2=0.0001)(answer_cnn)

        # dropout
        question_dropout = dropout(question_cnn)
        answer_dropout = dropout(answer_cnn)

        # maxpooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
        question_pool = maxpool(question_dropout)
        answer_pool = maxpool(answer_dropout)

        # activation
        activation = Activation('tanh')
        question_output = activation(question_pool)
        answer_output = activation(answer_pool)

        return question_output, answer_output
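The merge(..., mode='concat') call and the max-over-time Lambda above are Keras 1.x idioms; a rough Keras 2 sketch of the same multi-width convolution block (the function name and defaults here are illustrative, not from the example):

from keras.layers import Conv1D, Concatenate, GlobalMaxPooling1D

def multi_width_conv_block(x, n_filters=1000, widths=(2, 3, 5, 7)):
    # one Conv1D per filter width, concatenated along the channel axis
    branches = [Conv1D(n_filters, w, padding='same', activation='relu')(x)
                for w in widths]
    x = Concatenate(axis=-1)(branches)
    # max over the time axis, matching Lambda(lambda x: K.max(x, axis=1)) above
    return GlobalMaxPooling1D()(x)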
Example #3
    def _generate_model(self, lembedding, num_classes=2, unit='gru', rnn_size=128, train_vectors=True):

        input = Input(shape=(lembedding.size,), dtype='int32')
        if lembedding.vector_box.W is None:
            emb_layer = Embedding(lembedding.vector_box.size,
                                  lembedding.vector_box.vector_dim,
                                  W_constraint=None)
        else:
            emb_layer = Embedding(lembedding.vector_box.size,
                                  lembedding.vector_box.vector_dim,
                                  weights=[lembedding.vector_box.W],
                                  W_constraint=None)
        # set trainable on the layer itself; setting it on the output tensor has no effect
        emb_layer.trainable = train_vectors
        emb = emb_layer(input)
        if unit == 'gru':
            forward = GRU(rnn_size)(emb)
            backward = GRU(rnn_size, go_backwards=True)(emb)
        else:
            forward = LSTM(rnn_size)(emb)
            backward = LSTM(rnn_size, go_backwards=True)(emb)

        merged_rnn = merge([forward, backward], mode='concat')
        dropped = Dropout(0.5)(merged_rnn)
        if num_classes == 2:
            out = Dense(1, activation='sigmoid')(dropped)
            model = Model(input=input, output=out)
            if self.optimizer is None:
                self.optimizer = 'rmsprop'
            model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
        else:
            out = Dense(num_classes, activation='softmax')(dropped)
            model = Model(input=input, output=out)
            if self.optimizer is None:
                self.optimizer = 'adam'
            model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

        return model
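In Keras 2 the forward/backward RNN pair plus merge(mode='concat') is usually written with the Bidirectional wrapper; a hedged equivalent of the recurrent block above (rnn_size as in the example's default):

from keras.layers import GRU, Bidirectional

rnn_size = 128
# replaces GRU(rnn_size)(emb), GRU(rnn_size, go_backwards=True)(emb) and the concat merge
bi_rnn = Bidirectional(GRU(rnn_size), merge_mode='concat')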
Example #4
    def _generate_model(self, lembedding, num_classes=2, unit='gru', rnn_size=128, train_vectors=True):

        model = Sequential()
        if lembedding.vector_box.W is None:
            emb = Embedding(lembedding.vector_box.size,
                            lembedding.vector_box.vector_dim,
                            W_constraint=None)
        else:
            emb = Embedding(lembedding.vector_box.size,
                            lembedding.vector_box.vector_dim,
                            weights=[lembedding.vector_box.W], W_constraint=None)
        emb.trainable = train_vectors
        model.add(emb)
        if unit == 'gru':
            model.add(GRU(rnn_size))
        else:
            model.add(LSTM(rnn_size))
        model.add(Dropout(0.2))
        if num_classes == 2:
            model.add(Dense(1, activation='sigmoid'))
            if self.optimizer is None:
                self.optimizer = 'rmsprop'
            model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
        else:
            if self.optimizer is None:
                self.optimizer = 'adam'
            model.add(Dense(num_classes, activation='softmax'))
            model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

        return model
Example #5
    def build(self):
        question, answer = self._get_inputs()

        # add embedding layers
        embedding = Embedding(self.config['n_words'], self.model_params.get('n_embed_dims', 141))
        question_embedding = embedding(question)

        a_embedding = Embedding(self.config['n_words'], self.model_params.get('n_embed_dims', 141))
        answer_embedding = a_embedding(answer)

        # initialise the answer embedding with the question embedding's weights
        a_embedding.set_weights(embedding.get_weights())

        # dropout
        dropout = Dropout(0.5)
        question_dropout = dropout(question_embedding)
        answer_dropout = dropout(answer_embedding)

        # rnn
        forward_lstm = LSTM(self.config.get('n_lstm_dims', 141), consume_less='mem', return_sequences=True)
        backward_lstm = LSTM(self.config.get('n_lstm_dims', 141), consume_less='mem', return_sequences=True)
        question_lstm = merge([forward_lstm(question_dropout), backward_lstm(question_dropout)], mode='concat', concat_axis=-1)

        # dropout
        question_dropout = dropout(question_lstm)

        # maxpooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
        question_pool = maxpool(question_dropout)

        # activation
        activation = Activation('tanh')
        question_output = activation(question_pool)

        question_model = Model(input=[question], output=[question_output])

        # attentional rnn
        forward_lstm = AttentionLSTM(self.config.get('n_lstm_dims', 141), question_output, consume_less='mem', return_sequences=True)
        backward_lstm = AttentionLSTM(self.config.get('n_lstm_dims', 141), question_output, consume_less='mem', return_sequences=True)
        answer_lstm = merge([forward_lstm(answer_dropout), backward_lstm(answer_dropout)], mode='concat', concat_axis=-1)

        # dropout
        answer_dropout = dropout(answer_lstm)

        # maxpooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
        answer_pool = maxpool(answer_dropout)

        # activation
        activation = Activation('tanh')
        answer_output = activation(answer_pool)

        answer_model = Model(input=[question, answer], output=[answer_output])

        return question_model, answer_model
Example #6
    def build(self):
        question = self.question
        answer = self.get_answer()

        # add embedding layers
        embedding = Embedding(self.config['n_words'], self.model_params.get('n_embed_dims', 100), mask_zero=False)
        question_embedding = embedding(question)
        answer_embedding = embedding(answer)

        # turn off layer updating
        embedding.params = []
        embedding.updates = []

        # dropout
        dropout = Dropout(0.25)
        question_dropout = dropout(question_embedding)
        answer_dropout = dropout(answer_embedding)

        # question rnn part
        f_rnn = LSTM(self.model_params.get('n_lstm_dims', 141), return_sequences=True)
        b_rnn = LSTM(self.model_params.get('n_lstm_dims', 141), return_sequences=True, go_backwards=True)
        question_f_rnn = f_rnn(question_dropout)
        question_b_rnn = b_rnn(question_dropout)
        question_f_dropout = dropout(question_f_rnn)
        question_b_dropout = dropout(question_b_rnn)

        # maxpooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
        question_pool = merge([maxpool(question_f_dropout), maxpool(question_b_dropout)], mode='concat', concat_axis=-1)

        # answer rnn part
        f_rnn = AttentionLSTM(self.model_params.get('n_lstm_dims', 141), question_pool, single_attn=True, return_sequences=True)
        b_rnn = AttentionLSTM(self.model_params.get('n_lstm_dims', 141), question_pool, single_attn=True, return_sequences=True, go_backwards=True)
        answer_f_rnn = f_rnn(answer_dropout)
        answer_b_rnn = b_rnn(answer_dropout)
        answer_f_dropout = dropout(answer_f_rnn)
        answer_b_dropout = dropout(answer_b_rnn)
        answer_pool = merge([maxpool(answer_f_dropout), maxpool(answer_b_dropout)], mode='concat', concat_axis=-1)

        # activation
        activation = Activation('tanh')
        question_output = activation(question_pool)
        answer_output = activation(answer_pool)

        return question_output, answer_output
Example #7
    def _generate_model(self, lembedding, num_classes=2, ngrams=[1,2,3,4,5],
                        nfilters=64, train_vectors=True):

        def sub_ngram(n):
            return Sequential([
                Convolution1D(nfilters, n,
                      activation='relu',
                      input_shape=(lembedding.size, lembedding.vector_box.vector_dim)),
                Lambda(
                    lambda x: K.max(x, axis=1),
                    output_shape=(nfilters,)
                )
        ])

        doc = Input(shape=(lembedding.size, ), dtype='int32')
        emb_layer = Embedding(input_dim=lembedding.vector_box.size,
                              output_dim=lembedding.vector_box.vector_dim,
                              weights=[lembedding.vector_box.W])
        # set trainable on the layer itself; setting it on the output tensor has no effect
        emb_layer.trainable = train_vectors
        embedded = emb_layer(doc)

        rep = Dropout(0.5)(
            merge(
                [sub_ngram(n)(embedded) for n in ngrams],
                mode='concat',
                concat_axis=-1
            )
        )

        if num_classes == 2:
            out = Dense(1, activation='sigmoid')(rep)
            model = Model(input=doc, output=out)
            if self.optimizer is None:
                self.optimizer = 'rmsprop'
            model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
        else:
            out = Dense(num_classes, activation='softmax')(rep)
            model = Model(input=doc, output=out)
            if self.optimizer is None:
                self.optimizer = 'adam'
            model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

        return model
Example #8
    def _generate_model(self, lembedding, num_classes=2, num_features=128, train_vectors=True):

        model = Sequential()
        if lembedding.vector_box.W is None:
            emb = Embedding(lembedding.vector_box.size,
                            lembedding.vector_box.vector_dim,
                            W_constraint=None,
                            input_length=lembedding.size)
        else:
            emb = Embedding(lembedding.vector_box.size,
                            lembedding.vector_box.vector_dim,
                            weights=[lembedding.vector_box.W], W_constraint=None,
                            input_length=lembedding.size)
        emb.trainable = train_vectors
        model.add(emb)

        model.add(Convolution1D(num_features, 3, init='uniform'))
        model.add(Activation('relu'))
        model.add(MaxPooling1D(2))
        model.add(Dropout(0.25))

        model.add(Convolution1D(num_features, 3, init='uniform'))
        model.add(Activation('relu'))
        model.add(MaxPooling1D(2))
        model.add(Dropout(0.25))

        model.add(Flatten())

        if num_classes == 2:
            model.add(Dense(1, activation='sigmoid'))
            if self.optimizer is None:
                self.optimizer = 'rmsprop'
            model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
        else:
            if self.optimizer is None:
                self.optimizer = 'adam'
            model.add(Dense(num_classes, activation='softmax'))
            model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

        return model
Example #9
def char_emb_cnn_func(n_characters: int,
                      char_embedding_dim: int,
                      emb_mat: np.array = None,
                        filter_widths=(3, 4, 5, 7),
                        highway_on_top=False):

    emb_layer = Embedding(n_characters,
                          char_embedding_dim)

    if emb_mat is not None:
        emb_layer.set_weights([emb_mat])

    conv2d_layers = []
    for filter_width in filter_widths:
        conv2d_layers.append(Conv2D(char_embedding_dim,
                                    (1, filter_width),
                                    padding='same'))

    if highway_on_top:
        dense1 = Dense(char_embedding_dim * len(filter_widths))
        dense2 = Dense(char_embedding_dim * len(filter_widths))

    def result(input):
        emb_c = emb_layer(input)
        conv_results_list = []
        for cl in conv2d_layers:
            conv_results_list.append(cl(emb_c))
        emb_c = Lambda(lambda x: K.concatenate(x, axis=3))(conv_results_list)
        emb_c = Lambda(lambda x: K.max(x, axis=2))(emb_c)
        if highway_on_top:
            sigmoid_gate = dense1(emb_c)
            sigmoid_gate = Activation('sigmoid')(sigmoid_gate)
            deeper_units = dense2(emb_c)
            emb_c = Add()([Multiply()([sigmoid_gate, deeper_units]),
                           Multiply()([Lambda(lambda x: K.constant(1., shape=K.shape(x)) - x)(sigmoid_gate), emb_c])])
            emb_c = Activation('relu')(emb_c)
        return emb_c

    return result
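A hypothetical call of the factory above; the input shape is an assumption (character ids of shape (batch, n_tokens, token_len)) and all sizes are placeholders:

from keras.layers import Input

char_ids = Input(shape=(50, 20), dtype='int32')  # 50 tokens, 20 characters each
char_features = char_emb_cnn_func(n_characters=100,
                                  char_embedding_dim=32)(char_ids)
# -> (batch, 50, 32 * 4): one Conv2D per filter width, concatenated, then max over characters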
Example #10
  def __init__(self, config):
    """
    Convolution neural network model for sentence classification.
    Parameters
    Sentence CNN by Y.Kim
    ----------
    EMBEDDING_DIM: Dimension of the embedding space.
    MAX_SEQUENCE_LENGTH: Maximum length of the sentence.
    MAX_NB_WORDS: Maximum number of words in the vocabulary.
    embeddings_index: A dict containing words and their embeddings.
    word_index: A dict containing words and their indices.
    labels_index: A dict containing the labels and their indices.
    Returns
    -------
    compiled keras model
    """
    self.batch_size = config.batch_size
    self.num_epoch = config.num_epoch

    EMBEDDING_DIM = 300
    MAX_SEQUENCE_LENGTH = config.max_slen[config.dataset_name]
    # embedding_matrix = np.zeros((config.vocab_size, EMBEDDING_DIM))
    embedding_layer = Embedding(config.vocab_size,
      EMBEDDING_DIM,
      input_length=MAX_SEQUENCE_LENGTH,
      trainable=True)

    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedded_sequences = embedding_layer(sequence_input)

    # add first conv filter
    embedded_sequences = Reshape(
      (MAX_SEQUENCE_LENGTH, EMBEDDING_DIM, 1))(embedded_sequences)

    x = Conv2D(100, (5, EMBEDDING_DIM), activation='relu')(embedded_sequences)
    x = MaxPooling2D((MAX_SEQUENCE_LENGTH - 5 + 1, 1))(x)

    # add second conv filter.
    y = Conv2D(100, (4, EMBEDDING_DIM), activation='relu')(embedded_sequences)
    y = MaxPooling2D((MAX_SEQUENCE_LENGTH - 4 + 1, 1))(y)

    # add third conv filter.
    z = Conv2D(100, (3, EMBEDDING_DIM), activation='relu')(embedded_sequences)
    z = MaxPooling2D((MAX_SEQUENCE_LENGTH - 3 + 1, 1))(z)

    # concatenate the conv layers
    alpha = concatenate([x,y,z])
    # flatten the pooled features
    alpha = Flatten()(alpha)

    # dropout
    alpha = Dropout(0.5)(alpha)
    # predictions
    preds = Dense(1, activation='sigmoid')(alpha)

    # build model
    model = Model(sequence_input, preds)
    opt = optimizers.Adam(lr=0.0001)
        
    model.compile(loss='binary_crossentropy',
                  optimizer=opt,
                  metrics=['acc'])

    self.model = model
    return
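A hypothetical config object for the constructor above; the attribute names are exactly the ones the code reads (batch_size, num_epoch, max_slen, dataset_name, vocab_size), while the values are placeholders:

from types import SimpleNamespace

config = SimpleNamespace(
    batch_size=32,
    num_epoch=5,
    dataset_name='toy',
    max_slen={'toy': 50},  # maximum sentence length, keyed by dataset name
    vocab_size=10000,
)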
Example #11
def TEC_basic(config, f_prev=None, m_prev=None):
    '''
        config: parameter settings of the model
        f_prev: feature output from the model trained on the previous time domain
        m_prev: model params from the previous model
    '''
    wt_matrix = np.load('./wt/' + config['dname'] + '.npy')

    # some model compile parameters
    #    opt = keras.optimizers.SGD(.0001)
    #    opt = keras.optimizers.RMSprop(.0001)
    opt = keras.optimizers.Adam(.0001)

    if config['pred_num'] == 3:
        pred_func = 'softmax'
        model_loss = {'pred': 'categorical_crossentropy'}
    else:
        config['pred_num'] = 1
        pred_func = 'sigmoid'
        model_loss = {'pred': 'binary_crossentropy'}

    # design inputs
    input_doc = Input(
        shape=(int(config['seq_max_len']), ),
        dtype='int32',
        name='input_doc',
    )
    input_left = Input(
        shape=(int(config['seq_max_len']), ),
        dtype='int32',
        name='input_left',
    )
    input_right = Input(
        shape=(int(config['seq_max_len']), ),
        dtype='int32',
        name='input_right',
    )
    # define inputs
    inputs = [input_doc, input_left, input_right]

    if f_prev:
        input_prev = Input(
            shape=(2 * int(config['rnn_size']), ),  # output the same shape
            dtype='float32',  # continuous features from the previous-domain model
            name='input_prev')

    # build embedding
    embed = Embedding(
        wt_matrix.shape[0],
        wt_matrix.shape[1],
        weights=[wt_matrix],
        input_length=int(config['seq_max_len']),
        trainable=False,  # frozen, following the author's open-source code
        name='embed')

    embed_doc = embed(input_doc)
    embed_left = embed(input_left)
    embed_right = embed(input_right)

    # left and right are the contexts, connect with LSTM, reverse the right
    left_lstm = LSTM(wt_matrix.shape[1], name='left_lstm')(embed_left)
    left_lstm = RepeatVector(int(config['seq_max_len']))(left_lstm)
    right_lstm = LSTM(wt_matrix.shape[1], go_backwards=True,
                      name='right_lstm')(embed_right)
    right_lstm = RepeatVector(int(config['seq_max_len']))(right_lstm)

    # concatenated
    concat = keras.layers.concatenate([left_lstm, embed_doc, right_lstm],
                                      axis=-1)

    # convolution
    conv = Conv1D(300,
                  3,
                  strides=1,
                  padding='valid',
                  activation='relu',
                  use_bias=False,
                  name='conv')(concat)
    pool = MaxPooling1D(name='pool', strides=None, padding='valid')(conv)
    flatten = Flatten(name='flatten')(pool)

    # add f_prev if it is not None
    if f_prev:
        concat_f = keras.layers.concatenate([input_prev, flatten], axis=-1)

        # a dense layer with dropout
        concat_f = Dense(2 * int(config['rnn_size']),
                         activation='relu')(concat_f)
        concat_f = Dropout(0.5)(concat_f)

        # prediction
        pred = Dense(config['pred_num'], activation=pred_func,
                     name='pred')(concat_f)

        # define inputs
        inputs.append(input_prev)

    else:
        # add a dropout
        f_dp = Dropout(0.5)(flatten)

        # prediction
        pred = Dense(config['pred_num'], activation=pred_func,
                     name='pred')(f_dp)  #'linear'

    # compile model
    my_model = Model(inputs=inputs, outputs=pred)
    my_model.compile(loss=model_loss, optimizer=opt, metrics=['accuracy'])

    print(my_model.summary())
    return my_model
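A hypothetical config for TEC_basic, limited to the keys the function actually reads ('dname', 'pred_num', 'seq_max_len', 'rnn_size'); it assumes a weight matrix has been saved as ./wt/<dname>.npy:

config = {
    'dname': 'toy_domain',  # loads ./wt/toy_domain.npy as the embedding matrix
    'pred_num': 3,          # 3-way softmax head; any other value falls back to a sigmoid binary head
    'seq_max_len': 100,
    'rnn_size': 128,        # only used when f_prev is passed
}
model = TEC_basic(config)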
Example #12
version = keras.__version__
major_version = int(version[0])

n_in = 4
n_out = 6
output_dim = 5
input_length = 10
mb = 42
kernel = 3

embedding_dim = 50
max_words = 200
input_length = 10

model = Sequential()
model.add(Embedding(max_words, embedding_dim, input_length=input_length))
model.add(Convolution1D(128, kernel_size=3,
                        activation='relu'))  # 10 - 3 + 1 = 8
model.add(Convolution1D(64, kernel_size=3,
                        activation='relu'))  # 10 - 3 + 1 = 6
model.add(Convolution1D(32, kernel_size=3,
                        activation='relu'))  # 10 - 3 + 1 = 4
model.add(Flatten())  # 128 = 32 * 4
model.add(Dropout(0.2))
model.add(Dense(128, activation='sigmoid'))  # W = 128 x 128
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.summary()

model.compile(loss='mse', optimizer='adam')
Example #13
# 
# Since we're training a language model, there should also be:
# * An embedding layer that converts character id x_t to a vector.
# * An output layer that predicts probabilities of next phoneme

# In[10]:

import keras
from keras.layers import concatenate,Dense,Embedding

rnn_num_units = 64
embedding_size = 16

#Let's create layers for our recurrent network
#Note: we create layers but we don't "apply" them yet
embed_x = Embedding(n_tokens,embedding_size) # an embedding layer that converts character ids into embeddings


#a dense layer that maps input and previous state to new hidden state, [x_t,h_t]->h_t+1
get_h_next = Dense(rnn_num_units, activation = 'tanh')###YOUR CODE HERE

#a dense layer that maps current hidden state to probabilities of characters [h_t+1]->P(x_t+1|h_t+1)
get_probas = Dense(n_tokens, activation = 'softmax')###YOUR CODE HERE 

#Note: please either set the correct activation to Dense or write it manually in rnn_one_step


# In[11]:

def rnn_one_step(x_t, h_t):
    """
Example #14
def compileModel(classes,
                 embedding_matrix,
                 EMBEDDING_DIM=200,
                 chunk_size=1000,
                 CONVOLUTION_FEATURE=256,
                 BORDER_MODE='valid',
                 LSTM_FEATURE=256,
                 DENSE_FEATURE=256,
                 DROP_OUT=0.5,
                 LEARNING_RATE=0.01,
                 MOMENTUM=0.9):
    global sgd

    ngram_filters = [3, 4]  # Define ngram sizes: 3-gram, 4-gram
    convs = []

    graph_in = Input(shape=(chunk_size, EMBEDDING_DIM))

    for n_gram in ngram_filters:
        conv = Convolution1D(  # Layer X,   Features: 256, Kernel Size: ngram
            nb_filter=CONVOLUTION_FEATURE,  # Number of kernels or filters to generate
            filter_length=n_gram,  # Size of kernels, ngram
            activation='relu')(graph_in)  # Activation function to use

        pool = MaxPooling1D(  # Layer X a,  Max Pooling: 3
            pool_length=3)(conv)  # Size of kernels

        lstm = LSTM(  # Layer X b,  Output Size: 256
            output_dim=LSTM_FEATURE)(pool)  # Features: 256

        convs.append(lstm)

    model = Sequential()

    model.add(
        Embedding(  # Layer 0, Start
            input_dim=nb_words + 1,  # Size of dictionary, has to be input + 1
            output_dim=EMBEDDING_DIM,  # Dimensions to generate
            weights=[embedding_matrix],  # Initialize word weights
            input_length=chunk_size,  # Length of input sequences in the first layer
            trainable=False))  # Disable weight changes during training

    model.add(Dropout(0.25))  # Dropout 25%

    out = Merge(mode='concat')(convs)  # Layer 1,  Output Size: concatenated ngram feature maps

    graph = Model(input=graph_in, output=out)  # Concat the ngram convolutions

    model.add(graph)  # Concat the ngram convolutions

    model.add(Dropout(DROP_OUT))  # Dropout 50%

    model.add(
        Dense(  # Layer 3,  Output Size: 256
            output_dim=DENSE_FEATURE,  # Output dimension
            activation='relu'))  # Activation function to use

    model.add(
        Dense(  # Layer 4,  Output Size: Size Unique Labels, Final
            output_dim=classes,  # Output dimension
            activation='softmax'))  # Activation function to use

    sgd = SGD(lr=LEARNING_RATE, momentum=MOMENTUM, nesterov=True)

    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    print("Done compiling.")
    return model
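A hypothetical call of compileModel above; the function reads a global nb_words, so it must be set first, and the embedding matrix here is a random stand-in (the function itself still relies on Keras 1.x APIs such as Merge):

import numpy as np

nb_words = 5000
embedding_matrix = np.random.rand(nb_words + 1, 200)  # matches EMBEDDING_DIM=200
model = compileModel(classes=4,
                     embedding_matrix=embedding_matrix,
                     chunk_size=1000)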
Example #15
input_context = Input(shape=(maxlen_input, ),
                      dtype='int32',
                      name='the context text')
input_answer = Input(shape=(maxlen_input, ),
                     dtype='int32',
                     name='the answer text up to the current token')
LSTM_encoder = LSTM(sentence_embedding_size,
                    init='lecun_uniform',
                    name='Encode context')
LSTM_decoder = LSTM(sentence_embedding_size,
                    init='lecun_uniform',
                    name='Encode answer up to the current token')

Shared_Embedding = Embedding(output_dim=word_embedding_size,
                             input_dim=dictionary_size,
                             input_length=maxlen_input,
                             name='Shared')
word_embedding_context = Shared_Embedding(input_context)
context_embedding = LSTM_encoder(word_embedding_context)

word_embedding_answer = Shared_Embedding(input_answer)
answer_embedding = LSTM_decoder(word_embedding_answer)

merge_layer = merge(
    [context_embedding, answer_embedding],
    mode='concat',
    concat_axis=1,
    name='concatenate the embeddings of the context and the answer up to current token')
out = Dense(dictionary_size / 2, activation="relu",
Example #16
    image = preprocess(image)
    # fea_vec = model_inception_notop.predict(image)
    # fea_vec = np.reshape(fea_vec, fea_vec.shape[1])
    return image

# Define image caption model
# inputs1 = Input(shape=(2048,))
inputs1 = model_inception_complete.input

# fe1 = Dropout(0.5)(inputs1)
fe1 = Dropout(0.5)(model_inception_complete.layers[-2].output)

fe2 = Dense(256, activation='relu')(fe1)

inputs2 = Input(shape=(max_length,))
se1 = Embedding(vocab_size, embedding_dim, mask_zero=True)(inputs2)
se2 = Dropout(0.5)(se1)
se3 = LSTM(256)(se2)

decoder1 = add([fe2, se3])
decoder2 = Dense(256, activation='relu')(decoder1)
outputs = Dense(vocab_size, activation='softmax')(decoder2)

model = Model(inputs=[inputs1, inputs2], outputs=outputs)

model.load_weights(model_weights, by_name=True)

def greedySearch(photo):
    in_text = 'startseq'
    for i in range(max_length):
        sequence = [wordtoix[w] for w in in_text.split() if w in wordtoix]
Example #17
indices = np.arange(data.shape[0])
np.random.shuffle(indices)
data = data[indices]
labels = labels[indices]
nb_validation_samples = int(VALIDATION_SPLIT * data.shape[0])

x_train = data[:-nb_validation_samples]
y_train = labels[:-nb_validation_samples]
x_val = data[-nb_validation_samples:]
y_val = labels[-nb_validation_samples:]

from keras.layers import Embedding

embedding_layer = Embedding(NUM_WORDS,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=False)

from keras.layers import Dense, Input, GlobalMaxPooling1D, GlobalAveragePooling1D
from keras.layers import Conv1D, MaxPooling1D, Embedding
from keras.models import Model

sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
x = Conv1D(128, 5, activation='relu')(embedded_sequences)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = GlobalMaxPooling1D()(x)
Example #18
import numpy as np

# nums = np.arange(1, 101)
# n_samples = 1000

# samples = np.array([np.random.randint(0, n_items, adj_size) for i in range(n_samples)])
# labels = np.array([(np.argsort(line) == 4).astype('int') for line in samples])

n_items = 50
adj_size = 10
epoches = 100
nn = np.arange(50)
samples = np.array([nn[i:i + 10] for i in range(len(nn) - 10)])
samples = np.tile(samples, epoches).reshape(-1, 10)
np.random.shuffle(samples)
labels = np.array([(np.argsort(line) == 4).astype('int') for line in samples])
Y = np.array([line[np.argsort(line) == 4] for line in samples])

model = Sequential()
model.add(Embedding(input_dim=n_items, output_dim=8, input_length=adj_size))
# print(samples[0])
model.add(GlobalAvgPool1D())
model.add(Dense(10, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

model.fit(samples, labels, epochs=1000, batch_size=50, validation_split=0.3)
# cc=model.predict(np.array([1,2,3,4,5,6,7,8,9,10]).reshape(-1,10))
# print(cc)
Example #19
    def build_model(self, config, weights):
        bgrnn_model = Sequential()
        bgrnn_model.add(
            Embedding(
                config['max_features'],
                config['embedding_dims'],
                input_length=config['input_length'],
                weights=[weights['Wemb']] if 'Wemb' in weights else None))
        bgrnn_model.add(
            Bidirectional(
                GRU(config['rnn_output_dims'],
                    dropout_W=config['dropout_W'],
                    dropout_U=config['dropout_U'])))

        blstm_model = Sequential()
        blstm_model.add(
            Embedding(
                config['max_features'],
                config['embedding_dims'],
                input_length=config['input_length'],
                weights=[weights['Wemb']] if 'Wemb' in weights else None))
        blstm_model.add(
            Bidirectional(
                LSTM(config['rnn_output_dims'],
                     dropout_W=config['dropout_W'],
                     dropout_U=config['dropout_U'])))

        cnn_model = Sequential()
        cnn_model.add(
            Embedding(
                config['max_features'],
                config['embedding_dims'],
                input_length=config['input_length'],
                weights=[weights['Wemb']] if 'Wemb' in weights else None))
        #dropout = 0.2))

        # cnn_model.add(ZeroPadding1D(int(config['filter_length_1'] / 2)))
        cnn_model.add(
            Convolution1D(nb_filter=config['nb_filter_1'],
                          filter_length=config['filter_length_1'],
                          border_mode='valid',
                          activation='relu',
                          subsample_length=1))

        cnn_model.add(GlobalMaxPooling1D())
        # cnn_model.add(Dense(config['hidden_dims']))
        # cnn_model.add(Activation('sigmoid'))

        # merged model
        merged_model = Sequential()
        merged_model.add(
            Merge([bgrnn_model, blstm_model, cnn_model],
                  mode='concat',
                  concat_axis=1))

        merged_model.add(Dropout(self.config['dropout']))

        if config['nb_classes'] > 2:
            merged_model.add(
                Dense(config['nb_classes'],
                      activation='softmax',
                      name="dense_e"))
            loss_type = 'categorical_crossentropy'
        else:
            merged_model.add(Dense(1, activation='sigmoid', name="dense_d"))
            loss_type = 'binary_crossentropy'

        merged_model.compile(loss=loss_type,
                             optimizer=self.get_optimizer(config['optimizer']),
                             metrics=['accuracy'])

        return merged_model
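The Merge of three Sequential models above is a Keras 1.x pattern; a rough Keras 2 functional-API sketch of the same three-branch idea over a shared input (hyperparameter values here are placeholders, and a single shared embedding is used instead of one per branch):

from keras.layers import (Input, Embedding, Bidirectional, GRU, LSTM, Conv1D,
                          GlobalMaxPooling1D, Concatenate, Dropout, Dense)
from keras.models import Model

input_length, max_features, embedding_dims = 100, 20000, 128
rnn_output_dims, nb_filter_1, filter_length_1 = 64, 64, 3

inp = Input(shape=(input_length,), dtype='int32')
emb = Embedding(max_features, embedding_dims)(inp)
bgrnn = Bidirectional(GRU(rnn_output_dims))(emb)
blstm = Bidirectional(LSTM(rnn_output_dims))(emb)
cnn = GlobalMaxPooling1D()(Conv1D(nb_filter_1, filter_length_1, activation='relu')(emb))
x = Dropout(0.5)(Concatenate()([bgrnn, blstm, cnn]))
out = Dense(1, activation='sigmoid')(x)
model = Model(inp, out)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])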
Example #20
def create_model(X_vocab_len, X_max_len, n_phonetic_features, n1, n2, n3, n4,
                 n5, n6, HIDDEN_DIM, LAYER_NUM):
    def smart_merge(vectors, **kwargs):
        return vectors[0] if len(vectors) == 1 else merge(vectors, **kwargs)

    current_word = Input(shape=(X_max_len, ), dtype='float32',
                         name='input1')  # for encoder (shared)
    decoder_input = Input(shape=(X_max_len, ), dtype='float32',
                          name='input3')  # for decoder -- attention
    right_word1 = Input(shape=(X_max_len, ), dtype='float32', name='input4')
    right_word2 = Input(shape=(X_max_len, ), dtype='float32', name='input5')
    right_word3 = Input(shape=(X_max_len, ), dtype='float32', name='input6')
    right_word4 = Input(shape=(X_max_len, ), dtype='float32', name='input7')
    left_word1 = Input(shape=(X_max_len, ), dtype='float32', name='input8')
    left_word2 = Input(shape=(X_max_len, ), dtype='float32', name='input9')
    left_word3 = Input(shape=(X_max_len, ), dtype='float32', name='input10')
    left_word4 = Input(shape=(X_max_len, ), dtype='float32', name='input11')
    phonetic_input = Input(shape=(n_phonetic_features, ),
                           dtype='float32',
                           name='input12')

    emb_layer1 = Embedding(X_vocab_len,
                           EMBEDDING_DIM,
                           input_length=X_max_len,
                           mask_zero=False,
                           name='Embedding')

    list_of_inputs = [
        current_word, right_word1, right_word2, right_word3, right_word4,
        left_word1, left_word2, left_word3, left_word4
    ]

    current_word_embedding, right_word_embedding1, right_word_embedding2,right_word_embedding3, right_word_embedding4, \
     left_word_embedding1, left_word_embedding2, left_word_embedding3, left_word_embedding4 = [emb_layer1(i) for i in list_of_inputs]

    print("Type:: ", type(current_word_embedding))
    list_of_embeddings1 = [current_word_embedding, right_word_embedding1, right_word_embedding2,right_word_embedding3, right_word_embedding4, \
     left_word_embedding1, left_word_embedding2, left_word_embedding3, left_word_embedding4]

    list_of_embeddings = [
        Dropout(0.50, name='drop1_' + str(j))(i)
        for i, j in zip(list_of_embeddings1, range(len(list_of_embeddings1)))
    ]
    list_of_embeddings = [
        GaussianNoise(0.05, name='noise1_' + str(j))(i)
        for i, j in zip(list_of_embeddings, range(len(list_of_embeddings)))
    ]

    conv4_curr, conv4_right1, conv4_right2, conv4_right3, conv4_right4, conv4_left1, conv4_left2, conv4_left3, conv4_left4 =\
      [Conv1D(filters=no_filters,
       kernel_size=4, padding='valid',activation='relu',
       strides=1, name='conv4_'+str(j))(i) for i,j in zip(list_of_embeddings, range(len(list_of_embeddings)))]

    conv4s = [
        conv4_curr, conv4_right1, conv4_right2, conv4_right3, conv4_right4,
        conv4_left1, conv4_left2, conv4_left3, conv4_left4
    ]
    maxPool4 = [
        MaxPooling1D(name='max4_' + str(j))(i)
        for i, j in zip(conv4s, range(len(conv4s)))
    ]
    avgPool4 = [
        AveragePooling1D(name='avg4_' + str(j))(i)
        for i, j in zip(conv4s, range(len(conv4s)))
    ]

    pool4_curr, pool4_right1, pool4_right2, pool4_right3, pool4_right4, pool4_left1, pool4_left2, pool4_left3, pool4_left4 = \
     [merge([i,j], name='merge_conv4_'+str(k)) for i,j,k in zip(maxPool4, avgPool4, range(len(maxPool4)))]

    conv5_curr, conv5_right1, conv5_right2, conv5_right3, conv5_right4, conv5_left1, conv5_left2, conv5_left3, conv5_left4 = \
      [Conv1D(filters=no_filters,
       kernel_size=5,
       padding='valid',
       activation='relu',
       strides=1, name='conv5_'+str(j))(i) for i,j in zip(list_of_embeddings, range(len(list_of_embeddings)))]

    conv5s = [
        conv5_curr, conv5_right1, conv5_right2, conv5_right3, conv5_right4,
        conv5_left1, conv5_left2, conv5_left3, conv5_left4
    ]
    maxPool5 = [
        MaxPooling1D(name='max5_' + str(j))(i)
        for i, j in zip(conv5s, range(len(conv5s)))
    ]
    avgPool5 = [
        AveragePooling1D(name='avg5_' + str(j))(i)
        for i, j in zip(conv5s, range(len(conv5s)))
    ]

    pool5_curr, pool5_right1, pool5_right2, pool5_right3, pool5_right4, pool5_left1, pool5_left2, pool5_left3, pool5_left4 = \
     [merge([i,j], name='merge_conv5_'+str(k)) for i,j,k in zip(maxPool5, avgPool5, range(len(maxPool5)))]


    maxPools = [pool4_curr, pool4_right1, pool4_right2, pool4_right3, pool4_right4, \
     pool4_left1, pool4_left2, pool4_left3, pool4_left4, \
     pool5_curr, pool5_right1, pool5_right2, pool5_right3, pool5_right4, \
     pool5_left1, pool5_left2, pool5_left3, pool5_left4]

    concat = merge(maxPools, mode='concat', name='main_merge')

    x = Dropout(0.15, name='drop_single1')(concat)
    x = Bidirectional(RNN(rnn_output_size), name='bidirec1')(x)

    total_features = [x, phonetic_input]
    concat2 = merge(total_features, mode='concat', name='phonetic_merging')

    x = Dense(HIDDEN_DIM,
              activation='relu',
              kernel_initializer='he_normal',
              kernel_constraint=maxnorm(3),
              bias_constraint=maxnorm(3),
              name='dense1')(concat2)
    x = Dropout(0.15, name='drop_single2')(x)
    x = Dense(HIDDEN_DIM,
              kernel_initializer='he_normal',
              activation='tanh',
              kernel_constraint=maxnorm(3),
              bias_constraint=maxnorm(3),
              name='dense2')(x)
    x = Dropout(0.15, name='drop_single3')(x)

    out1 = Dense(n1,
                 kernel_initializer='he_normal',
                 activation='softmax',
                 name='output1')(x)
    out2 = Dense(n2,
                 kernel_initializer='he_normal',
                 activation='softmax',
                 name='output2')(x)
    out3 = Dense(n3,
                 kernel_initializer='he_normal',
                 activation='softmax',
                 name='output3')(x)
    out4 = Dense(n4,
                 kernel_initializer='he_normal',
                 activation='softmax',
                 name='output4')(x)
    out5 = Dense(n5,
                 kernel_initializer='he_normal',
                 activation='softmax',
                 name='output5')(x)
    out6 = Dense(n6,
                 kernel_initializer='he_normal',
                 activation='softmax',
                 name='output6')(x)

    # Luong et al. 2015 attention model
    emb_layer = Embedding(X_vocab_len,
                          EMBEDDING_DIM,
                          input_length=X_max_len,
                          mask_zero=True,
                          name='Embedding_for_seq2seq')

    current_word_embedding, right_word_embedding1, right_word_embedding2,right_word_embedding3, right_word_embedding4, \
     left_word_embedding1, left_word_embedding2, left_word_embedding3, left_word_embedding4 = [emb_layer(i) for i in list_of_inputs]

    # current_word_embedding = smart_merge([ current_word_embedding, right_word_embedding1,  left_word_embedding1])

    encoder, state = GRU(rnn_output_size,
                         return_sequences=True,
                         unroll=True,
                         return_state=True,
                         name='encoder')(current_word_embedding)
    encoder_last = encoder[:, -1, :]

    decoder = emb_layer(decoder_input)
    decoder = GRU(rnn_output_size,
                  return_sequences=True,
                  unroll=True,
                  name='decoder')(decoder, initial_state=[encoder_last])

    attention = dot([decoder, encoder], axes=[2, 2], name='dot')
    attention = Activation('softmax', name='attention')(attention)

    context = dot([attention, encoder], axes=[2, 1], name='dot2')
    decoder_combined_context = concatenate([context, decoder],
                                           name='concatenate')

    outputs = TimeDistributed(Dense(64, activation='tanh'),
                              name='td1')(decoder_combined_context)
    outputs = TimeDistributed(Dense(X_vocab_len, activation='softmax'),
                              name='td2')(outputs)

    all_inputs = [current_word, decoder_input, right_word1, right_word2, right_word3, right_word4, left_word1, left_word2, left_word3,\
         left_word4, phonetic_input]
    all_outputs = [outputs, out1, out2, out3, out4, out5, out6]

    model = Model(input=all_inputs, output=all_outputs)
    opt = Adam()

    return model
Example #21
print('X_test shape:', X_test.shape)

Y_train = np_utils.to_categorical(Y_train)
Y_test = np_utils.to_categorical(Y_test)
print('Y_train shape:', Y_train.shape)
print('Y_test shape:', Y_test.shape)

#sys.exit(1)
print('Build model...')
model = Sequential()

# we start off with an efficient embedding layer which maps
# our vocab indices into embedding_dims dimensions
model.add(
    Embedding(len(train_chars),
              embedding_dims,
              input_length=maxlen,
              dropout=0.2))

# we add a Convolution1D, which will learn nb_filter
# word group filters of size filter_length:
model.add(
    Convolution1D(nb_filter=nb_filter,
                  filter_length=filter_length,
                  activation='relu',
                  subsample_length=1))
# we use max pooling:
model.add(MaxPooling1D(pool_length=3))

# We flatten the output of the conv layer,
# so that we can add a vanilla dense layer:
model.add(Flatten())
Example #22
def main(argv):
    print '*' * 20
    print 'Loading dataset...'
    sys.stdout.flush()
    #dataset of activities
    DATASET = DATASET_CSV
    df_dataset = pd.read_csv(DATASET,
                             parse_dates=[[0, 1]],
                             header=None,
                             index_col=0,
                             sep=' ')
    df_dataset.columns = ['sensor', 'action', 'event', 'activity']
    df_dataset.index.names = ["timestamp"]
    # we only need the actions (not the period) to build the one-hot vector for y, since only actions are predicted
    unique_actions = json.load(open(UNIQUE_ACTIONS, 'r'))
    total_actions = len(unique_actions)

    print '*' * 20
    print 'Preparing dataset...'
    sys.stdout.flush()
    # Prepare sequences using action indices
    # Each action will be an index which will point to an action vector
    # in the weights matrix of the Embedding layer of the network input
    X_actions, X_times, y, tokenizer = prepare_x_y(df_dataset, unique_actions)
    # Create the embedding matrix for the embedding layer initialization
    embedding_matrix = create_embedding_matrix(tokenizer)

    #divide the examples in training and validation
    total_examples = len(X_actions)
    test_per = 0.2
    limit = int(test_per * total_examples)
    X_actions_train = X_actions[limit:]
    X_times_train = X_times[limit:]
    X_actions_test = X_actions[:limit]
    X_times_test = X_times[:limit]
    y_train = y[limit:]
    y_test = y[:limit]
    print 'Different actions:', total_actions
    print 'Total examples:', total_examples
    print 'Train examples:', len(X_actions_train), len(y_train)
    print 'Test examples:', len(X_actions_test), len(y_test)
    sys.stdout.flush()
    X_actions_train = np.array(X_actions_train)
    X_times_train = np.array(X_times_train)
    y_train = np.array(y_train)
    X_actions_test = np.array(X_actions_test)
    X_times_test = np.array(X_times_test)
    y_test = np.array(y_test)
    print 'Shape (X,y):'
    print X_actions_train.shape
    print X_times_train.shape
    print y_train.shape

    print '*' * 20
    print 'Building model...'
    sys.stdout.flush()
    # Actions embeddings branch
    input_actions = Input(shape=(INPUT_ACTIONS, ),
                          dtype='int32',
                          name='input_actions')
    embedding_actions = Embedding(input_dim=embedding_matrix.shape[0],
                                  output_dim=embedding_matrix.shape[1],
                                  weights=[embedding_matrix],
                                  input_length=INPUT_ACTIONS,
                                  trainable=True,
                                  name='embedding_actions')(input_actions)
    # Actions times branch
    input_time = Input(shape=(INPUT_ACTIONS, 2),
                       dtype='float32',
                       name='input_time')
    #reshape_1 = Reshape((INPUT_ACTIONS, 2))(input_time)
    # merge embeddings (5 x 50) and times (5 x 2), to get 5 x 52
    concat = merge([embedding_actions, input_time],
                   mode='concat',
                   concat_axis=-1)
    # Everything continues in a single branch
    lstm_1 = LSTM(512,
                  return_sequences=False,
                  input_shape=(INPUT_ACTIONS, ACTION_EMBEDDING_LENGTH + 2),
                  name='lstm_1')(concat)
    dense_1 = Dense(1024, activation='relu', name='dense_1')(lstm_1)
    drop_1 = Dropout(0.8, name='drop_1')(dense_1)
    dense_2 = Dense(1024, activation='relu', name='dense_2')(drop_1)
    drop_2 = Dropout(0.8, name='drop_2')(dense_2)
    output_actions = Dense(total_actions,
                           activation='softmax',
                           name='main_output')(drop_2)

    model = Model(input=[input_actions, input_time], output=[output_actions])

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', 'mse', 'mae'])
    print(model.summary())
    sys.stdout.flush()

    print '*' * 20
    print 'Training model...'
    sys.stdout.flush()
    BATCH_SIZE = 128
    checkpoint = ModelCheckpoint(BEST_MODEL,
                                 monitor='val_acc',
                                 verbose=0,
                                 save_best_only=True,
                                 save_weights_only=False,
                                 mode='auto')
    history = model.fit([X_actions_train, X_times_train],
                        y_train,
                        batch_size=BATCH_SIZE,
                        nb_epoch=1000,
                        validation_data=([X_actions_test,
                                          X_times_test], y_test),
                        shuffle=False,
                        callbacks=[checkpoint])

    print '*' * 20
    print 'Plotting history...'
    sys.stdout.flush()
    plot_training_info(['accuracy', 'loss'], True, history.history)

    print '*' * 20
    print 'Evaluating best model...'
    sys.stdout.flush()
    model = load_model(BEST_MODEL)
    metrics = model.evaluate([X_actions_test, X_times_test],
                             y_test,
                             batch_size=BATCH_SIZE)
    print metrics

    predictions = model.predict([X_actions_test, X_times_test], BATCH_SIZE)
    correct = [0] * 5
    prediction_range = 5
    for i, prediction in enumerate(predictions):
        correct_answer = y_test[i].tolist().index(1)
        best_n = np.sort(prediction)[::-1][:prediction_range]
        for j in range(prediction_range):
            if prediction.tolist().index(best_n[j]) == correct_answer:
                for k in range(j, prediction_range):
                    correct[k] += 1

    accuracies = []
    for i in range(prediction_range):
        print '%s prediction accuracy: %s' % (i + 1,
                                              (correct[i] * 1.0) / len(y_test))
        accuracies.append((correct[i] * 1.0) / len(y_test))

    print accuracies

    print '************ FIN ************\n' * 3
Example #23
# Testing the model
tt = Tokenizer()
tt.fit_on_texts(input_test)
tvocab_size = len(tt.word_index) + 1
# integer encode the documents
tencoded_docs = tt.texts_to_sequences(input_test)
#print(encoded_docs)
# pad documents to a max length of 4 words
tpadded_docs = pad_sequences(tencoded_docs, maxlen=max_length, padding='post')
#print(padded_docs)


# define model 
model = Sequential()
e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=max_length, trainable=False)
model.add(e)
model.add(GRU(gru_output_size, return_sequences=True, dropout=0.2, recurrent_dropout=0.2))
model.add(Flatten())
model.add(Dense(nclass, activation='softmax'))


# compile the model (categorical_crossentropy to match the softmax output over nclass labels)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
# summarize the model
print(model.summary())
# fit the model
model.fit(padded_docs,y_train, epochs=1, verbose=0, validation_data=(vpadded_docs, y_valid))

# evaluate the model
loss, accuracy = model.evaluate(tpadded_docs, y_test, verbose=0)
Example #24
print(token.word_index)
# {'너무': 1, '참': 2, '재밌어요': 3, '최고에요': 4, '잘': 5, '만든': 6, '영화에요': 7, '추천하고': 8, '싶은': 9, '영화': 10, '입니다': 11,
# '한번': 12, '더': 13, '보고': 14, '싶네요': 15, '글쎄요': 16, '별로에요': 17,
# '생각보다': 18, '지루해요': 19, '연기가': 20, '어색해요': 21, '재미없어요': 22, '재미없다': 23, '재밌네요': 24}
# More frequent words are assigned smaller (earlier) indices.

x = token.texts_to_sequences(docs)
print(x)
#[[1, 3], [4], [2, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15], [16], [17], [18, 19], [20, 21], [22], [1, 23], [2, 24]]

pad_x = pad_sequences(x, padding='pre', value=0)
print(pad_x)

word_size = len(token.word_index) + 1
print(word_size)

model = Sequential()
model.add(Embedding(25, 10, input_length=4))
model.add(Conv1D(10, 2))
model.add(Conv1D(10, 2))
model.add(MaxPool1D())
# model.add(Embedding(word_size,10,input_length=4))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
model.fit(pad_x, labels, epochs=30)
acc = model.evaluate(pad_x, labels)[1]
print(acc)
Example #25
X_train = sequence.pad_sequences(X1, maxlen=maxlen)

X_test = sequence.pad_sequences(T1, maxlen=maxlen)

y_train = np.array(trainlabel)
y_test = np.array(testlabel)

hidden_dims = 128
nb_filter = 128
filter_length = 2
embedding_vecor_length = 128
pool_length = 2
lstm_output_size = 70

model = Sequential()
model.add(Embedding(max_features, embedding_vecor_length, input_length=maxlen))
model.add(
    Convolution1D(nb_filter=nb_filter,
                  filter_length=filter_length,
                  border_mode='valid',
                  activation='relu',
                  subsample_length=1))
model.add(MaxPooling1D(pool_length=pool_length))
model.add(LSTM(lstm_output_size))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.load_weights("logs/cnnlstm/checkpoint-00.hdf5")

y_pred = model.predict_classes(X_test)
accuracy = accuracy_score(y_test, y_pred)
Example #26
    def cnnlstm_fit():

        start_time = time.time()

        global X_val, X_train, X_test, y_train, y_val, y_test

        print('X_train shape:', X_train.shape)
        print('X_test shape:', X_test.shape)
        y_train = np.array(y_train)

        model = Sequential()
        model.add(
            Embedding(num_words,
                      EMBEDDING_DIM,
                      embeddings_initializer=Constant(embedding_matrix),
                      input_length=MAX_SEQUENCE_LENGTH,
                      trainable=False))
        #    model.add(Embedding(num_words, EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH))
        #    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
        #    embedded_sequences = embedding_layer(sequence_input)
        model.add(Conv1D(128, 5, activation='relu'))  #(embedded_sequences)
        model.add(MaxPooling1D(pool_size=4))
        model.add(Bidirectional(LSTM(64)))
        model.add(Dropout(0.5))
        model.add(Dense(1, activation='sigmoid'))

        # try using different optimizers and different optimizer configs
        model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

        print('Train...')
        history = model.fit(X_train,
                            y_train,
                            batch_size=batch_size,
                            epochs=epochs,
                            validation_data=[X_val, y_val])

        plot_epoch_loss(history)  # plot loss curve

        prob_val = model.predict(
            X_val, verbose=0)  # make prob predictions on val data
        prob_val = [i for i in prob_val]  # get prob relevance
        tr_val = [float(i) for i in y_val]  # true label for val data as float
        fpr, tpr, thresholds = roc_curve(tr_val, prob_val)
        optco = thresholds[np.argmax(tpr > 0.95)]  # optimal prob cutoff
        pred_val = [
            1. if i > optco else 0. for i in prob_val
        ]  # pred is 1 if prob>optimal cutoff as determined from val data
        pf1 = metrics.f1_score(tr_val, pred_val)  # predicted f1
        ppr = metrics.precision_score(tr_val, pred_val)  # predicted precision
        prec = metrics.recall_score(tr_val, pred_val)  #  predicted recall
        proc = metrics.roc_auc_score(
            tr_val, prob_val)  #  predicted roc auc measured on val data
        precision, recall, pr_thresholds = precision_recall_curve(
            tr_val, prob_val)  # pr curve
        #p_prre_auc= metrics.auc(recall, precision,reorder=True)    # pr auc
        p_prre_auc = metrics.average_precision_score(tr_val,
                                                     prob_val)  # pr auc

        prob_test = model.predict(
            X_test,
            verbose=0)  # make prob predictions on unclassified (test) data
        prob_test = [i for i in prob_test]  # get prob relevance
        pred_test = [
            1. if i > optco else 0. for i in prob_test
        ]  # pred is 1 if prob on the test data > optimal cutoff  as determined from val data
        tr_test = [float(i)
                   for i in y_test]  # true label for test data as float
        af1 = metrics.f1_score(tr_test, pred_test)  #actual f1
        apr = metrics.precision_score(tr_test, pred_test)  #actual precision
        arec = metrics.recall_score(tr_test, pred_test)  #actual recall
        aroc = metrics.roc_auc_score(tr_test, prob_test)  #actual roc auc
        precision, recall, a_thresholds = precision_recall_curve(
            tr_test, prob_test)
        #a_prre_auc= metrics.auc(recall,precision,reorder=True)
        a_prre_auc = metrics.average_precision_score(tr_test,
                                                     prob_test)  # pr auc

        ndata = X_train.shape[0]
        t = (time.time() - start_time) / 3600.  # time taken in seconds
        r = [
            ndata, t, pf1, af1, ppr, apr, prec, arec, proc, aroc, p_prre_auc,
            a_prre_auc
        ]  # list of results for output

        print("Time to run CNN_LSTM classification model = --- %s hours ---" %
              ((time.time() - start_time) / 3600.))
        print(r)

        return (r)
Example #27
def make_predictions(X, Y, val_X, val_Y, test_X, test_Y, s, test_ids):

    cl_w = compute_class_weight('balanced', np.unique(Y), Y)
    earlystop = EarlyStopping(monitor='val_loss', min_delta=0.01, patience=patience, \
                          verbose=1, mode='auto')

    print('Build model CNN model')

    in_txt = Input(name='in_norm',
                   batch_shape=tuple([None, maxlen]),
                   dtype='int32')

    # init with pre-trained embeddings
    emb_char = Embedding(len(word2index),
                         embedding_dims,
                         embeddings_initializer=Constant(embedding_matrix),
                         trainable=True,
                         input_length=maxlen,
                         name='emb_char')

    emb_seq = emb_char(in_txt)

    z = Dropout(dropout_prob[0])(emb_seq)

    # convolutional block
    conv_blocks = []
    for sz in filter_sizes:
        conv = Convolution1D(filters=num_filters,
                             kernel_size=sz,
                             padding="valid",
                             activation="relu",
                             strides=1,
                             kernel_regularizer=regularizers.l2(0.01),
                             kernel_initializer=initializer_func)(z)
        conv = MaxPooling1D(pool_size=2)(conv)
        conv = Flatten()(conv)
        conv_blocks.append(conv)
    z = Concatenate()(conv_blocks) if len(conv_blocks) > 1 else conv_blocks[0]

    z = Dropout(dropout_prob[1])(z)
    z = Dense(hidden_dims,
              activation="relu",
              kernel_regularizer=regularizers.l2(0.01),
              kernel_initializer=initializer_func)(z)

    out_soft = Dense(1,
                     activation='sigmoid',
                     name='out_soft',
                     kernel_initializer=initializer_func,
                     kernel_regularizer=regularizers.l2(0.01))(z)

    model = Model(inputs=in_txt, outputs=out_soft)

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    model.fit(X,
              Y,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(val_X, val_Y),
              class_weight={
                  0: cl_w[0],
                  1: cl_w[1]
              },
              callbacks=[earlystop],
              verbose=0)

    y_hat = model.predict(val_X, batch_size=batch_size)
    y_hat = y_hat.flatten()
    res_list = {}
    thresholds = np.arange(0, 1, 0.1)

    from sklearn.metrics import precision_recall_fscore_support

    f1_prod_list = []

    for p in thresholds:

        y_pred = []

        for y in y_hat:

            if y >= p:
                y_pred.append(1)
            else:
                y_pred.append(0)

        y_pred = np.array(y_pred)

        precision, recall, f1, _ = precision_recall_fscore_support(
            val_Y, y_pred, average=None)
        f1_prod_list.append(np.prod(f1))
        res_list[p] = y_pred

    f1_prod_list = np.array(f1_prod_list)

    max_f1 = np.argmax(f1_prod_list)
    print('Positive class probability threshold %.4f' % thresholds[max_f1])
    p = thresholds[max_f1]
    y_hat = model.predict(test_X, batch_size=batch_size)
    y_hat = y_hat.flatten()
    y_pred = []

    for y in y_hat:

        if y >= p:
            y_pred.append(1)
        else:
            y_pred.append(0)

    y_pred = np.array(y_pred)
    test_y_hat = y_pred

    prec, recall, fm, support = precision_recall_fscore_support(
        test_Y, test_y_hat)

    print('F-measure')
    print(fm)
    print('Precision')
    print(prec)
    print('Recall')
    print(recall)
    print('Support')
    print(support)

    accuracy_score = sklearn.metrics.accuracy_score(test_Y, test_y_hat)
    print('accuracy_score: {0}'.format(accuracy_score))

    roc_auc_score = sklearn.metrics.roc_auc_score(test_Y, test_y_hat)
    print('roc_auc_score: {0}'.format(roc_auc_score))

    false_positive_rate, true_positive_rate, thresholds_roc = sklearn.metrics.roc_curve(
        test_Y, test_y_hat)

    false_pos = []
    false_neg = []

    false_pos_probs = []
    false_neg_probs = []

    pos_class_probs = []

    pos_class_probs_low = []
    pos_class_probs_high = []

    y_hat_proba = y_hat

    pos_class_probs = np.array(y_hat_proba)
    threshold = thresholds[max_f1]

    for proba in pos_class_probs:
        if proba >= threshold:
            pos_class_probs_high.append(proba)
        else:
            pos_class_probs_low.append(proba)

    pos_class_probs_low = np.array(pos_class_probs_low)
    pos_class_probs_high = np.array(pos_class_probs_high)

    threshold_low = np.median(pos_class_probs_low)
    threshold_high = np.median(pos_class_probs_high)

    for n, proba in enumerate(pos_class_probs):
        if proba <= threshold_low and test_Y[n] == 1:
            false_neg.append(test_ids[n])
            false_neg_probs.append(proba)

        elif proba >= threshold_high and test_Y[n] == 0:
            false_pos.append(test_ids[n])
            false_pos_probs.append(proba)

    print('Positive class proba distribution')
    #print(pos_class_probs)
    pos_class_probs = np.array(pos_class_probs)
    print('stat')
    print(st.describe(pos_class_probs))
    print('median')
    print(np.median(pos_class_probs))
    print('\n')

    print('False negatives with p of positive class <= %.3f' % threshold_low)
    print('admission ids')
    print(false_neg)
    if len(false_neg_probs) > 0:
        print('p distribution')
        # print(false_neg_probs)
        false_neg_probs = np.array(false_neg_probs)
        print('stat')
        print(st.describe(false_neg_probs))
        print('median')
        print(np.median(false_neg_probs))
        print('\n')

    print('False positives with p of positive class >= %.3f' % threshold_high)
    print('admission ids')
    print(false_pos)
    if len(false_pos_probs) > 0:
        print('p distribution')
        # print(false_pos_probs)
        false_pos_probs = np.array(false_pos_probs)
        print('stat')
        print(st.describe(false_pos_probs))
        print('median')
        print(np.median(false_pos_probs))
        print('\n')

    return fm[1], prec[1], recall[1], roc_auc_score
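For reference, the 'balanced' mode of compute_class_weight used at the top of this function weights each class by n_samples / (n_classes * count_of_class); a small illustrative sketch of that formula:

# Illustrative sketch of sklearn's 'balanced' class-weight heuristic used above.
import numpy as np

def balanced_class_weights(y):
    classes, counts = np.unique(y, return_counts=True)
    return dict(zip(classes, len(y) / (len(classes) * counts)))

# e.g. balanced_class_weights([0, 0, 0, 1]) -> {0: 0.667, 1: 2.0} (approximately)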
Example #28
0
vocabulary = utils.getVocabulary(trainWindows,winSize,vocabSize)

trainFeatures = utils.vectorizeWindows(trainWindows,vocabulary)
devFeatures = utils.vectorizeWindows(devWindows,vocabulary)
testFeatures = utils.vectorizeWindows(testWindows,vocabulary)

trainTargets = np.asarray(trainTargets)
devTargets = np.asarray(devTargets)
testTargets = np.asarray(testTargets)

print("Finished processing")

model = Sequential()
# Number of embedding vectors = vocabSize + UNK + <s> + <e>
model.add(Embedding(vocabSize + 3, VSIZE, input_length=winSize, input_dtype='int32'))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(trainTargets.shape[1], activation='softmax'))
model.summary()
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

history = model.fit(trainFeatures, trainTargets,
                    batch_size=batch_size, nb_epoch=nb_epoch,
                    verbose=1, validation_data=(devFeatures, devTargets))
score = model.evaluate(testFeatures, testTargets, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
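utils.getVocabulary and utils.vectorizeWindows are project-specific helpers that are not part of this listing. A hypothetical sketch of what such a vectorizer could look like, assuming the first three embedding indices are reserved for UNK, <s> and <e> (matching the vocabSize + 3 rows above):

# Hypothetical sketch only; the real utils module is not shown in this example.
import numpy as np

UNK, BOS, EOS = 0, 1, 2   # assumed reserved indices
OFFSET = 3                # real vocabulary words start after the reserved ids

def vectorize_windows(windows, vocabulary):
    # map each window of words to integer ids; out-of-vocabulary words fall back to UNK
    return np.array(
        [[vocabulary[w] + OFFSET if w in vocabulary else UNK for w in window]
         for window in windows],
        dtype='int32')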
Example #29
0
def elsa_architecture(nb_classes, nb_tokens, maxlen, feature_output=False, embed_dropout_rate=0, final_dropout_rate=0, embed_dim=300,
                    embed_l2=1E-6, return_attention=False, load_embedding=False, pre_embedding=None, high=False, LSTM_hidden=512, LSTM_drop=0.5):
    """
    Returns the DeepMoji architecture uninitialized and
    without using the pretrained model weights.
    # Arguments:
        nb_classes: Number of classes in the dataset.
        nb_tokens: Number of tokens in the dataset (i.e. vocabulary size).
        maxlen: Maximum sequence length (number of tokens per input).
        feature_output: If True the model returns the penultimate
                        feature vector rather than Softmax probabilities
                        (defaults to False).
        embed_dropout_rate: Dropout rate for the embedding layer.
        final_dropout_rate: Dropout rate for the final Softmax layer.
        embed_l2: L2 regularization for the embedding layer.
        high: whether to add a (TimeDistributed) Highway layer after the recurrent stack.
    # Returns:
        Model with the given parameters.
    """
    class NonMasking(Layer):   
        def __init__(self, **kwargs):   
            self.supports_masking = True  
            super(NonMasking, self).__init__(**kwargs)   

        def build(self, input_shape):   
            input_shape = input_shape   

        def compute_mask(self, input, input_mask=None):   
            # do not pass the mask to the next layers   
            return None   

        def call(self, x, mask=None):   
            return x   

        def get_output_shape_for(self, input_shape):   
            return input_shape 
    # define embedding layer that turns word tokens into vectors
    # an activation function is used to bound the values of the embedding
    model_input = Input(shape=(maxlen,), dtype='int32')
    embed_reg = L1L2(l2=embed_l2) if embed_l2 != 0 else None
    if not load_embedding and pre_embedding is None:
        embed = Embedding(input_dim=nb_tokens, output_dim=embed_dim, mask_zero=True,input_length=maxlen,embeddings_regularizer=embed_reg,
                          name='embedding')
    else:
        embed = Embedding(input_dim=nb_tokens, output_dim=embed_dim, mask_zero=True,input_length=maxlen, weights=[pre_embedding],
                          embeddings_regularizer=embed_reg,trainable=True, name='embedding')
    if high:
        x = NonMasking()(embed(model_input))
    else:
        x = embed(model_input)
    x = Activation('tanh')(x)

    # entire embedding channels are dropped out instead of the
    # normal Keras embedding dropout, which drops all channels for entire words
    # many of the datasets contain so few words that losing one or more words can alter the emotions completely
    if embed_dropout_rate != 0:
        embed_drop = SpatialDropout1D(embed_dropout_rate, name='embed_drop')
        x = embed_drop(x)

    # skip-connection from embedding to output eases gradient-flow and allows access to lower-level features
    # ordering of the way the merge is done is important for consistency with the pretrained model
    lstm_0_output = Bidirectional(LSTM(LSTM_hidden, return_sequences=True, dropout=LSTM_drop), name="bi_lstm_0" )(x)
    lstm_1_output = Bidirectional(LSTM(LSTM_hidden, return_sequences=True, dropout=LSTM_drop), name="bi_lstm_1" )(lstm_0_output)
    x = concatenate([lstm_1_output, lstm_0_output, x])
    if high:
        x = TimeDistributed(Highway(activation='tanh', name="high"))(x)
    # if return_attention is True in AttentionWeightedAverage, an additional tensor
    # representing the weight at each timestep is returned
    weights = None
    x = AttentionWeightedAverage(name='attlayer', return_attention=return_attention)(x)
    #x = MaskAverage(name='attlayer', return_attention=return_attention)(x)
    if return_attention:
        x, weights = x

    if not feature_output:
        # output class probabilities
        if final_dropout_rate != 0:
            x = Dropout(final_dropout_rate)(x)

        if nb_classes > 2:
            outputs = [Dense(nb_classes, activation='softmax', name='softmax')(x)]
        else:
            outputs = [Dense(1, activation='sigmoid', name='softmax')(x)]
    else:
        # output penultimate feature vector
        outputs = [x]

    if return_attention:
        # add the attention weights to the outputs if required
        outputs.append(weights)

    return Model(inputs=[model_input], outputs=outputs)
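A minimal usage sketch for the function above; the vocabulary size, sequence length and dropout values here are placeholders, not the original project's settings:

# Illustrative usage of elsa_architecture with placeholder hyperparameters.
model = elsa_architecture(nb_classes=2, nb_tokens=20000, maxlen=100,
                          embed_dropout_rate=0.25, final_dropout_rate=0.5,
                          LSTM_hidden=512, LSTM_drop=0.5)
# nb_classes == 2 yields a single sigmoid output, so a binary loss is appropriate
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])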
Example #30
0
    def build(self):
        """Construct lstm cvae model"""
        # Load embedding in Embedding layer
        embedding_matrix = self.load_embedding()
        embedding_layer = Embedding(
            self.num_words + 1,
            self.config.embedding_dim,
            weights=[embedding_matrix],
            input_length=self.config.max_sequence_length,
            trainable=False)

        # Q(z|X,y) -- encoder
        # embedded sequence input
        sequence_inputs = Input(batch_shape=(self.config.batch_size,
                                             self.config.max_sequence_length),
                                dtype='int32')
        embedded_inputs = embedding_layer(sequence_inputs)
        x = LSTM(self.config.lstm_size_encoder,
                 return_sequences=False)(embedded_inputs)
        score_inputs = Input(batch_shape=(self.config.batch_size, 1))
        x_joint = concatenate([x, score_inputs], axis=1)
        x_encoded = Dense(self.config.intermediate_size,
                          activation='tanh')(x_joint)
        z_mean = Dense(self.config.latent_size)(x_encoded)
        z_log_sigma = Dense(self.config.latent_size)(x_encoded)

        # Sample z ~ Q(z|X,y)
        def sampling(args):
            z_mean, z_log_sigma = args
            epsilon = K.random_normal(shape=(self.config.batch_size,
                                             self.config.latent_size),
                                      mean=0.,
                                      stddev=1.)
            return z_mean + K.exp(z_log_sigma / 2.) * epsilon

        z = Lambda(sampling)([z_mean, z_log_sigma])
        z_cond = concatenate([z, score_inputs], axis=1)

        # P(X|z,y) -- decoder
        z_repeated = RepeatVector(self.config.max_sequence_length)(z_cond)

        decoder_h = LSTM(self.config.lstm_size_decoder, return_sequences=True)
        decoder_out = Dense(self.num_words + 1)

        h_decoded = decoder_h(z_repeated)
        x_decoded = decoder_out(h_decoded)
        # Construct three models
        # vae
        vae = Model([sequence_inputs, score_inputs], x_decoded)
        # encoder
        encoder = Model([sequence_inputs, score_inputs], z_mean)
        # generator
        generator_z_inputs = Input(batch_shape=(self.config.batch_size,
                                                self.config.latent_size))
        generator_z_cond = concatenate([generator_z_inputs, score_inputs],
                                       axis=1)
        generator_z_repeated = RepeatVector(
            self.config.max_sequence_length)(generator_z_cond)
        generator_h_decoded = decoder_h(generator_z_repeated)
        generator_x_decoded = decoder_out(generator_h_decoded)
        generator = Model([generator_z_inputs, score_inputs],
                          generator_x_decoded)

        kl_weight = self.config.kl_weight

        def recon_loss(y_true, y_pred):
            """E[log P(X|z,y)]"""
            recon = K.mean(K.sparse_categorical_crossentropy(output=y_pred,
                                                             target=y_true,
                                                             from_logits=True),
                           axis=1)
            return recon

        def kl_loss(y_true, y_pred):
            """D_KL(Q(z|X,y) || P(z|X)); calculate in closed form as both dist. are Gaussian"""
            kl = 0.5 * K.mean(
                K.exp(z_log_sigma) + K.square(z_mean) - 1. - z_log_sigma,
                axis=1)
            kl = kl * kl_weight
            return kl

        def vae_loss(y_true, y_pred):
            """ Calculate loss = reconstruction loss + KL loss for each data in minibatch """
            recon = recon_loss(y_true, y_pred)
            kl = kl_loss(y_true, y_pred)
            return recon + kl

        vae.compile(loss=vae_loss,
                    optimizer=self.config.optimizer,
                    metrics=[recon_loss, kl_loss])

        self.vae = vae
        self.encoder = encoder
        self.generator = generator
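kl_loss above relies on the closed-form KL divergence between the diagonal Gaussian Q(z|X,y) and a standard normal prior. Assuming z_log_sigma holds the log-variance (consistent with the exp(z_log_sigma / 2.) used in sampling), a standalone numpy check of that formula against a Monte-Carlo estimate:

# Standalone check (assumption: z_log_sigma is log-variance) of
# D_KL(N(mu, sigma^2) || N(0, 1)) = 0.5 * (sigma^2 + mu^2 - 1 - log sigma^2).
import numpy as np

mu, log_var = 0.7, -0.3
closed_form = 0.5 * (np.exp(log_var) + mu ** 2 - 1. - log_var)

rng = np.random.default_rng(0)
sigma = np.exp(log_var / 2.)
z = rng.normal(mu, sigma, size=1_000_000)
# Monte-Carlo estimate of E_q[log q(z) - log p(z)]; the 2*pi terms cancel
mc = np.mean((-0.5 * ((z - mu) / sigma) ** 2 - np.log(sigma)) + 0.5 * z ** 2)
print(closed_form, mc)  # the two values should agree to about three decimal places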
Example #31
0
# pad every sequence to the same length (maxlen); uniform-length inputs also speed up batch processing
print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)  #returns numpy array
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
y_train = np.array(y_train)  #keras only accepts numpy arrays, not python lists
y_test = np.array(y_test)
x_train = np.array(x_train)
x_test = np.array(x_test)

#instantiate sequential model
model = Sequential()

#add layers to model (in order! because you are using a sequential model)
model.add(Embedding(max_features, 128))  #embeddings are 128 dim vectors
model.add(Bidirectional(LSTM(64)))  #LSTM layer has 64 units
model.add(Dropout(0.5))  #what proportion of inputs to set to 0
model.add(
    Dense(1, activation='sigmoid')
)  #single sigmoidal output, predicting either 0 or 1, negative or positive sentiment

#compile the model
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

#train the model
print('Train...')
hist = model.fit(x_train,
                 y_train,
                 batch_size=batch_size,
                 epochs=1,
Example #32
0
def train_lstm_for_visualization():
    checkpoints = glob(MODEL_PATH + "*.h5")
    if len(checkpoints) > 0:
        checkpoints = natsorted(checkpoints)
        assert len(checkpoints) != 0, "No checkpoints for visualization found."
        checkpoint_file = checkpoints[-1]
        print("Loading [{}]".format(checkpoint_file))
        model = load_model(checkpoint_file)
        model.compile(optimizer="adam",
                      loss="categorical_crossentropy",
                      metrics=["accuracy", utils.f1_score])
        print(model.summary())

        # Load the data
        x_train, y_train, x_test, y_test, vocab_size, tokenizer, max_tweet_length = prepare_data(
            SHUFFLE)

        # Get the word to index and the index to word mappings
        word_index = tokenizer.word_index
        index_to_word = {index: word for word, index in word_index.items()}

        # Evaluate the previously trained model on test data
        test_loss, test_acc, test_fscore = model.evaluate(x_test,
                                                          y_test,
                                                          verbose=1,
                                                          batch_size=256)
        print("Loss: %.3f\nF-score: %.3f\n" % (test_loss, test_fscore))
        return model, index_to_word, x_test
    else:
        # Load the data
        x_train, y_train, x_test, y_test, vocab_size, tokenizer, max_tweet_length = prepare_data(
            SHUFFLE)

        # Get the word to index and the index to word mappings
        word_index = tokenizer.word_index
        index_to_word = {index: word for word, index in word_index.items()}

        # Build, evaluate and save the model
        model = Sequential()
        model.add(
            Embedding(input_dim=vocab_size,
                      output_dim=EMBEDDING_DIM,
                      input_length=max_tweet_length,
                      embeddings_initializer="glorot_normal",
                      name="embedding_layer"))
        model.add(
            LSTM(output_dim=HIDDEN_UNITS,
                 name="recurrent_layer",
                 activation="tanh",
                 return_sequences=True))
        model.add(Flatten())
        model.add(Dense(DENSE_UNITS, activation="relu", name="dense_layer"))
        model.add(Dense(NO_OF_CLASSES, activation="softmax"))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adadelta(),
                      metrics=["accuracy", utils.f1_score])
        model.summary()
        checkpoint = ModelCheckpoint(monitor="val_acc",
                                     filepath=MODEL_PATH +
                                     "model_{epoch:02d}_{val_acc:.3f}.h5",
                                     save_best_only=True,
                                     mode="max")
        model.fit(x_train,
                  y_train,
                  batch_size=BATCH_SIZE,
                  epochs=EPOCHS,
                  validation_data=(x_test, y_test),
                  callbacks=[checkpoint])
        score = model.evaluate(x_test, y_test)
        print("Loss: %.3f\nF-score: %.3f\n" % (score[0], score[1]))
        return model, index_to_word, x_test
                                 value=hindi2index[" "])

#one hot encoding of hindi sequence
y = [
    to_categorical(seq, num_classes=len(hindi2index))
    for seq in hindi_padded_seq
]

eng_train, eng_test, y_train, y_test = train_test_split(eng_padded_seq,
                                                        y,
                                                        test_size=0.05)

#defining architecture of network
model = Sequential()
model.add(
    Embedding(input_dim=len(eng2index), output_dim=22, input_length=max_len))
model.add(Dropout(0.20))
#model.add(Conv1D(64,4,activation="relu",padding="same"))
model.add(
    Bidirectional(
        LSTM(units=128, return_sequences=True, recurrent_dropout=0.25)))
model.add(TimeDistributed(Dense(len(hindi2index), activation="softmax")))
model.summary()

model.compile(optimizer="rmsprop",
              loss="categorical_crossentropy",
              metrics=["accuracy"])
history = model.fit(eng_train,
                    np.array(y_train),
                    validation_data=(eng_test, np.array(y_test)),
                    batch_size=32,
Example #34
0
LOG_FILE = './outputs/log-model-vggcnn-1'

# Read back data
train_reviews = np.load(path_join(ROOT_PATH, "IMDB_train_fulltext_glove_X.npy"))
train_labels = np.load(path_join(ROOT_PATH, "IMDB_train_fulltext_glove_y.npy"))
test_reviews = np.load(path_join(ROOT_PATH, "IMDB_test_fulltext_glove_X.npy"))
test_labels = np.load(path_join(ROOT_PATH, "IMDB_test_fulltext_glove_y.npy"))

WV_FILE_GLOBAL = path_join(ROOT_PATH, './embeddings/wv/glove.42B.300d.120000-glovebox.pkl')

gb_global = pickle.load(open(WV_FILE_GLOBAL, 'rb'))

wv_size = gb_global.W.shape[1]

model = Sequential()
emb = Embedding(gb_global.W.shape[0], wv_size, weights=[gb_global.W],
                    input_length=train_reviews.shape[1])
emb.trainable = False
model.add(emb)
#model.add(Permute((2,1)))
model.add(Convolution1D(128, 3, subsample_length=2, init='he_uniform'))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Convolution1D(128, 3, subsample_length=2, init='he_uniform'))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Convolution1D(128, 3, subsample_length=2, init='he_uniform'))
model.add(Activation('relu'))
model.add(Dropout(0.5))
Example #35
0
def Keras0_helper(_X_tr,
                  _X_va,
                  _X_te,
                  predictors,
                  cat_feats,
                  params,
                  seed=2018):
    os.environ['PYTHONHASHSEED'] = '0'
    np.random.seed(seed)
    rn.seed(seed)
    X_tr = _X_tr[predictors]
    X_va = _X_va[predictors]
    X_te = _X_te[predictors]
    y_tr = _X_tr['is_attributed']
    y_va = _X_va['is_attributed']
    y_te = _X_te['is_attributed']
    print('*************params**************')
    for f in sorted(params):
        print(f + ":", params[f])
    batch_size = int(params['batch_size'])
    epochs_for_lr = float(params['epochs_for_lr'])
    max_epochs = int(params['max_epochs'])
    emb_cate = int(params['emb_cate'])
    dense_cate = int(params['dense_cate'])
    dense_nume_n_layers = int(params['dense_nume_n_layers'])
    drop = float(params['drop'])
    lr = float(params['lr'])
    lr_init = float(params['lr_init'])
    lr_fin = float(params['lr_fin'])
    n_layers = int(params['n_layers'])
    patience = int(params['patience'])
    train_dict = {}
    valid_dict = {}
    test_dict = {}
    input_list = []
    emb_list = []
    numerical_feats = []
    tot_emb_n = 0
    for col in X_tr:
        if col not in cat_feats:
            numerical_feats.append(col)
    if len(cat_feats) > 0:
        for col in cat_feats:
            train_dict[col] = np.array(X_tr[col])
            valid_dict[col] = np.array(X_va[col])
            test_dict[col] = np.array(X_te[col])
            inpt = Input(shape=[1], name=col)
            input_list.append(inpt)
            max_val = np.max(
                [X_tr[col].max(), X_va[col].max(), X_te[col].max()]) + 1
            emb_n = np.min([emb_cate, max_val])
            if get_opt('fixEmb', 'on') == 'on':
                emb_n = emb_cate
            tot_emb_n += emb_n
            if emb_n == 1:
                print("Warning: embedding size emb_n == 1 for column %s; aborting" % col)
                return 0
            print('Embedding size:', max_val, emb_cate, X_tr[col].max(),
                  X_va[col].max(), X_te[col].max(), emb_n, col)
            embd = Embedding(max_val, emb_n)(inpt)
            emb_list.append(embd)
        if len(emb_list) == 1:
            print("Warning: only one categorical embedding was built (len(emb_list) == 1); aborting")
            return 0
        fe = concatenate(emb_list)
        s_dout = SpatialDropout1D(drop)(fe)
        x1 = Flatten()(s_dout)

    if get_opt('sameNDenseAsEmb', '-') == 'on':
        dense_cate = tot_emb_n
    if len(numerical_feats) > 0:
        train_dict['numerical'] = X_tr[numerical_feats].values
        valid_dict['numerical'] = X_va[numerical_feats].values
        test_dict['numerical'] = X_te[numerical_feats].values
        inpt = Input((len(numerical_feats), ), name='numerical')
        input_list.append(inpt)
        x2 = inpt
        for n in range(dense_nume_n_layers):
            x2 = Dense(dense_cate,
                       activation='relu',
                       kernel_initializer=RandomUniform(seed=seed))(x2)
            if get_opt('numeDropout', 'on') != 'off':
                x2 = Dropout(drop)(x2)
            if get_opt('NumeBatchNormalization', 'on') != 'off':
                x2 = BatchNormalization()(x2)

    if len(numerical_feats) > 0 and len(cat_feats) > 0:
        x = concatenate([x1, x2])
    elif len(numerical_feats) > 0:
        x = x2
    elif len(cat_feats) > 0:
        x = x1
    else:
        return 0  # for small data test

    for n in range(n_layers):
        x = Dense(dense_cate,
                  activation='relu',
                  kernel_initializer=RandomUniform(seed=seed))(x)
        if get_opt('lastDropout', 'on') != 'off':
            x = Dropout(drop)(x)
        if get_opt('BatchNormalization', 'off') == 'on' or get_opt(
                'LastBatchNormalization', 'off') == 'on':
            x = BatchNormalization()(x)
    outp = Dense(1,
                 activation='sigmoid',
                 kernel_initializer=RandomUniform(seed=seed))(x)
    model = Model(inputs=input_list, outputs=outp)
    if get_opt('optimizer', 'expo') == 'adam':
        optimizer = Adam(lr=lr)
    elif get_opt('optimizer', 'expo') == 'nadam':
        optimizer = Nadam(lr=lr)
    else:
        exp_decay = lambda init, fin, steps: (init / fin)**(1 /
                                                            (steps - 1)) - 1
        steps = int(len(X_tr) / batch_size) * epochs_for_lr
        lr_init, lr_fin = 0.001, 0.0001  # note: overrides the lr_init/lr_fin values read from params above
        lr_decay = exp_decay(lr_init, lr_fin, steps)
        optimizer = Adam(lr=lr, decay=lr_decay)
    model.compile(loss='binary_crossentropy', optimizer=optimizer)
    model.summary()
    #from keras.utils import plot_model
    #plot_model(model, to_file='model.png')

    model_file = '../work/weights.' + str(os.getpid()) + '.hdf5'
    if get_opt('trainCheck', '-') == 'on':
        training_data = (train_dict, y_tr)
    else:
        training_data = False
    if get_opt('testCheck', '-') == 'on':
        testing_data = (test_dict, y_te)
    else:
        testing_data = False
    aucEarlyStopping = EarlyStopping(training_data=training_data,
                                     validation_data=(valid_dict, y_va),
                                     testing_data=testing_data,
                                     patience=patience,
                                     model_file=model_file,
                                     verbose=1)
    model.fit(train_dict,
              y_tr,
              validation_data=[valid_dict, y_va],
              batch_size=batch_size,
              epochs=max_epochs,
              shuffle=True,
              verbose=2,
              callbacks=[aucEarlyStopping])
    best_epoch = aucEarlyStopping.best_epoch
    print('loading', model_file + '.' + str(best_epoch))
    model.load_weights(model_file + '.' + str(best_epoch))
    _X_te['pred'] = model.predict(test_dict, batch_size=batch_size,
                                  verbose=2)[:, 0]
    _X_va['pred'] = model.predict(valid_dict, batch_size=batch_size,
                                  verbose=2)[:, 0]
    if get_opt('avgEpoch', 0) > 0:
        added = 1
        for i in range(min(get_opt('avgEpoch', 0), patience)):
            best_epoch = aucEarlyStopping.best_epoch + (i + 1)
            if best_epoch >= max_epochs:
                continue
            print('loading', model_file + '.' + str(best_epoch))
            model.load_weights(model_file + '.' + str(best_epoch))
            _X_te['pred'] += model.predict(
                test_dict, batch_size=batch_size, verbose=2)[:, 0] * 0.5
            _X_va['pred'] += model.predict(
                valid_dict, batch_size=batch_size, verbose=2)[:, 0] * 0.5
            added += 0.5
            best_epoch = aucEarlyStopping.best_epoch - (i + 1)
            if best_epoch < 0:
                continue
            print('loading', model_file + '.' + str(best_epoch))
            model.load_weights(model_file + '.' + str(best_epoch))
            _X_te['pred'] += model.predict(
                test_dict, batch_size=batch_size, verbose=2)[:, 0] * 0.5
            _X_va['pred'] += model.predict(
                valid_dict, batch_size=batch_size, verbose=2)[:, 0] * 0.5
            added += 0.5
        _X_te['pred'] /= added
        _X_va['pred'] /= added

    os.system('rm -f ' + model_file + '.*')
    auc = roc_auc_score(y_va, _X_va.pred)
    return auc
EMBEDDING_DIM = 100
tokenizer = Tokenizer(num_words=MAX_NB_WORDS,
                      filters='!"#$%&()*+,-./:;<=>?@[\]^_`{|}~',
                      lower=True)
tokenizer.fit_on_texts(df['description'].values)
word_index = tokenizer.word_index
X = tokenizer.texts_to_sequences(df['description'].values)
X = pad_sequences(X, maxlen=MAX_SEQUENCE_LENGTH)
Y = pd.get_dummies(df['category']).values
X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.20,
                                                    random_state=42)

model = Sequential()
model.add(Embedding(MAX_NB_WORDS, EMBEDDING_DIM, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(6, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

epochs = 5
batch_size = 64

#history = model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size,validation_split=0.1,callbacks=[EarlyStopping(monitor='val_loss', patience=3, min_delta=0.0001)])
#model.save_weights("lstmweightsnew.h5")

model.load_weights("lstmweightsnew.h5")
accr = model.evaluate(X_test, Y_test)
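Since Y was built with pd.get_dummies(df['category']), the column order of that dummy frame defines the index-to-label mapping; a short illustrative continuation that turns the softmax outputs back into category names:

# Illustrative: recover category names from the model output using the
# same column ordering that pd.get_dummies produced for Y above.
import numpy as np

categories = pd.get_dummies(df['category']).columns
probs = model.predict(X_test)
predicted_labels = categories[np.argmax(probs, axis=1)]
print(predicted_labels[:10])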
Example #37
0
def main(argv):
    """ Main function
    
    The flow of actions in this main function is:
    0: Initial steps
    1: Load data (X and y_emb) and needed dictionaries (activity-to-int, etc.)    
    2: Generate K partitions of the dataset (KFold cross-validation)
    3: For each partition (train, test):
       3.1: Build the LSTM model
       3.2: Manage imbalanced data in the training set (SMOTE?)
       3.3: Train the model with the imbalance-corrected training set and use the test set to validate
       3.4: Store the generated learning curves and metrics with the best model (ModelCheckpoint? 
               If results get worse with epochs, use EarlyStopping)
    4: Calculate the mean and std for the metrics obtained for each partition and store
    """
    # 0: Initial steps
    print_configuration_info()
    # fix random seed for reproducibility
    np.random.seed(7)
    # Make an instance of the class Utils
    utils = Utils()

    # Obtain the file number
    maxnumber = utils.find_file_maxnumber(RESULTS + DATASET + '/')
    filenumber = maxnumber + 1
    print('file number: ', filenumber)

    # 1: Load data (X and y_emb)
    print('Loading data')

    # Load activity_dict where every activity name has its associated word embedding
    with open(ACTIVITY_EMBEDDINGS) as f:
        activity_dict = json.load(f)

    # Load the activity indices
    with open(ACTIVITY_TO_INT) as f:
        activity_to_int_dict = json.load(f)

    # Load the index to activity relations
    with open(INT_TO_ACTIVITY) as f:
        int_to_activity = json.load(f)

    # Load embedding matrix, X and y sequences (for y, load both, the embedding and index version)
    embedding_matrix = np.load(EMBEDDING_WEIGHTS)
    X = np.load(X_FILE)
    y_emb = np.load(Y_EMB_FILE)
    # We need the following two lines for StratifiedKFold
    y_index_one_hot = np.load(Y_INDEX_FILE)
    y_index = np.argmax(y_index_one_hot, axis=1)

    # To use oversampling methods in imbalance-learn, we need an activity_index:embedding relation
    # Build it using INT_TO_ACTIVITY and ACTIVITY_EMBEDDINGS files
    activity_index_to_embedding = {}
    for key in int_to_activity:
        activity_index_to_embedding[key] = activity_dict[int_to_activity[key]]

    max_sequence_length = X.shape[
        1]  # TODO: change this to fit the maximum sequence length of all the datasets
    #total_activities = y_train.shape[1]
    ACTION_MAX_LENGTH = embedding_matrix.shape[1]

    print('X shape:', X.shape)
    print('y shape:', y_emb.shape)
    print('y index shape:', y_index.shape)

    print('max sequence length:', max_sequence_length)
    print('features per action:', embedding_matrix.shape[0])
    print('Action max length:', ACTION_MAX_LENGTH)

    # 2: Generate K partitions of the dataset (KFold cross-validation)
    # TODO: Decide between KFold or StratifiedKFold
    # if StratifiedKFold
    skf = StratifiedKFold(n_splits=FOLDS)

    # if KFold
    #kf = KFold(n_splits = FOLDS)

    fold = 0
    # 3: For each partition (train, test):
    metrics_per_fold = utils.init_metrics_per_fold()
    best_epochs = []

    #for train, test in kf.split(X):
    for train, test in skf.split(X, y_index):
        print("%d Train: %s,  test: %s" % (fold, len(train), len(test)))
        X_train = X[train]
        y_train = y_emb[train]
        y_train_index = y_index[train]
        X_val = X[test]
        y_val = y_emb[test]
        y_val_index = y_index_one_hot[test]
        print('Activity distribution %s' % Counter(y_index))

        #   3.1: Build the LSTM model
        print('Building model...')
        sys.stdout.flush()

        model = Sequential()

        model.add(
            Embedding(input_dim=embedding_matrix.shape[0],
                      output_dim=embedding_matrix.shape[1],
                      weights=[embedding_matrix],
                      input_length=max_sequence_length,
                      trainable=EMB_TRAINABLE))
        # Change input shape when using embeddings
        model.add(
            LSTM(512,
                 return_sequences=False,
                 recurrent_dropout=DROPOUT,
                 dropout=DROPOUT,
                 input_shape=(max_sequence_length, embedding_matrix.shape[1])))
        # For regression use a linear dense layer with embedding_matrix.shape[1] size (300 in this case)
        # TODO: consider the need of normalization before calculating the loss (we may use a Lambda layer with L2 norm)
        model.add(Dense(embedding_matrix.shape[1]))
        # TODO: check different regression losses; cosine_proximity could be the best one for us?
        #model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse', 'mae'])
        model.compile(loss=LOSS,
                      optimizer=OPTIMIZER,
                      metrics=['cosine_proximity', 'mse', 'mae'])
        print('Model built')
        print(model.summary())
        sys.stdout.flush()

        #   3.2: Manage imbalanced data in the training set (SMOTE?) -> Conf option TREAT_IMBALANCE
        # NOTE: We may have a problem with SMOTE, since there are some classes with only 1-3 samples and SMOTE needs n_samples < k_neighbors (~5)
        # NOTE: RandomOverSampler could do the trick, however it generates just copies of current samples
        # TODO: Think about a combination between RandomOverSampler for n_samples < 5 and SMOTE?
        # TODO: First attempt without imbalance management
        if (TREAT_IMBALANCE == True):
            ros = RandomOverSampler(
                random_state=42
            )  # sampling_strategy={4:10, 12:10, 14:10, 8:10, 13:10}
            print('Original dataset samples for training %s' %
                  len(y_train_index))
            print('Original dataset shape for training %s' %
                  Counter(y_train_index))
            X_train_res, y_train_index_res = ros.fit_resample(
                X_train, y_train_index)
            print('Resampled dataset samples for training %s' %
                  len(y_train_index_res))
            print('Resampled dataset shape for training %s' %
                  Counter(y_train_index_res))
            y_train_res = []
            for j in y_train_index_res:
                y_train_res.append(activity_index_to_embedding[str(j)])
            y_train_res = np.array(y_train_res)
            print("y_train_res shape: ", y_train_res.shape)
        else:
            X_train_res = X_train
            y_train_res = y_train

        #   3.3: Train the model with the imbalance-corrected training set and use the test set to validate
        print('Training...')
        sys.stdout.flush()
        # Define the callbacks to be used (EarlyStopping and ModelCheckpoint)
        # TODO: Do we need EarlyStopping here?
        #earlystopping = EarlyStopping(monitor='val_loss', patience=100, verbose=0)
        # TODO: improve file naming for multiple architectures
        weights_file = WEIGHTS + DATASET + '/' + str(filenumber).zfill(
            2) + '-' + EXPERIMENT_ID + '-fold' + str(fold) + WEIGHTS_FILE_ROOT
        modelcheckpoint = ModelCheckpoint(weights_file,
                                          monitor='val_loss',
                                          save_best_only=True,
                                          verbose=0)
        callbacks = [modelcheckpoint]
        history = model.fit(X_train_res,
                            y_train_res,
                            batch_size=BATCH_SIZE,
                            epochs=EPOCHS,
                            validation_data=(X_val, y_val),
                            shuffle=True,
                            callbacks=callbacks)
        #   3.4: Store the generated learning curves and metrics with the best model (ModelCheckpoint?) -> Conf option SAVE
        plot_filename = PLOTS + DATASET + '/' + str(filenumber).zfill(
            2) + '-' + EXPERIMENT_ID + '-fold' + str(fold)
        #plot_training_info(['loss'], True, history.history, plot_filename)
        if SAVE == True:
            utils.plot_training_info(['loss'], True, history.history,
                                     plot_filename)
            print("Plots saved in " + PLOTS + DATASET + '/')
        print("Training finished")

        # Print the best val_loss
        min_val_loss = min(history.history['val_loss'])
        min_val_loss_index = history.history['val_loss'].index(min_val_loss)
        print("Validation loss: " + str(min_val_loss) + " (epoch " +
              str(min_val_loss_index) + ")")
        best_epochs.append(min_val_loss_index)
        model.load_weights(weights_file)
        yp = model.predict(X_val, batch_size=BATCH_SIZE, verbose=1)
        # yp has the embedding predictions of the regressor network
        # Obtain activity labels from embedding predictions
        ypreds = obtain_class_predictions(yp, activity_dict,
                                          activity_to_int_dict,
                                          int_to_activity)

        # Calculate the metrics
        ytrue = np.argmax(y_val_index, axis=1)
        print("ytrue shape: ", ytrue.shape)
        print("ypreds shape: ", ypreds.shape)

        # Use scikit-learn metrics to calculate confusion matrix, accuracy, precision, recall and F-Measure
        """
        cm = confusion_matrix(ytrue, ypreds)
    
        # Normalize the confusion matrix by row (i.e by the number of samples
        # in each class)
        cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        np.set_printoptions(precision=3, linewidth=1000, suppress=True)
        
        # Save also the cm to a txt file
        results_file_root = RESULTS + DATASET + '/' + str(filenumber).zfill(2) + '-' + EXPERIMENT_ID + '-fold' + str(fold)
        np.savetxt(results_file_root + '-cm.txt', cm, fmt='%.0f')   
        
        np.savetxt(results_file_root+'-cm-normalized.txt', cm_normalized, fmt='%.3f')
        print("Confusion matrices saved in " + RESULTS + DATASET + '/')
        """
        # Plot non-normalized confusion matrix -> Conf option SAVE
        if SAVE == True:
            results_file_root = RESULTS + DATASET + '/' + str(
                filenumber).zfill(2) + '-' + EXPERIMENT_ID + '-fold' + str(
                    fold)
            utils.plot_heatmap(
                ytrue,
                ypreds,
                classes=activity_to_int_dict.keys(),
                title='Confusion matrix, without normalization, fold ' +
                str(fold),
                path=results_file_root + '-cm.png')

            # Plot normalized confusion matrix
            utils.plot_heatmap(ytrue,
                               ypreds,
                               classes=activity_to_int_dict.keys(),
                               normalize=True,
                               title='Normalized confusion matrix, fold ' +
                               str(fold),
                               path=results_file_root + '-cm-normalized.png')

        #Dictionary with the values for the metrics (precision, recall and f1)
        metrics = utils.calculate_evaluation_metrics(ytrue, ypreds)
        metrics_per_fold = utils.update_metrics_per_fold(
            metrics_per_fold, metrics)
        # Update fold counter
        fold += 1

    # 4: Calculate the mean and std for the metrics obtained for each partition and store (always)
    metrics_per_fold = utils.calculate_aggregate_metrics_per_fold(
        metrics_per_fold)
    metrics_filename = RESULTS + DATASET + '/' + str(filenumber).zfill(
        2) + '-' + EXPERIMENT_ID + '-complete-metrics.json'
    with open(metrics_filename, 'w') as fp:
        json.dump(metrics_per_fold, fp, indent=4)
    print("Metrics saved in " + metrics_filename)
    print("Avg best epoch: " + str(np.mean(best_epochs)) + ", min: " +
          str(min(best_epochs)) + ", max: " + str(max(best_epochs)))
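obtain_class_predictions is defined elsewhere in the project; a hypothetical sketch of how the regressor's embedding outputs could be mapped back to activity labels by nearest cosine neighbour among the known activity embeddings:

# Hypothetical sketch only; the project's actual obtain_class_predictions is not shown here.
import numpy as np

def obtain_class_predictions_sketch(yp, activity_dict, activity_to_int_dict):
    # assign each predicted embedding the activity whose embedding is closest in cosine similarity
    names = list(activity_dict.keys())
    emb = np.array([activity_dict[name] for name in names], dtype='float32')
    emb = emb / np.linalg.norm(emb, axis=1, keepdims=True)
    yp = yp / np.linalg.norm(yp, axis=1, keepdims=True)
    nearest = np.argmax(yp @ emb.T, axis=1)
    return np.array([activity_to_int_dict[names[i]] for i in nearest])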
print(len(y_test), 'testing samples')

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, Bidirectional
from keras import optimizers

maxlen = 500

x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

model = Sequential()
model.add(Embedding(max_words, 128, input_length=maxlen))
model.add(Bidirectional(LSTM(128, dropout=0.2, recurrent_dropout=0.2)))

model.add(Dense(1, activation='sigmoid'))

optimizer = optimizers.RMSprop(0.001)
model.compile(optimizer=optimizer,
              loss='binary_crossentropy',
              metrics=['accuracy'])

from keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='val_loss',
                           min_delta=0,
                           patience=10,
                           verbose=1,
                           mode='min')
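The listing stops at the callback definition; a sketch of how early_stop would typically be wired into training (batch size, epoch count and validation split here are placeholders):

# Illustrative: pass the EarlyStopping callback to model.fit so val_loss is monitored.
history = model.fit(x_train, y_train,
                    batch_size=32,          # placeholder
                    epochs=50,              # placeholder
                    validation_split=0.2,   # provides the val_loss the callback monitors
                    callbacks=[early_stop])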
Example #39
0
def cmodel(path):

    l2_n = 0.000025
    learning_rate = 0.001
    batch_s = 128
    epoch = 20
    input_dime = 38
    #train_x += test_x
    #train_y += test_y
    x = []
    y = []
    pathdir = os.listdir(path)
    i = 0
    label = []

    for d in pathdir:
        print("in %s / %s" % (path, d))
        xt = notetotrain(path + d)
        label.append(str(d))
        print(len(xt))
        x += xt
        y = y + [i for j in range(len(xt))]
        #y += label
        i += 1

    #print (len(x))
    #print (len(y))
    #print (y)
    print(label)
    tray = []
    train_x, test_x, train_y, test_y = train_test_split(x,
                                                        y,
                                                        test_size=0.1,
                                                        random_state=1)
    for ia in range(i):
        tray.append(test_y.count(ia))
    print(tray)
    train_x = np.array(train_x) / 37
    train_y = np.array(train_y)
    test_x = np.array(test_x) / 37
    test_y = np.array(test_y)

    #train_xr = [train_x,train_x,train_x]
    #test_xr =  [test_x,test_x,test_x]
    '''
    datapath = "./edata/"
    train_x = np.load(datapath+"trainx.npy")#,train_x)
    train_y = np.load(datapath+"trainy.npy")#,train_y)
    test_x =  np.load(datapath+"testx.npy")#,test_x)
    test_y =  np.load(datapath+"testy.npy")#),test_y)
    #np.load(
    '''
    train_y = to_categorical(train_y, i)
    test_y = to_categorical(test_y, i)
    #print (train_y[0])
    #print (train_x[0])
    num_class = i
    #print (i)
    #print (num_class)
    #print (len(train_y))
    #print (len(train_y[0]))
    #print (train_y[0])
    seed = 7
    np.random.seed(seed)
    model = Sequential()
    o_d = 38 * 120
    '''
    model.add(Reshape((128, 1), input_shape=((128),)))
    model.add(Conv1D(64, 1, activation='relu' ))
    model.add(Conv1D(64, 1, activation='relu'))
    model.add(MaxPooling1D(3))
    model.add(Conv1D(128, 1, activation='relu'))
    model.add(Conv1D(128, 1, activation='relu'))
    model.add(GlobalAveragePooling1D())
    model.add(Dropout(0.5))
    '''

    model.add(Embedding(input_dim=38, output_dim=o_d, input_length=128))

    model.add(Dense(128, activation="relu"))

    #model.add(Embedding(input_dim=38,output_dim=o_d,input_length=128))
    #model.add(Bidirectional(LSTM(1024,return_sequences=True),input_shape=(52,1)))
    model.add(
        LSTM(
            128,
            return_sequences=True,
            activation="relu",
            #use_bias = True,
            recurrent_initializer="ones",
            kernel_initializer=glorot_normal(seed=1)))
    #model.add(Dropout(0.1))
    model.add(Dense(128, activation="relu"))

    model.add(
        LSTM(
            64,
            return_sequences=True,
            activation="relu",
            #use_bias = True,
            recurrent_initializer="ones",
            kernel_initializer=glorot_normal(seed=2)))
    #model.add(Dropout(0.1))
    model.add(Dense(64, activation="relu"))
    #model.add(Dropout(0.1))

    model.add(  #Bidirectional
        (LSTM(64,
              activation="tanh",
              use_bias=True,
              recurrent_initializer="orthogonal",
              kernel_initializer=glorot_normal(seed=3))))

    model.add(
        Dense(i,
              kernel_initializer=keras.initializers.random_normal(stddev=1,
                                                                  seed=3),
              kernel_regularizer=l2(l2_n)
              #activation="softmax"
              ))
    #model.add(BatchNormalization())
    model.add(Activation("softmax"))

    print(model.summary())
    #adam = Adam(learning_rate)
    checkpath = "../RNNcheckpoint/esaved-model-{epoch:02d}-{val_acc:.2f}.hdf5"
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(lr=learning_rate,
                       decay=0.01),  #SGD(lr=learning_rate,decay = 1e-5,
        #momentum=0.9,nesterov=True),#'adam',
        metrics=['acc'])
    #model = keras.models.load_model("./emodel.h5")
    checkpoint = ModelCheckpoint(checkpath,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=False,
                                 mode='max')
    callbacks_list = [checkpoint]
    model.fit(train_x,
              train_y,
              batch_size=batch_s,
              callbacks=callbacks_list,
              epochs=epoch,
              verbose=1,
              validation_data=(test_x, test_y),
              shuffle=True)
    #Ki.clear_session()
    mp = "./emodel.h5"
    model.save(mp)
Example #40
0
    def _generate_model(self, lembedding, num_classes=2, first_kernel_size=3,
                        num_features=1024, conv_dropout=False, train_vectors=True):

        model = Sequential()
        if lembedding.vector_box.W is None:
            emb = Embedding(lembedding.vector_box.size,
                            lembedding.vector_box.vector_dim,
                            W_constraint=None,
                            input_length=lembedding.size)
        else:
            emb = Embedding(lembedding.vector_box.size,
                            lembedding.vector_box.vector_dim,
                            weights=[lembedding.vector_box.W], W_constraint=None,
                            input_length=lembedding.size)
        emb.trainable = train_vectors
        model.add(emb)

        # Two conv layers with original kernel size, maxpooling is 2
        model.add(Convolution1D(num_features, first_kernel_size, init='uniform'))
        model.add(Activation('relu'))
        model.add(MaxPooling1D(2))
        if conv_dropout:
            model.add(Dropout(0.25))

        model.add(Convolution1D(num_features, first_kernel_size, init='uniform'))
        model.add(Activation('relu'))
        model.add(MaxPooling1D(2))
        if conv_dropout:
            model.add(Dropout(0.25))

        # Three conv layers with kernel size = 3, no maxpooling
        model.add(Convolution1D(num_features, 3, init='uniform'))
        model.add(Activation('relu'))
        if conv_dropout:
            model.add(Dropout(0.25))

        model.add(Convolution1D(num_features, 3, init='uniform'))
        model.add(Activation('relu'))
        if conv_dropout:
            model.add(Dropout(0.25))

        model.add(Convolution1D(num_features, 3, init='uniform'))
        model.add(Activation('relu'))
        if conv_dropout:
            model.add(Dropout(0.25))

        # One final conv layer with maxpooling
        model.add(Convolution1D(num_features, 3, init='uniform'))
        model.add(Activation('relu'))
        model.add(MaxPooling1D(2))
        model.add(Dropout(0.25))

        model.add(Flatten())

        # Two dense layers with heavy dropout
        model.add(Dense(2048))
        model.add(Dropout(0.5))

        model.add(Dense(2048))
        model.add(Dropout(0.5))

        if num_classes == 2:
            model.add(Dense(1, activation='sigmoid'))
            if self.optimizer is None:
                self.optimizer = 'rmsprop'
            model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
        else:
            if self.optimizer is None:
                self.optimizer = 'rmsprop'
            model.add(Dense(num_classes, activation='softmax'))
            model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

        return model
Example #41
0
File: SRNN.py Project: whonor/srnn
    embeddings_index[word] = coefs
f.close()
print('Found %s word vectors.' % len(embeddings_index))

#use pre-trained GloVe word embeddings to initialize the embedding layer
embedding_matrix = np.random.random((MAX_NUM_WORDS + 1, EMBEDDING_DIM))
for word, i in vocab.items():
    if i < MAX_NUM_WORDS:
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in the embedding index stay randomly initialized.
            embedding_matrix[i] = embedding_vector

embedding_layer = Embedding(MAX_NUM_WORDS + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=int(MAX_LEN / 64),
                            trainable=True)

#build model
print("Build Model")
input1 = Input(shape=(int(MAX_LEN / 64), ), dtype='int32')
embed = embedding_layer(input1)
gru1 = GRU(NUM_FILTERS,
           recurrent_activation='sigmoid',
           activation=None,
           return_sequences=False)(embed)
Encoder1 = Model(input1, gru1)

input2 = Input(shape=(
    8,