Example #1
def build_reference_annotation_1d_model_from_args(args,
                                                  conv_width = 6,
                                                  conv_layers = [128, 128, 128, 128],
                                                  conv_dropout = 0.0,
                                                  conv_batch_normalize = False,
                                                  spatial_dropout = True,
                                                  max_pools = [],
                                                  padding='valid',
                                                  activation = 'relu',
                                                  annotation_units = 16,
                                                  annotation_shortcut = False,
                                                  annotation_batch_normalize = True,
                                                  fc_layers = [64],
                                                  fc_dropout = 0.0,
                                                  fc_batch_normalize = False,
                                                  fc_initializer = 'glorot_normal',
                                                  kernel_initializer = 'glorot_normal',
                                                  alpha_dropout = False
                                                  ):
    '''Build Reference 1d CNN model for classifying variants.

    Architecture specified by parameters.
    Dynamically sets input channels based on args via tensor_maps.total_input_channels_from_args(args)
    Uses the functional API.
    Prints out model summary.

    Arguments
        args.annotations: The variant annotations, perhaps from a HaplotypeCaller VCF.
        args.labels: The output labels (e.g. SNP, NOT_SNP, INDEL, NOT_INDEL)

    Returns
        The keras model
    '''
    in_channels = tensor_maps.total_input_channels_from_args(args)
    concat_axis = -1
    x = reference = Input(shape=(args.window_size, in_channels), name=args.tensor_name)

    max_pool_diff = len(conv_layers)-len(max_pools)
    for i,c in enumerate(conv_layers):

        if conv_batch_normalize:
            x = Conv1D(filters=c, kernel_size=conv_width, activation='linear', padding=padding, kernel_initializer=kernel_initializer)(x)
            x = BatchNormalization(axis=concat_axis)(x)
            x = Activation(activation)(x)
        else:
            x = Conv1D(filters=c, kernel_size=conv_width, activation=activation, padding=padding, kernel_initializer=kernel_initializer)(x)

        if conv_dropout > 0 and alpha_dropout:
            x = AlphaDropout(conv_dropout)(x)
        elif conv_dropout > 0 and spatial_dropout:
            x = SpatialDropout1D(conv_dropout)(x)
        elif conv_dropout > 0:
            x = Dropout(conv_dropout)(x)

        if i >= max_pool_diff:
            x = MaxPooling1D(max_pools[i-max_pool_diff])(x)

    f = Flatten()(x)

    annotations = annotations_in = Input(shape=(len(args.annotations),), name=args.annotation_set)
    if annotation_batch_normalize:
        annotations_in = BatchNormalization(axis=concat_axis)(annotations_in)
    annotation_mlp = Dense(units=annotation_units, kernel_initializer=fc_initializer, activation=activation)(annotations_in)

    x = layers.concatenate([f, annotation_mlp], axis=1)
    for fc in fc_layers:
        if fc_batch_normalize:
            x = Dense(units=fc, activation='linear', kernel_initializer=fc_initializer)(x)
            x = BatchNormalization(axis=1)(x)
            x = Activation(activation)(x)
        else:
            x = Dense(units=fc, activation=activation, kernel_initializer=fc_initializer)(x)

        if fc_dropout > 0 and alpha_dropout:
            x = AlphaDropout(fc_dropout)(x)
        elif fc_dropout > 0:
            x = Dropout(fc_dropout)(x)

    if annotation_shortcut:
        x = layers.concatenate([x, annotations_in], axis=1)

    prob_output = Dense(units=len(args.labels), activation='softmax', name='softmax_predictions')(x)

    model = Model(inputs=[reference, annotations], outputs=[prob_output])

    adam = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=1.)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=get_metrics(args.labels))
    model.summary()

    if os.path.exists(args.weights_hd5):
        model.load_weights(args.weights_hd5, by_name=True)
        print('Loaded model weights from:', args.weights_hd5)

    return model
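A minimal, self-contained sketch of the Conv1D -> BatchNormalization -> Activation ordering used in the batch-normalized branch above (the window size and channel count are assumed values, not taken from args):

from keras.layers import Input, Conv1D, BatchNormalization, Activation
from keras.models import Model

x = inp = Input(shape=(128, 15))  # (window_size, in_channels), assumed
x = Conv1D(filters=64, kernel_size=6, activation='linear', padding='valid')(x)
x = BatchNormalization(axis=-1)(x)  # normalize over the channel axis
x = Activation('relu')(x)           # non-linearity applied after BN
Model(inp, x).summary()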
Example #2
embedding_matrix = np.zeros((num_words, embed_size))
for word, i in word_index.items():
    if i >= max_features:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector


sequence_input = Input(shape=(maxlen, ))
x = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=False)(sequence_input)
x = SpatialDropout1D(0.2)(x)
x = Bidirectional(GRU(128, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))(x)
x = Conv1D(64, kernel_size=3, padding="valid", kernel_initializer="glorot_uniform")(x)
avg_pool = GlobalAveragePooling1D()(x)
max_pool = GlobalMaxPooling1D()(x)
x = concatenate([avg_pool, max_pool])
# x = Dense(128, activation='relu')(x)
# x = Dropout(0.1)(x)
preds = Dense(6, activation="sigmoid")(x)
model = Model(sequence_input, preds)
model.compile(loss='binary_crossentropy', optimizer=Adam(lr=1e-3), metrics=['accuracy'])


Example #3
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, SpatialDropout1D, LSTM, Dense

ln = [len(i.split()) for i in corpus]
max_len = max(ln)

w2v_pad = pad_sequences(w2v, maxlen=max_len)
inp = max([max(i) for i in w2v])  # largest token index, used to size the embedding

model2 = Sequential()
model2.add(
    Embedding(input_dim=inp + 1, output_dim=128,
              input_length=w2v_pad.shape[1]))
model2.add(SpatialDropout1D(rate=.1))
model2.add(LSTM(units=300, dropout=.1, recurrent_dropout=.1))
model2.add(Dense(3, activation='softmax'))
# 3-way softmax output, so use categorical (not binary) cross-entropy
model2.compile(loss="categorical_crossentropy",
               metrics=['accuracy'],
               optimizer='adam')
model2.summary()

model2.fit(x=w2v_pad, y=c2, epochs=5)

model2.predict(w2v_pad)
# prd2 = pd.DataFrame(data=np.round(model2.predict(w2v_pad)), columns=idx)

# Note: use Dense(1) for a continuous target; then you don't one-hot encode y.
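A minimal sketch of the note above, contrasting the two output heads (the input dimension is assumed):

from keras.models import Sequential
from keras.layers import Dense

# continuous target: one linear unit, MSE loss, raw y (no one-hot encoding)
reg = Sequential([Dense(1, input_shape=(10,))])
reg.compile(loss='mse', optimizer='adam')

# 3-class target: softmax units, categorical cross-entropy, one-hot y
clf = Sequential([Dense(3, activation='softmax', input_shape=(10,))])
clf.compile(loss='categorical_crossentropy', optimizer='adam')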
Example #4
embedding_matrix = np.concatenate((embedding_matrix_1, embedding_matrix_2, embedding_matrix_3, embedding_matrix_4), axis=1)  
del embedding_matrix_1, embedding_matrix_2, embedding_matrix_3, embedding_matrix_4
gc.collect()
np.shape(embedding_matrix)

# LSTM:


# https://www.kaggle.com/sudalairajkumar/a-look-at-different-embeddings
# https://www.kaggle.com/strideradu/word2vec-and-gensim-go-go-go

inp = Input(shape=(maxlen,))
x = Embedding(max_features, embed_size * 4, weights=[embedding_matrix])(inp)
x = SpatialDropout1D(S_DROPOUT)(x)
x = Bidirectional(CuDNNGRU(128, return_sequences=True))(x)
avg_pool = GlobalAveragePooling1D()(x)
max_pool = GlobalMaxPooling1D()(x)
conc = concatenate([avg_pool, max_pool])
x = Dense(16, activation="relu")(conc)
x = Dropout(DROPOUT)(x)
x = Dense(1, activation="sigmoid")(x)
model = Model(inputs=inp, outputs=x)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


model.fit(train_X, train_y, batch_size=512, epochs=2, validation_data=(val_X, val_y))
Example #5
    for col in cont_cols:
        X[col] = dataset[col].values
    return X


# Dictionary of inputs
emb_n = 40
dense_n = 1000
# Build the inputs, embeddings and concatenate them all for each column
emb_inputs = dict((col, Input(shape=[1], name=col)) for col in embids)
cont_inputs = dict((col, Input(shape=[1], name=col)) for col in cont_cols)
emb_model = dict(
    (col, Embedding(embmaxs[col], emb_n)(emb_inputs[col])) for col in embids)
fe = concatenate([(emb_) for emb_ in emb_model.values()])
### Rest of the model
s_dout = SpatialDropout1D(0.1)(fe)
fl1 = Flatten()(s_dout)
# conv_layers = dict(( ('conv'+str(i), Conv1D(int(200/i), kernel_size=2**i, strides=1, padding='same', name = 'conv'+str(i))(s_dout)) for i in range(2, 5)))
conv1 = Conv1D(400, kernel_size=4, strides=1, padding='same',
               name='conv1')(s_dout)
conv2 = Conv1D(200, kernel_size=8, strides=1, padding='same',
               name='conv2')(conv1)
conv3 = Conv1D(100, kernel_size=16, strides=1, padding='same',
               name='conv3')(conv2)
conv4 = Conv1D(50, kernel_size=32, strides=1, padding='same',
               name='conv4')(conv3)
#flatten_layers = dict((  ('flatten_conv'+str(i), Flatten(name = 'flatten_conv'+str(i))(conv_layers['conv'+str(i)])    )  for i in range(2, 5)))
flatten_layer = Flatten(name='flatten_conv4')(conv4)
# concat = concatenate([(f_inp) for f_inp in flatten_layers.values()] + [(c_inp) for c_inp in cont_inputs.values()])
concat = concatenate([(flatten_layer)] + [(c_inp)
                                          for c_inp in cont_inputs.values()])
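The snippet above ends before the model is assembled; here is a self-contained mini-example of the same per-column embedding pattern (column names and cardinalities are made up for illustration):

from keras.layers import (Input, Embedding, SpatialDropout1D, Flatten, Dense,
                          concatenate)
from keras.models import Model

cols = {'app': 100, 'device': 50}  # assumed categorical cardinalities
inputs = dict((c, Input(shape=[1], name=c)) for c in cols)
embs = [Embedding(n, 8)(inputs[c]) for c, n in cols.items()]
fe = concatenate(embs)                    # (batch, 1, 16)
x = Flatten()(SpatialDropout1D(0.1)(fe))
out = Dense(1, activation='sigmoid')(x)
Model(inputs=list(inputs.values()), outputs=out).summary()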
Example #6
    def compile_elmo(self, print_summary=False):
        """
        Compiles a Language Model RNN based on the given parameters
        """

        if self.parameters['token_encoding'] == 'word':
            # Train word embeddings from scratch
            word_inputs = Input(shape=(None, ),
                                name='word_indices',
                                dtype='int32')
            embeddings = Embedding(self.parameters['vocab_size'],
                                   self.parameters['hidden_units_size'],
                                   trainable=True,
                                   name='token_encoding')
            inputs = embeddings(word_inputs)

            # Token embeddings for Input
            drop_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(inputs)
            lstm_inputs = TimestepDropout(
                self.parameters['word_dropout_rate'])(drop_inputs)

            # Pass outputs as inputs to apply sampled softmax
            next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
            previous_ids = Input(shape=(None, 1),
                                 name='previous_ids',
                                 dtype='float32')
        elif self.parameters['token_encoding'] == 'char':
            # Train character-level representation
            word_inputs = Input(shape=(
                None,
                self.parameters['token_maxlen'],
            ),
                                dtype='int32',
                                name='char_indices')
            inputs = self.char_level_token_encoder()(word_inputs)

            # Token embeddings for Input
            drop_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(inputs)
            lstm_inputs = TimestepDropout(
                self.parameters['word_dropout_rate'])(drop_inputs)

            # Pass outputs as inputs to apply sampled softmax
            next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
            previous_ids = Input(shape=(None, 1),
                                 name='previous_ids',
                                 dtype='float32')

        # Reversed input for backward LSTMs
        re_lstm_inputs = Lambda(function=ELMo.reverse)(lstm_inputs)
        mask = Lambda(function=ELMo.reverse)(drop_inputs)

        # Forward LSTMs
        for i in range(self.parameters['n_lstm_layers']):
            if self.parameters['cuDNN']:
                lstm = CuDNNLSTM(
                    units=self.parameters['lstm_units_size'],
                    return_sequences=True,
                    kernel_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']))(lstm_inputs)
            else:
                lstm = LSTM(units=self.parameters['lstm_units_size'],
                            return_sequences=True,
                            activation="tanh",
                            recurrent_activation='sigmoid',
                            kernel_constraint=MinMaxNorm(
                                -1 * self.parameters['cell_clip'],
                                self.parameters['cell_clip']),
                            recurrent_constraint=MinMaxNorm(
                                -1 * self.parameters['cell_clip'],
                                self.parameters['cell_clip']))(lstm_inputs)
            lstm = Camouflage(mask_value=0)(inputs=[lstm, drop_inputs])
            # Projection to hidden_units_size
            proj = TimeDistributed(
                Dense(self.parameters['hidden_units_size'],
                      activation='linear',
                      kernel_constraint=MinMaxNorm(
                          -1 * self.parameters['proj_clip'],
                          self.parameters['proj_clip'])))(lstm)
            # Merge Bi-LSTMs feature vectors with the previous ones
            lstm_inputs = add([proj, lstm_inputs],
                              name='f_block_{}'.format(i + 1))
            # Apply variational drop-out between BI-LSTM layers
            lstm_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(lstm_inputs)

        # Backward LSTMs
        for i in range(self.parameters['n_lstm_layers']):
            if self.parameters['cuDNN']:
                re_lstm = CuDNNLSTM(
                    units=self.parameters['lstm_units_size'],
                    return_sequences=True,
                    kernel_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']))(re_lstm_inputs)
            else:
                re_lstm = LSTM(
                    units=self.parameters['lstm_units_size'],
                    return_sequences=True,
                    activation='tanh',
                    recurrent_activation='sigmoid',
                    kernel_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']))(re_lstm_inputs)
            re_lstm = Camouflage(mask_value=0)(inputs=[re_lstm, mask])
            # Projection to hidden_units_size
            re_proj = TimeDistributed(
                Dense(self.parameters['hidden_units_size'],
                      activation='linear',
                      kernel_constraint=MinMaxNorm(
                          -1 * self.parameters['proj_clip'],
                          self.parameters['proj_clip'])))(re_lstm)
            # Merge Bi-LSTMs feature vectors with the previous ones
            re_lstm_inputs = add([re_proj, re_lstm_inputs],
                                 name='b_block_{}'.format(i + 1))
            # Apply variational drop-out between BI-LSTM layers
            re_lstm_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(re_lstm_inputs)

        # Reverse the backward LSTMs' outputs so they are in forward order again
        re_lstm_inputs = Lambda(function=ELMo.reverse,
                                name="reverse")(re_lstm_inputs)

        # Project to Vocabulary with Sampled Softmax
        sampled_softmax = SampledSoftmax(
            num_classes=self.parameters['vocab_size'],
            num_sampled=int(self.parameters['num_sampled']),
            tied_to=embeddings if self.parameters['weight_tying'] else None)
        outputs = sampled_softmax([lstm_inputs, next_ids])
        re_outputs = sampled_softmax([re_lstm_inputs, previous_ids])

        self._model = Model(inputs=[word_inputs, next_ids, previous_ids],
                            outputs=[outputs, re_outputs])
        self._model.compile(optimizer=Adagrad(
            lr=self.parameters['lr'], clipvalue=self.parameters['clip_value']),
                            loss=None)
        if print_summary:
            self._model.summary()
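A minimal sketch of the Lambda-based sequence reversal used for the backward pass above (ELMo.reverse is assumed to flip the time axis, e.g. via K.reverse):

from keras import backend as K
from keras.layers import Input, Lambda
from keras.models import Model

seq = Input(shape=(None, 8))                       # (timesteps, features)
rev = Lambda(lambda t: K.reverse(t, axes=1))(seq)  # flip the time axis
Model(seq, rev).summary()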
Example #7
def CNNRNN_architecture(X_train_word_seq, X_test_word_seq, Y_train,
                        max_seq_len, nb_words, embedding_matrix, embed_dim,
                        output_dir, job_number, batch_size, num_epochs,
                        num_filters, weight_decay):
    num_classes = np.max(Y_train) + 1

    # CNN architecture
    print("training CNN RNN...")

    model = Sequential()
    model.add(
        Embedding(nb_words,
                  embed_dim,
                  weights=[embedding_matrix],
                  input_length=max_seq_len,
                  trainable=False))
    model.add(SpatialDropout1D(0.2))
    model.add(Conv1D(num_filters, 3, activation='relu', padding='same'))
    model.add(MaxPooling1D(2))
    # model.add(Bidirectional(GRU(128, return_sequences=True, dropout=0.1, recurrent_dropout=0.1)))
    model.add(GlobalMaxPooling1D())
    model.add(
        Dense(32,
              activation='relu',
              kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Dense(num_classes,
                    activation='sigmoid'))  # multi-label (k-hot encoding)

    adam = optimizers.Adam(lr=0.001,
                           beta_1=0.9,
                           beta_2=0.999,
                           epsilon=1e-08,
                           decay=0.0)
    model.compile(loss='binary_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy'])
    # checkpoint
    filepath = output_dir
    checkpoint = ModelCheckpoint(filepath + "best.h5",
                                 monitor='val_accuracy',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='max')
    model.summary()
    # define callbacks
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0.01,
                                   patience=5,
                                   verbose=1)
    callbacks_list = [early_stopping, checkpoint]

    # convert integer class labels to one-hot vectors
    Y_train = utils.to_categorical(Y_train, num_classes)
    # model training
    hist = model.fit(X_train_word_seq,
                     Y_train,
                     batch_size=batch_size,
                     epochs=num_epochs,
                     callbacks=callbacks_list,
                     validation_split=0.2,
                     shuffle=True,
                     verbose=2)
    print('X_test_word_seq.shape', X_test_word_seq.shape)
    Y_predict = model.predict(X_test_word_seq)
    print('Y_predict', Y_predict)
    np.save(str(output_dir) + 'Y_predict.npy', Y_predict)
    model.save(str(output_dir) + 'model' + str(job_number))

    return Y_predict
Example #8
def rnn(embedding_matrix, config):
    if config['rnn'] == 'gru' and config['gpu']:
        encode = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
    else:
        encode = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'] * 2, return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'] * 4, return_sequences=True))

    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input((config['max_length'], ), dtype='int32', name='q2_input')
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )

    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # bsz, 1, emb_dims
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    q1_encoded = encode(q1_embed)
    q2_encoded = encode(q2_embed)
    q1_encoded = Dropout(0.2)(q1_encoded)
    q2_encoded = Dropout(0.2)(q2_encoded)
    # bidirectional (second stacked pass, disabled)
    #     q1_encoded = encode2(q1_encoded)
    #     q2_encoded = encode2(q2_encoded)
    # resnet
    rnn_layer2_input1 = concatenate([q1_embed, q1_encoded])
    rnn_layer2_input2 = concatenate([q2_embed, q2_encoded])
    q1_encoded2 = encode2(rnn_layer2_input1)
    q2_encoded2 = encode2(rnn_layer2_input2)

    # add res shortcut
    res_block1 = add([q1_encoded, q1_encoded2])
    res_block2 = add([q2_encoded, q2_encoded2])
    rnn_layer3_input1 = concatenate([q1_embed, res_block1])
    rnn_layer3_input2 = concatenate([q2_embed, res_block2])
    #     rnn_layer3_input1 = concatenate([q1_embed,q1_encoded,q1_encoded2])
    #     rnn_layer3_input2 = concatenate([q2_embed,q2_encoded,q2_encoded2])
    q1_encoded3 = encode3(rnn_layer3_input1)
    q2_encoded3 = encode3(rnn_layer3_input2)
    #     merged1 = GlobalMaxPool1D()(q1_encoded3)
    #     merged2 = GlobalMaxPool1D()(q2_encoded3)
    #     q1_encoded = concatenate([q1_encoded, q1_encoded2], axis=-1)
    #     q2_encoded = concatenate([q2_encoded, q2_encoded2], axis=-1)

    #     merged1 = concatenate([q1_encoded2, q1_embed], axis=-1)
    #     merged2 = concatenate([q2_encoded2, q2_embed], axis=-1)
    #     # TODO add attention rep , maxpooling rep
    q1_encoded3 = concatenate([q1_encoded, q1_encoded2, q1_encoded3])
    q2_encoded3 = concatenate([q2_encoded, q2_encoded2, q2_encoded3])
    merged1 = GlobalMaxPool1D()(q1_encoded3)
    merged2 = GlobalMaxPool1D()(q2_encoded3)
    # avg1 = GlobalAvgPool1D()(q1_encoded3)
    # avg2 = GlobalAvgPool1D()(q2_encoded3)
    # merged1 = concatenate([max1,avg1])
    # merged2 = concatenate([max2,avg2])
    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([merged1, merged2])
    mul_rep = Lambda(lambda x: x[0] * x[1])([merged1, merged2])
    #     jaccard_rep = Lambda(lambda x: x[0]*x[1]/(K.sum(x[0]**2,axis=1,keepdims=True)+K.sum(x[1]**2,axis=1,keepdims=True)-
    #                                               K.sum(K.abs(x[0]*x[1]),axis=1,keepdims=True)))([merged1,merged2])
    #     merged = Concatenate()([merged1, merged2, mul_rep, sub_rep,jaccard_rep])
    merged = Concatenate()([merged1, merged2, mul_rep, sub_rep])
    # Classifier
    dense = Dropout(config['dense_dropout'])(merged)
    dense = BatchNormalization()(dense)
    dense = Dense(config['dense_dim'], activation='relu')(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = BatchNormalization()(dense)
    predictions = Dense(1, activation='sigmoid')(dense)
    model = Model(inputs=[q1, q2], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
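A standalone sketch of the pairwise matching features built above: element-wise |a - b| and a * b over the two pooled sentence vectors (toy vector size assumed):

from keras import backend as K
from keras.layers import Input, Lambda, Concatenate
from keras.models import Model

a = Input(shape=(64,))
b = Input(shape=(64,))
sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([a, b])  # element-wise difference
mul_rep = Lambda(lambda x: x[0] * x[1])([a, b])         # element-wise product
merged = Concatenate()([a, b, mul_rep, sub_rep])        # (batch, 256)
Model([a, b], merged).summary()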
Example #9
######################################################
##  Option1: Embedding Layer (trained from scratch)
######################################################
# The opening lines of this example are truncated in the source; this
# Input/Embedding head is reconstructed from the surviving call tail
# (n_tokens is an assumed name, mirroring the Option2 comment below).
input = Input(shape=(max_len, ))
model = Embedding(input_dim=n_tokens,
                  output_dim=20,
                  input_length=max_len,
                  mask_zero=True)(input)

######################################################
##  Option2: Word2Vec Embedding Layer
######################################################
# wv_layer = Embedding(n_tokens,
#                      vec_dimension,
#                      mask_zero=False, weights=[wv_matrix],
#                      input_length=max_len,
#                      trainable=False)
# model = wv_layer(input) # embedded_sequences

# Input Dropout
model = SpatialDropout1D(0.1)(model)
model = Bidirectional(LSTM(units=100, return_sequences=True))(model)

# Output
output_drop = Dropout(0.2)(model)
dense = TimeDistributed(Dense(n_tags, activation="softmax"))(
    output_drop)  # softmax output layer
# dense = TimeDistributed(Dense(100, activation="softmax"))(output_drop)
crf = CRF(n_tags)  # CRF layer
# crf = CRF(n_tags,sparse_target=True)  # CRF layer
out = crf(dense)  # output
# out = CRF(n_tags)(dense)

### 3. Build Model
model = Model(inputs=input, outputs=out)
batch_size = 32
Example #10
from keras.layers import SpatialDropout1D

my_input = Input(shape=(None, ))
embedding = Embedding(
    input_dim=embedding_matrix.shape[0],
    input_length=max_seq_len,
    output_dim=word_vector_dim,
    trainable=True,
)(my_input)

x = Conv1D(
    filters=nb_filters,
    kernel_size=filter_size_a,
    activation='relu',
)(embedding)
x = SpatialDropout1D(drop_rate)(x)
x = MaxPooling1D(pool_size=5)(x)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
prob = Dense(
    6,
    activation='softmax',
)(x)
model = Model(my_input, prob)

model.compile(loss='categorical_crossentropy',
              optimizer=my_optimizer,
              metrics=['accuracy'])

model.fit(
    x_train,
Example #11
networkcore_emb = sparse.load_npz("model/weibo_coreembedding.npz").todense()
embeddedc = Embedding(len(words) + 1,
                      actors_size,
                      embeddings_initializer=Constant(networkcore_emb),
                      input_length=seqlen,
                      mask_zero=False,
                      trainable=True)(seqsb)

dropout = Dropout(rate=Dropoutrate)(seqsa)
middle = Dense(Hidden,
               activation='relu',
               kernel_regularizer=regularizers.l2(Regularization))(dropout)
batchNorm = BatchNormalization()(middle)

dropoutb = SpatialDropout1D(rate=Dropoutrate)(embedded)
blstm = Bidirectional(CuDNNGRU(Hidden, return_sequences=False),
                      merge_mode='sum')(dropoutb)
batchNormb = BatchNormalization()(blstm)

dropoutc = SpatialDropout1D(rate=Dropoutrate)(embeddedc)
conv = Conv1D(filters=nfilters, kernel_size=kernelSize)(dropoutc)
mpool = MaxPooling1D()(conv)
conv = Conv1D(filters=nfilters, kernel_size=kernelSize)(mpool)
mpool = MaxPooling1D()(conv)
conv = Conv1D(filters=nfilters, kernel_size=kernelSize)(mpool)
mpool = MaxPooling1D()(conv)
conv = Conv1D(filters=nfilters, kernel_size=kernelSize)(mpool)
mpool = MaxPooling1D()(conv)
conv = Conv1D(filters=nfilters, kernel_size=kernelSize)(mpool)
mpool = MaxPooling1D()(conv)
Example #12
def get_av_pos_cnn():

    filter_nums = 325
    drop_rate = 0.5

    input_layer = Input(shape=(MAX_SEQUENCE_LENGTH, ), name='Onehot')
    input_layer_2 = Input(shape=(MAX_SEQUENCE_LENGTH, ), name='POS')

    embedding_layer = Embedding(VOCAB_SIZE,
                                EMBEDDING_SIZE,
                                weights=[embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=False)(input_layer)

    embedding_layer2 = Embedding(50,
                                 30,
                                 input_length=MAX_SEQUENCE_LENGTH,
                                 trainable=True)(input_layer_2)

    embedding_layer = concatenate([embedding_layer, embedding_layer2], axis=2)
    embedded_sequences = SpatialDropout1D(0.25)(embedding_layer)

    conv_0 = Conv1D(filter_nums,
                    1,
                    kernel_initializer="normal",
                    padding="valid",
                    activation="relu")(embedded_sequences)
    conv_1 = Conv1D(filter_nums,
                    2,
                    kernel_initializer="normal",
                    padding="valid",
                    activation="relu")(embedded_sequences)
    conv_2 = Conv1D(filter_nums,
                    3,
                    kernel_initializer="normal",
                    padding="valid",
                    activation="relu")(embedded_sequences)
    conv_3 = Conv1D(filter_nums,
                    4,
                    kernel_initializer="normal",
                    padding="valid",
                    activation="relu")(embedded_sequences)

    attn_0 = Attention(MAX_SEQUENCE_LENGTH)(conv_0)
    avg_0 = GlobalAveragePooling1D()(conv_0)
    maxpool_0 = GlobalMaxPooling1D()(conv_0)

    maxpool_1 = GlobalMaxPooling1D()(conv_1)
    attn_1 = Attention(MAX_SEQUENCE_LENGTH)(conv_1)
    avg_1 = GlobalAveragePooling1D()(conv_1)

    maxpool_2 = GlobalMaxPooling1D()(conv_2)
    attn_2 = Attention(MAX_SEQUENCE_LENGTH)(conv_2)
    avg_2 = GlobalAveragePooling1D()(conv_2)

    maxpool_3 = GlobalMaxPooling1D()(conv_3)
    attn_3 = Attention(MAX_SEQUENCE_LENGTH)(conv_3)
    avg_3 = GlobalAveragePooling1D()(conv_3)

    # the Keras 1 merge(..., mode='concat') API was removed; use concatenate
    v0_col = concatenate([maxpool_0, maxpool_1, maxpool_2, maxpool_3], axis=1)
    v1_col = concatenate([attn_0, attn_1, attn_2, attn_3], axis=1)
    v2_col = concatenate([avg_1, avg_2, avg_0, avg_3], axis=1)
    merged_tensor = concatenate([v0_col, v1_col, v2_col], axis=1)
    output = Dropout(0.7)(merged_tensor)
    output = Dense(units=144)(output)
    output = Activation('relu')(output)
    # output = Dropout(0.5)(output)
    output = Dense(units=6, activation='sigmoid')(output)

    model = Model(inputs=[input_layer, input_layer_2], outputs=output)
    model.compile(loss='binary_crossentropy',
                  optimizer=adam_optimizer,
                  metrics=['accuracy'])
    return model
Example #13
def get_model():
    nclass = 5
    inp = Input(shape=(3000, 1))
    img_1 = Convolution1D(16,
                          kernel_size=5,
                          activation=activations.relu,
                          padding="valid")(inp)
    img_1 = Convolution1D(16,
                          kernel_size=5,
                          activation=activations.relu,
                          padding="valid")(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = SpatialDropout1D(rate=0.01)(img_1)
    img_1 = Convolution1D(32,
                          kernel_size=3,
                          activation=activations.relu,
                          padding="valid")(img_1)
    img_1 = Convolution1D(32,
                          kernel_size=3,
                          activation=activations.relu,
                          padding="valid")(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = SpatialDropout1D(rate=0.01)(img_1)
    img_1 = Convolution1D(32,
                          kernel_size=3,
                          activation=activations.relu,
                          padding="valid")(img_1)
    img_1 = Convolution1D(32,
                          kernel_size=3,
                          activation=activations.relu,
                          padding="valid")(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = SpatialDropout1D(rate=0.01)(img_1)
    img_1 = Convolution1D(256,
                          kernel_size=3,
                          activation=activations.relu,
                          padding="valid")(img_1)
    img_1 = Convolution1D(256,
                          kernel_size=3,
                          activation=activations.relu,
                          padding="valid")(img_1)
    img_1 = GlobalMaxPool1D()(img_1)
    img_1 = Dropout(rate=0.01)(img_1)

    dense_1 = Dropout(rate=0.01)(Dense(64,
                                       activation=activations.relu,
                                       name="dense_1")(img_1))
    dense_1 = Dropout(rate=0.05)(Dense(64,
                                       activation=activations.relu,
                                       name="dense_2")(dense_1))
    dense_1 = Dense(nclass, activation=activations.softmax,
                    name="dense_3")(dense_1)

    model = models.Model(inputs=inp, outputs=dense_1)
    opt = optimizers.Adam(0.001)

    model.compile(optimizer=opt,
                  loss=losses.sparse_categorical_crossentropy,
                  metrics=['acc'])
    model.summary()
    return model
Example #14
    def cabasc(self):
        def sequence_mask(sequence):
            return K.sign(K.max(K.abs(sequence), 2))

        def sequence_length(sequence):
            return K.cast(K.sum(sequence_mask(sequence), 1), tf.int32)

        input_text = Input(shape=(self.max_len, ))
        input_text_l = Input(shape=(self.max_len, ))
        input_text_r = Input(shape=(self.max_len, ))
        input_aspect = Input(shape=(1, ))
        input_mask = Input(shape=(self.max_len, ))

        word_embedding = Embedding(input_dim=self.max_content_vocab_size,
                                   output_dim=self.content_embed_dim)
        text_embed = SpatialDropout1D(0.2)(word_embedding(input_text))
        text_l_embed = SpatialDropout1D(0.2)(word_embedding(input_text_l))
        text_r_embed = SpatialDropout1D(0.2)(word_embedding(input_text_r))

        asp_embedding = Embedding(input_dim=self.max_aspect_vocab_size,
                                  output_dim=self.aspect_embed_dim)
        aspect_embed = asp_embedding(input_aspect)
        aspect_embed = Flatten()(aspect_embed)  # reshape to 2d

        # regarding aspect string as the first unit
        hidden_l = GRU(self.lstm_units,
                       go_backwards=True,
                       return_sequences=True)(text_l_embed)
        hidden_r = GRU(self.lstm_units, return_sequences=True)(text_r_embed)

        # left context attention
        context_attend_l = TimeDistributed(Dense(
            1, activation='sigmoid'))(hidden_l)
        # Note: I couldn't find `reverse_sequence` in keras
        context_attend_l = Lambda(lambda x: tf.reverse_sequence(
            x, sequence_length(x), 1, 0))(context_attend_l)
        context_attend_l = Lambda(lambda x: K.squeeze(x, -1))(context_attend_l)

        # right context attention
        context_attend_r = TimeDistributed(Dense(
            1, activation='sigmoid'))(hidden_r)
        context_attend_r = Lambda(lambda x: K.squeeze(x, -1))(context_attend_r)

        # combine context attention
        # aspect_text_embed = subtract([add([text_l_embed, text_r_embed]), text_embed])
        # aspect_text_mask = Lambda(lambda x: sequence_mask(x))(aspect_text_embed)
        # text_mask = Lambda(lambda x: sequence_mask(x))(text_embed)
        # context_mask = subtract([text_mask, aspect_text_mask])
        # aspect_text_mask_half = Lambda(lambda x: x*0.5)(aspect_text_mask)
        # combine_mask = add([context_mask, aspect_text_mask_half])  # 1 for context, 0.5 for aspect
        context_attend = multiply(
            [add([context_attend_l, context_attend_r]), input_mask])

        # apply context attention
        context_attend_expand = Lambda(lambda x: K.expand_dims(x))(
            context_attend)
        memory = multiply([text_embed, context_attend_expand])

        # sentence-level content attention
        sentence = Lambda(lambda x: K.mean(x, axis=1))(memory)
        final_output = ContentAttention()([memory, aspect_embed, sentence])
        dense_layer = Dense(self.dense_units, activation='relu')(final_output)
        output_layer = Dense(self.n_classes, activation='softmax')(dense_layer)
        return Model(
            [input_text, input_text_l, input_text_r, input_aspect, input_mask],
            output_layer)
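A standalone check of the sequence_mask / sequence_length helpers above (toy tensor, TensorFlow backend assumed):

import numpy as np
import tensorflow as tf
from keras import backend as K

seq = K.constant(np.array([[[1., 2.], [0., 0.], [3., 1.]]]))  # 1 sample, 3 steps
mask = K.sign(K.max(K.abs(seq), 2))        # -> [[1., 0., 1.]]
length = K.cast(K.sum(mask, 1), tf.int32)  # -> [2] non-padding steps
print(K.eval(mask), K.eval(length))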
Example #15
    def build(self):
        depth = [4, 4, 10, 10]
        pooling_type = 'maxpool'
        use_shortcut = False
        input_sent = Input(shape=(self.config.max_len_word,), dtype='int32', name='sent_base')
        weights = np.load(
            os.path.join(self.config.embedding_path, self.config.level + '_level', self.config.embedding_file))

        embedding_layer = Embedding(input_dim=weights.shape[0],
                                    output_dim=weights.shape[-1],
                                    weights=[weights], name='embedding_layer', trainable=True)
        sent_embedding = embedding_layer(input_sent)
        text_embed = SpatialDropout1D(0.2)(sent_embedding)

        # first temporal conv layer
        conv_out = Conv1D(filters=64, kernel_size=3, kernel_initializer='he_uniform', padding='same')(text_embed)
        shortcut = conv_out

        # temporal conv block: 64
        for i in range(depth[0]):
            if i < depth[0] - 1:
                shortcut = conv_out
                conv_out = self.conv_block(inputs=conv_out, filters=64, use_shortcut=use_shortcut, shortcut=shortcut)
            else:
                # shortcut is not used at the last conv block
                conv_out = self.conv_block(inputs=conv_out, filters=64, use_shortcut=use_shortcut, shortcut=None)

        # down-sampling
        # shortcut is the second last conv block output
        conv_out = self.dowm_sampling(inputs=conv_out, pooling_type=pooling_type, use_shortcut=use_shortcut,
                                      shortcut=shortcut)
        shortcut = conv_out

        # temporal conv block: 128
        for i in range(depth[1]):
            if i < depth[1] - 1:
                shortcut = conv_out
                conv_out = self.conv_block(inputs=conv_out, filters=128, use_shortcut=use_shortcut, shortcut=shortcut)
            else:
                # shortcut is not used at the last conv block
                conv_out = self.conv_block(inputs=conv_out, filters=128, use_shortcut=use_shortcut, shortcut=None)

        # down-sampling
        conv_out = self.dowm_sampling(inputs=conv_out, pooling_type=pooling_type, use_shortcut=use_shortcut,
                                      shortcut=shortcut)
        shortcut = conv_out

        # temporal conv block: 256
        for i in range(depth[2]):
            if i < depth[2] - 1:
                shortcut = conv_out
                conv_out = self.conv_block(inputs=conv_out, filters=256, use_shortcut=use_shortcut, shortcut=shortcut)
            else:
                # shortcut is not used at the last conv block
                conv_out = self.conv_block(inputs=conv_out, filters=256, use_shortcut=use_shortcut, shortcut=None)

        # down-sampling
        conv_out = self.dowm_sampling(inputs=conv_out, pooling_type=pooling_type, use_shortcut=use_shortcut,
                                      shortcut=shortcut)

        # temporal conv block: 512
        for i in range(depth[3]):
            if i < depth[3] - 1:
                shortcut = conv_out
                conv_out = self.conv_block(inputs=conv_out, filters=512, use_shortcut=use_shortcut, shortcut=shortcut)
            else:
                # shortcut is not used at the last conv block
                conv_out = self.conv_block(inputs=conv_out, filters=512, use_shortcut=use_shortcut, shortcut=None)

        # 8-max pooling
        conv_out = KMaxPooling(k=8)(conv_out)
        flatten = Flatten()(conv_out)

        fc1 = Dense(2048, activation='relu')(flatten)
        sentence_embed = Dense(2048, activation='relu')(fc1)

        dense_layer = Dense(256, activation='relu')(sentence_embed)
        output = Dense(self.config.num_classes, activation='softmax')(dense_layer)

        return input_sent, output
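The conv_block and dowm_sampling helpers are defined elsewhere in this class; a hedged sketch of what such a temporal conv block typically looks like (assumed structure, following the VDCNN-style comments above):

from keras.layers import Conv1D, BatchNormalization, Activation, add

def conv_block_sketch(inputs, filters, use_shortcut=False, shortcut=None):
    # two temporal convolutions, then an optional residual shortcut
    x = Conv1D(filters, 3, padding='same', kernel_initializer='he_uniform')(inputs)
    x = Activation('relu')(BatchNormalization()(x))
    x = Conv1D(filters, 3, padding='same', kernel_initializer='he_uniform')(x)
    x = Activation('relu')(BatchNormalization()(x))
    if use_shortcut and shortcut is not None:
        x = add([x, shortcut])  # requires matching channel counts
    return x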
Example #16
def get_char_embedding_model():

    ## Imports
    from keras.models import Model, Input
    from keras.layers import LSTM, Embedding, Dense, TimeDistributed
    from keras.layers import Bidirectional, concatenate, SpatialDropout1D

    ## The trick is to wrap the character-level layers in TimeDistributed so that
    ## the same layers are applied to the character sequence of every word

    ## Returns a Tensor
    word_in = Input(shape=(constants.MAX_SENT_LEN, ))
    ortho_word_in = Input(shape=(constants.MAX_SENT_LEN, ))
    ## To find word embedding
    emb_word = Embedding(input_dim=n_words + 2,
                         output_dim=20,
                         input_length=constants.MAX_SENT_LEN,
                         mask_zero=True)(word_in)

    ortho_emb_word = Embedding(input_dim=n_ortho_words + 2,
                               output_dim=20,
                               input_length=constants.MAX_SENT_LEN,
                               mask_zero=True)(ortho_word_in)

    ## To find character embedding for characters of that word
    char_in = Input(shape=(
        constants.MAX_SENT_LEN,
        constants.MAX_WORD_LEN,
    ))
    emb_char = TimeDistributed(
        Embedding(input_dim=n_chars + 2,
                  output_dim=10,
                  input_length=constants.MAX_WORD_LEN,
                  mask_zero=True))(char_in)

    ortho_char_in = Input(shape=(
        constants.MAX_SENT_LEN,
        constants.MAX_WORD_LEN,
    ))
    ortho_emb_char = TimeDistributed(
        Embedding(input_dim=n_ortho_chars + 2,
                  output_dim=10,
                  input_length=constants.MAX_WORD_LEN,
                  mask_zero=True))(ortho_char_in)

    ## Character CNN to get the word encoding by characters
    # char_encoding = TimeDistributed(Conv1D())

    char_encoding = TimeDistributed(
        LSTM(units=20, return_sequences=False,
             recurrent_dropout=0.5))(emb_char)
    ortho_char_encoding = TimeDistributed(
        LSTM(units=20, return_sequences=False,
             recurrent_dropout=0.5))(ortho_emb_char)
    print(char_encoding.shape, ' | ', ortho_char_encoding.shape)

    ## main LSTM
    x = concatenate(
        [char_encoding, emb_word, ortho_char_encoding, ortho_emb_word])
    x = SpatialDropout1D(0.3)(x)

    main_lstm = Bidirectional(
        LSTM(units=50, return_sequences=True, recurrent_dropout=0.6))(x)

    out = TimeDistributed(Dense(n_tags + 1, activation="softmax"))(main_lstm)

    model = Model([char_in, word_in, ortho_char_in, ortho_word_in], out)

    return model
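A quick shape check of the TimeDistributed trick described in the comments above (toy sizes, not the constants used in the function):

import numpy as np
from keras.layers import Input, Embedding, LSTM, TimeDistributed
from keras.models import Model

sents = Input(shape=(5, 7))                      # 5 words, 7 chars per word
emb = TimeDistributed(Embedding(30, 10))(sents)  # (batch, 5, 7, 10)
enc = TimeDistributed(LSTM(20))(emb)             # (batch, 5, 20): one vector per word
m = Model(sents, enc)
print(m.predict(np.zeros((2, 5, 7))).shape)      # (2, 5, 20)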
Example #17
    def load_model(self, model):
        '''Loads Keras model and prints its summary.

        # Arguments:
            model: string, model type.
        # Returns:
            model: compiled Keras model
        '''

        # Rename variables for brevity
        V, E, S = self.vocab_size, self.embed_size, self.seq_len

        if model == 'nn':

            model = Sequential([
                Embedding(V, E, input_length=S),
                SpatialDropout1D(0.2),
                Flatten(),
                Dense(100, activation='relu'),
                Dropout(0.7),
                Dense(1, activation='sigmoid')
            ])

        elif model == 'cnn1d':

            model = Sequential([
                Embedding(V, E, input_length=S),
                SpatialDropout1D(0.2),
                Conv1D(64, 5, padding='same', activation='relu'),
                Dropout(0.3),
                MaxPooling1D(),
                Flatten(),
                Dense(100, activation='relu'),
                Dropout(0.7),
                Dense(1, activation='sigmoid')
            ])

        elif model == 'cnn1d_emb':

            model = Sequential([
                Embedding(V,
                          E,
                          input_length=S,
                          weights=[self.emb],
                          trainable=False),
                SpatialDropout1D(0.2),
                Conv1D(128, 5, padding='same', activation='relu'),
                Dropout(0.5),
                MaxPooling1D(),
                Flatten(),
                Dense(100, activation='relu'),
                Dropout(0.7),
                Dense(1, activation='sigmoid')
            ])

        elif model == 'lstm':

            model = Sequential([
                Embedding(V,
                          E,
                          input_length=S,
                          weights=[self.emb],
                          trainable=False),
                LSTM(100),
                Dense(1, activation='sigmoid')
            ])

        model.compile(loss='binary_crossentropy',
                      optimizer=Adam(),
                      metrics=['accuracy'])

        print(model.summary())
        return model
Example #18
ytrainres_cat = to_categorical(y_train_res, num_classes=2)
yvalres_cat = to_categorical(y_val_res, num_classes=2)
ytestres_cat = to_categorical(y_test_res, num_classes=2)

print((X_train_res.shape, ytrainres_cat.shape, X_val_res.shape,
       yvalres_cat.shape, X_test_res.shape, ytestres_cat.shape))

# Training
# LSTM Model

n_most_common_words = 1000  #150
model = Sequential()
# n_most_common_words: size of the vocabulary
# emb_dim: dimension of the dense embedding
# input_length: length of the input sequences (constant here)
model.add(
    Embedding(n_most_common_words, emb_dim, input_length=X_train_res.shape[1]))
model.add(SpatialDropout1D(dropout))
model.add(LSTM(LSTM_units, dropout=dropout, recurrent_dropout=dropout))
model.add(Dense(2, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
print(model.summary())

import time
start_time = time.time()

history = model.fit(
    X_train_res,
    ytrainres_cat,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.0,
Example #19
def RnnVersion3(n_recurrent=50, n_dense=50, word_embedding_matrix=None, n_filters=50,
                dropout_rate=0.2, l2_penalty=0.0001,
                n_capsule=10, n_routings=5, capsule_dim=16):
    K.clear_session()
    
    def conv_block(x, n, kernel_size):
        x = Conv1D(n, kernel_size, activation='relu') (x)
        x = Conv1D(n_filters, kernel_size, activation='relu') (x)
        x_att = AttentionWithContext()(x)
        x_avg = GlobalAvgPool1D()(x)
        x_max = GlobalMaxPool1D()(x)
        return concatenate([x_att, x_avg, x_max])   
    def att_max_avg_pooling(x):
        x_att = AttentionWithContext()(x)
        x_avg = GlobalAvgPool1D()(x)
        x_max = GlobalMaxPool1D()(x)
        return concatenate([x_att, x_avg, x_max])

    input1_= Input(shape=(170, ), name='input1')
    input2_ = Input(shape=(433, ), name='input2')
    emb = Embedding(21099, 300,trainable=True)(input1_)

    # model 0
    x0 = SpatialDropout1D(dropout_rate)(emb)
    s0 = Bidirectional(
        CuDNNGRU(2*n_recurrent, return_sequences=True,
                 kernel_regularizer=l2(l2_penalty),
                 recurrent_regularizer=l2(l2_penalty)))(x0)
    x0 = att_max_avg_pooling(s0)

    # model 1
    x1 = SpatialDropout1D(dropout_rate)(emb)
    s1 = Bidirectional(
        CuDNNGRU(2*n_recurrent, return_sequences=True,
                 kernel_regularizer=l2(l2_penalty),
                 recurrent_regularizer=l2(l2_penalty)))(x1)
    x1 = att_max_avg_pooling(s1)
    
    # combine sequence output
    x = concatenate([s0, s1])
#     x = att_max_avg_pooling(x)
    x = Bidirectional(
        CuDNNGRU(n_recurrent, return_sequences=True, 
                 kernel_regularizer=l2(l2_penalty),
                 recurrent_regularizer=l2(l2_penalty)))(x)
    x = att_max_avg_pooling(x)
    

    # combine it all
    x = concatenate([x, x0, x1, input2_], name='concatenate')
    
    x = Dense(1024, activation='relu')(x)
    x = Dropout(dropout_rate)(x)

    x = Dense(256, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    
    x = Dense(128, activation='relu')(x)
    x = Dropout(dropout_rate)(x)       
 #   fc = Dense(120, activation='relu')(x)
    outputs = Dense(6, activation='softmax')(x)
    model = Model(inputs=[input1_,input2_], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['accuracy'])
    return model
Example #20
epochs = 25  # 20
weights = True
trainable = True
previous_weights = None
activation = 'sigmoid'
# ======= =======

print('Build model...')
model = Sequential()

if weights:
    model.add(Embedding(max_features, emb_dim, weights=[p.embedding_matrix], trainable=trainable))
else:
    model.add(Embedding(max_features, emb_dim))

model.add(SpatialDropout1D(spatial_dropout))
model.add(Bidirectional(QRNN(emb_dim//2, window_size=window_size, dropout=dropout,
                             kernel_regularizer=l2(kernel_regularizer), bias_regularizer=l2(bias_regularizer),
                             kernel_constraint=maxnorm(kernel_constraint), bias_constraint=maxnorm(bias_constraint))))
model.add(Dropout(dropout))
model.add(Dense(1, activation=activation))

plot_losses = PlotLosses()
plot_accuracy = PlotAccuracy()

reduce_rate = ReduceLROnPlateau(monitor='val_loss')
callbacks_list = [plot_losses, reduce_rate, plot_accuracy]

if clipnorm:
    optimizer = optimizers.Adam(lr=lr, clipnorm=clipnorm)
else:
Example #21
def deepmoji_architecture(nb_classes, nb_tokens, maxlen, feature_output=False, embed_dropout_rate=0, final_dropout_rate=0, embed_l2=1E-6, return_attention=False):
    """
    Returns the DeepMoji architecture uninitialized and
    without using the pretrained model weights.

    # Arguments:
        nb_classes: Number of classes in the dataset.
        nb_tokens: Number of tokens in the dataset (i.e. vocabulary size).
        maxlen: Maximum length of a token sequence.
        feature_output: If True the model returns the penultimate
                        feature vector rather than Softmax probabilities
                        (defaults to False).
        embed_dropout_rate: Dropout rate for the embedding layer.
        final_dropout_rate: Dropout rate for the final Softmax layer.
        embed_l2: L2 regularization for the embedding layer.

    # Returns:
        Model with the given parameters.
    """
    # define embedding layer that turns word tokens into vectors
    # an activation function is used to bound the values of the embedding
    model_input = Input(shape=(maxlen,), dtype='int32')
    embed_reg = L1L2(l2=embed_l2) if embed_l2 != 0 else None
    embed = Embedding(input_dim=nb_tokens,
                      output_dim=256,
                      mask_zero=True,
                      input_length=maxlen,
                      embeddings_regularizer=embed_reg,
                      name='embedding')
    x = embed(model_input)
    x = Activation('tanh')(x)

    # entire embedding channels are dropped out instead of the
    # normal Keras embedding dropout, which drops all channels for entire words
    # many of the datasets contain so few words that losing one or more words can alter the emotions completely
    if embed_dropout_rate != 0:
        embed_drop = SpatialDropout1D(embed_dropout_rate, name='embed_drop')
        x = embed_drop(x)

    # skip-connection from embedding to output eases gradient-flow and allows access to lower-level features
    # ordering of the way the merge is done is important for consistency with the pretrained model
    lstm_0_output = Bidirectional(LSTM(512, return_sequences=True), name="bi_lstm_0")(x)
    lstm_1_output = Bidirectional(LSTM(512, return_sequences=True), name="bi_lstm_1")(lstm_0_output)
    x = concatenate([lstm_1_output, lstm_0_output, x])

    # if return_attention is True in AttentionWeightedAverage, an additional tensor
    # representing the weight at each timestep is returned
    weights = None
    x = AttentionWeightedAverage(name='attlayer', return_attention=return_attention)(x)
    if return_attention:
        x, weights = x

    if not feature_output:
        # output class probabilities
        if final_dropout_rate != 0:
            x = Dropout(final_dropout_rate)(x)

        if nb_classes > 2:
            outputs = [Dense(nb_classes, activation='softmax', name='softmax')(x)]
        else:
            outputs = [Dense(1, activation='sigmoid', name='softmax')(x)]
    else:
        # output penultimate feature vector
        outputs = [x]

    if return_attention:
        # add the attention weights to the outputs if required
        outputs.append(weights)

    return Model(inputs=[model_input], outputs=outputs, name="DeepMoji")
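A usage sketch of the factory above (toy sizes chosen arbitrarily; it requires the custom AttentionWeightedAverage layer to be importable):

model = deepmoji_architecture(nb_classes=2, nb_tokens=5000, maxlen=30,
                              embed_dropout_rate=0.1, final_dropout_rate=0.5)
model.summary()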
Example #22
def main():

    # ##################################     Arguments parser     ##################################
    parser = argparse.ArgumentParser()
    parser.add_argument('--path',
                        default='data/uci-news-aggregator.csv',
                        help='Path to dataset')
    parser.add_argument('--layer',
                        default='lstm',
                        help='LSTM or GRU training layer')
    parser.add_argument('--epochs',
                        type=int,
                        default=8,
                        help='number of training epochs')
    args = parser.parse_args()

    print('model_' + args.layer + '_Ep' + str(args.epochs) + '.h5')
    print(args, args.layer, len(sys.argv))

    # ##################################     1. Data Loading     ##################################

    # Load data from pickle file
    with open("data/pickle_Xtrain.pkl", 'rb') as file:
        pkl_X_train = pickle.load(file)
    with open("data/pickle_ytrain.pkl", 'rb') as file:
        pkl_y_train = pickle.load(file)
    with open("data/pickle_Xtest.pkl", 'rb') as file:
        pkl_X_test = pickle.load(file)
    with open("data/pickle_ytest.pkl", 'rb') as file:
        pkl_y_test = pickle.load(file)

    X_test = pkl_X_test
    y_test = pkl_y_test
    X_train = pkl_X_train
    y_train = pkl_y_train

    # Load tokenizer from pickle file
    with open('data/tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)

    print("Data:", (X_train.shape, y_train.shape, X_test.shape, y_test.shape))

    # ##################################     2. Training     ##################################
    emb_dim = 128
    batch_size = 256
    n_most_common_words = 8000
    epochs = args.epochs

    # TensorBoard
    today = datetime.date.today()
    log_dir = "logs/fit/" + 'model_' + args.layer + '_Ep' + str(
        args.epochs) + '(' + str(today) + ')'
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir,
                                                       histogram_freq=1)
    print("Tensorboard:", log_dir)
    print()

    # Model defining
    model = Sequential()
    model.add(
        Embedding(n_most_common_words, emb_dim, input_length=X_train.shape[1]))
    model.add(SpatialDropout1D(0.7))

    if args.layer.lower() != 'lstm':
        print('GRU MODEL')
        model.add(GRU(64, dropout=0.7, recurrent_dropout=0.7))
    else:
        print('LSTM MODEL')
        model.add(LSTM(64, dropout=0.7, recurrent_dropout=0.7))

    model.add(Dense(4, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['acc'])

    print(model.summary())
    print()
    plot_model(model,
               to_file='saved/model_plot_' + args.layer + '_Ep' +
               str(args.epochs) + '.png',
               show_shapes=True,
               show_layer_names=True)

    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                        validation_split=0.2,
                        callbacks=[EarlyStopping(monitor='val_loss', patience=7, min_delta=0.0001),
                                   tensorboard_callback])

    # ##################################     3. Save Model     ##################################
    model.save('saved/model_' + args.layer + '_Ep' + str(args.epochs) +
               '.h5')  # creates a HDF5 file 'my_model.h5'
Example #23
    ModelCheckpoint(
        filepath=
        './checkpoints/weights.epoch_{epoch:02d}-val_acc_{val_acc:.2f}.h5',
        monitor='loss',
        verbose=0,
        save_best_only=True),
]

CNNBranch = Sequential()
CNNBranch.add(
    Embedding(len(weights),
              output_dim=config.dims,
              weights=[weights],
              input_length=config.sequence_length))
CNNBranch.add(BatchNormalization())
CNNBranch.add(SpatialDropout1D(rate=config.dropout))
CNNBranch.add(
    Conv1D(filters=config.nb_filter,
           kernel_size=config.filter_length,
           padding='valid',
           activation='relu',
           strides=1))
CNNBranch.add(GlobalMaxPooling1D())
CNNBranch.add(BatchNormalization())
CNNBranch.add(Dropout(config.dropout))
CNNBranch.add(Dense(4, activation='softmax'))
CNNBranch.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
CNNBranch.summary()
CNNBranch.fit(X_train,
Example #24
            # crop_end = seq_len
            seq_batch = sequences[i:i + batch_size, crop_start:crop_end, :]
            CAGE_batch = CAGEs[i:i + batch_size, crop_start:crop_end]
            CAGE_batch = np.reshape(CAGE_batch, (-1, seq_len, 1))
            yield (seq_batch, CAGE_batch)

model = Sequential()

for i in range(5):
    # each block maps (say) 300 bp-long vectors of 4 channels to ~300 bp-long
    # vectors of CONVSIZE channels, with each channel computed from 12
    # (CONVWIDTH) positions in the layer below
    model.add(Convolution1D(CONVSIZE, CONVWIDTH, border_mode='same', input_shape=(CROP_SIZE, NUCNUM)))
    model.add(BatchNormalization())
    model.add(LeakyReLU())
    model.add(SpatialDropout1D(0.2))
model.add(Convolution1D(512, 5, border_mode='same'))
model.add(BatchNormalization())
model.add(LeakyReLU())
model.add(SpatialDropout1D(0.1))
model.add(Convolution1D(1, 1, border_mode='same'))

model.compile(loss='mse', optimizer='nadam')
print('Done compiling!')

model.fit_generator(training_batch_generator(sequences, CAGEs, BATCH_SIZE, CROP_SIZE),
                    samples_per_epoch=CAGEs.shape[0],
                    nb_epoch=20, verbose=1)
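
# Note: this example is written against the Keras 1 API (Convolution1D,
# border_mode, fit_generator with samples_per_epoch/nb_epoch). A rough Keras 2
# translation, sketched here as an assumption rather than part of the original:
# model.add(Conv1D(CONVSIZE, CONVWIDTH, padding='same', input_shape=(CROP_SIZE, NUCNUM)))
# model.fit(training_batch_generator(sequences, CAGEs, BATCH_SIZE, CROP_SIZE),
#           steps_per_epoch=CAGEs.shape[0] // BATCH_SIZE, epochs=20, verbose=1)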

print('done Done Training!')
Example #25
0
                 num_features)

    # Routings = 30
    # Num_capsule = 60
    # Dim_capsule = 120
    Routings = 15
    Num_capsule = 30
    Dim_capsule = 60

    sequence_input = Input(shape=(maxlen, ), dtype='int32')
    embedded_sequences = Embedding(input_dim=max_features,
                                   output_dim=num_features,
                                   input_length=maxlen,
                                   weights=[W],
                                   trainable=False)(sequence_input)
    embedded_sequences = SpatialDropout1D(0.1)(embedded_sequences)
    x = Bidirectional(CuDNNGRU(64, return_sequences=True))(embedded_sequences)
    x = Bidirectional(CuDNNGRU(64, return_sequences=True))(x)
    capsule = Capsule(num_capsule=Num_capsule,
                      dim_capsule=Dim_capsule,
                      routings=Routings,
                      share_weights=True,
                      kernel_size=(3, 1))(x)
    # output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule)
    capsule = Flatten()(capsule)
    capsule = Dropout(0.4)(capsule)
    output = Dense(3, activation='softmax')(capsule)
    model = Model(inputs=[sequence_input], outputs=output)

    rmsprop = optimizers.rmsprop(lr=0.01)
    model.compile(loss='categorical_crossentropy',
                  optimizer=rmsprop,
                  # the compile call is truncated in the source; optimizer and
                  # metrics are an assumed completion
                  metrics=['accuracy'])
Example #26
0
for word, i in word_index.items():
    if i >= max_features:
        continue

    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

# Build Model
inp = Input(shape=(maxlen, ))

x = Embedding(max_features,
              embed_size,
              weights=[embedding_matrix],
              trainable=True)(inp)
x = SpatialDropout1D(0.35)(x)

x = Bidirectional(
    LSTM(128, return_sequences=True, dropout=0.15, recurrent_dropout=0.15))(x)
x = Conv1D(64,
           kernel_size=3,
           padding='valid',
           kernel_initializer='glorot_uniform')(x)

avg_pool = GlobalAveragePooling1D()(x)
max_pool = GlobalMaxPooling1D()(x)
x = concatenate([avg_pool, max_pool])

out = Dense(6, activation='sigmoid')(x)

model = Model(inp, out)
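
# The snippet stops at model construction. A plausible way to finish this
# six-label, multi-label setup (an assumption, not shown in the original):
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.fit(X_train, y_train, batch_size=32, epochs=2, validation_split=0.1)  # names/values assumed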
Example #27
0
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector
#----------------------------------

# LSTM model
#----------------------------------
model = Sequential()
model.add(
    Embedding(input_dim=vocab_size,
              output_dim=EMBEDDING_DIM,
              weights=[embedding_matrix],
              input_length=X_train.shape[1],
              trainable=True))

model.add(SpatialDropout1D(0.5))
model.add(LSTM(30, return_sequences=True, recurrent_dropout=0.5))
model.add(LSTM(30, dropout=0.5, recurrent_dropout=0.5))
model.add(Dense(30, activation='sigmoid'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])  #, Precision(), Recall()

print(model.summary())
#----------------------------------

# Fit model
#----------------------------------
batch_size = 128
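
# The "Fit model" step breaks off here in the source. A plausible completion
# (the epoch count is an illustrative assumption):
# history = model.fit(X_train, y_train, epochs=10, batch_size=batch_size,
#                     validation_split=0.2)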
Example #28
0
print(no_to_text)

# Design the NN architecture
# From here on you could build the network in Keras, TensorFlow, or PyTorch.

# KERAS
model = Sequential()
model.add(Embedding(
    n_unique_words, n_dim,
    input_length=max_review_lenth))  # maps each word index to a dense vector
# The first argument (n_unique_words) is the number of distinct words in the
# training set, i.e. the size of the index space the embedding must cover.
# The second argument (n_dim) is the size of each embedding vector.
# The input_length argument determines the length of each input sequence.
# model.output_shape == (None, max_review_lenth, n_dim), where None is the batch dimension.
model.add(SpatialDropout1D(drop_emd))
model.add(Bidirectional(LSTM(n_lstm, dropout=lstm_drpout)))
model.add(Dense(1, activation='sigmoid'))

print(model.summary())

# compiling model
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

modelcheckpoint = ModelCheckpoint(filepath=output_dir +
                                  "/weights.{epoch:02d}.hdf5")
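
# A sketch of the training step that would follow; X_train/y_train and the
# epoch count are assumptions, only the callback above comes from the source:
# model.fit(X_train, y_train, batch_size=128, epochs=4,
#           callbacks=[modelcheckpoint], validation_split=0.2)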
Example #29
0
model.add(Embedding(input_dim=vocab_size,  # opener reconstructed; the snippet head is truncated
                    output_dim=embedding_size,
                    embeddings_initializer=keras.initializers.Zeros(),#'uniform',
                    embeddings_regularizer=regularizer,
                    mask_zero=True,
                    input_length=maxlen,
                    name='embedding_1'))

'''model.add(Embedding(vocab_size, 
                    embedding_size,
                    input_length=maxlen,
                    W_regularizer=regularizer,
                    dropout=p_emb, weights=[embedding],
                    mask_zero=True,
                    name='embedding_1'))'''

model.add(SpatialDropout1D(p_emb, name='dropout_emb_1'))

for i in range(rnn_layers):
    # For reference, the Keras 2 LSTM signature (these kwargs replace the old Keras 1 names):
    # keras.layers.recurrent.LSTM(units, activation='tanh', recurrent_activation='hard_sigmoid',
    #     use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal',
    #     bias_initializer='zeros', unit_forget_bias=True, kernel_regularizer=None, recurrent_regularizer=None,
    #     bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None,
    #     bias_constraint=None, dropout=0.0, recurrent_dropout=0.0)
    
    lstm = LSTM(units=rnn_size,
                return_sequences=True,
                kernel_regularizer=regularizer, #kernel_regularizer
                recurrent_regularizer=regularizer, #recurrent_regularizer
                bias_regularizer=regularizer, #bias_regularizer
                dropout=p_W, #dropout
Example #30
0
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)
X = pad_sequences(X)
X[:2]

# Next, I compose the LSTM network. Note that **embed_dim**, **lstm_out**, **batch_size**, and **droupout_x** are hyperparameters: their values are somewhat intuitive, and they can and should be tuned to achieve good results. Please also note that I use softmax as the output activation: the network is trained with categorical crossentropy, and softmax is the matching activation for that loss.

# In[7]:

embed_dim = 128
lstm_out = 196

model = Sequential()
model.add(Embedding(max_fatures, embed_dim, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

# Here I declare the train and test datasets.

# In[8]:

Y = pd.get_dummies(data['sentiment']).values
X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.20,