def build_reference_annotation_1d_model_from_args(args,
                                                  conv_width=6,
                                                  conv_layers=[128, 128, 128, 128],
                                                  conv_dropout=0.0,
                                                  conv_batch_normalize=False,
                                                  spatial_dropout=True,
                                                  max_pools=[],
                                                  padding='valid',
                                                  activation='relu',
                                                  annotation_units=16,
                                                  annotation_shortcut=False,
                                                  annotation_batch_normalize=True,
                                                  fc_layers=[64],
                                                  fc_dropout=0.0,
                                                  fc_batch_normalize=False,
                                                  fc_initializer='glorot_normal',
                                                  kernel_initializer='glorot_normal',
                                                  alpha_dropout=False):
    '''Build Reference 1d CNN model for classifying variants.

    Architecture specified by parameters.
    Dynamically sets input channels based on args via tensor_maps.total_input_channels_from_args(args)
    Uses the functional API. Prints out model summary.

    Arguments
        args.annotations: The variant annotations, perhaps from a HaplotypeCaller VCF.
        args.labels: The output labels (e.g. SNP, NOT_SNP, INDEL, NOT_INDEL)

    Returns
        The keras model
    '''
    in_channels = tensor_maps.total_input_channels_from_args(args)
    concat_axis = -1
    x = reference = Input(shape=(args.window_size, in_channels), name=args.tensor_name)

    # Convolutional tower; pooling is applied only to the last len(max_pools) conv layers
    max_pool_diff = len(conv_layers) - len(max_pools)
    for i, c in enumerate(conv_layers):
        if conv_batch_normalize:
            x = Conv1D(filters=c, kernel_size=conv_width, activation='linear',
                       padding=padding, kernel_initializer=kernel_initializer)(x)
            x = BatchNormalization(axis=concat_axis)(x)
            x = Activation(activation)(x)
        else:
            x = Conv1D(filters=c, kernel_size=conv_width, activation=activation,
                       padding=padding, kernel_initializer=kernel_initializer)(x)

        if conv_dropout > 0 and alpha_dropout:
            x = AlphaDropout(conv_dropout)(x)
        elif conv_dropout > 0 and spatial_dropout:
            x = SpatialDropout1D(conv_dropout)(x)
        elif conv_dropout > 0:
            x = Dropout(conv_dropout)(x)

        if i >= max_pool_diff:
            x = MaxPooling1D(max_pools[i - max_pool_diff])(x)

    f = Flatten()(x)

    # Annotation side input, mixed in through a small MLP
    annotations = annotations_in = Input(shape=(len(args.annotations),), name=args.annotation_set)
    if annotation_batch_normalize:
        annotations_in = BatchNormalization(axis=concat_axis)(annotations_in)
    annotation_mlp = Dense(units=annotation_units, kernel_initializer=fc_initializer,
                           activation=activation)(annotations_in)

    x = layers.concatenate([f, annotation_mlp], axis=1)

    # Fully-connected head
    for fc in fc_layers:
        if fc_batch_normalize:
            x = Dense(units=fc, activation='linear', kernel_initializer=fc_initializer)(x)
            x = BatchNormalization(axis=1)(x)
            x = Activation(activation)(x)
        else:
            x = Dense(units=fc, activation=activation, kernel_initializer=fc_initializer)(x)

        if fc_dropout > 0 and alpha_dropout:
            x = AlphaDropout(fc_dropout)(x)
        elif fc_dropout > 0:
            x = Dropout(fc_dropout)(x)

    if annotation_shortcut:
        x = layers.concatenate([x, annotations_in], axis=1)

    prob_output = Dense(units=len(args.labels), activation='softmax', name='softmax_predictions')(x)

    model = Model(inputs=[reference, annotations], outputs=[prob_output])

    adam = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipnorm=1.)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=get_metrics(args.labels))
    model.summary()

    if os.path.exists(args.weights_hd5):
        model.load_weights(args.weights_hd5, by_name=True)
        print('Loaded model weights from:', args.weights_hd5)

    return model
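# --- Hedged usage sketch (not part of the original source) ---
# The builder above only reads plain attributes off `args`, so a hypothetical
# argparse.Namespace carrying the fields it references is enough to drive it.
# Every value below is an illustrative assumption; tensor_maps must still be
# importable for total_input_channels_from_args to resolve the input depth.
from argparse import Namespace

example_args = Namespace(
    window_size=128,                      # length of the reference window
    tensor_name='reference',              # name of the sequence Input
    annotations=['MQ', 'DP', 'QD'],       # hypothetical annotation names
    annotation_set='annotations',         # name of the annotation Input
    labels={'SNP': 0, 'NOT_SNP': 1},      # output label map
    weights_hd5='',                       # empty path: no weights are loaded
)
# model = build_reference_annotation_1d_model_from_args(example_args, fc_layers=[64, 64])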
embedding_matrix = np.zeros((num_words, embed_size))
for word, i in word_index.items():
    if i >= max_features:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector

# In[ ]:

sequence_input = Input(shape=(maxlen, ))
x = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=False)(sequence_input)
x = SpatialDropout1D(0.2)(x)
x = Bidirectional(GRU(128, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))(x)
x = Conv1D(64, kernel_size=3, padding="valid", kernel_initializer="glorot_uniform")(x)
avg_pool = GlobalAveragePooling1D()(x)
max_pool = GlobalMaxPooling1D()(x)
x = concatenate([avg_pool, max_pool])
# x = Dense(128, activation='relu')(x)
# x = Dropout(0.1)(x)
preds = Dense(6, activation="sigmoid")(x)
model = Model(sequence_input, preds)
model.compile(loss='binary_crossentropy', optimizer=Adam(lr=1e-3), metrics=['accuracy'])

# In[ ]:
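# Hedged sketch of how the `embeddings_index` used above is typically built
# (the original cell is not shown): one token per line followed by its vector,
# as in the GloVe text files. The file path is an assumption.
import numpy as np

EMBEDDING_FILE = 'glove.6B.300d.txt'
embeddings_index = {}
with open(EMBEDDING_FILE, encoding='utf8') as f:
    for line in f:
        values = line.rstrip().split(' ')
        embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')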
ln = [len(i.split()) for i in corpus]
max_len = max(ln)

from keras.preprocessing.sequence import pad_sequences
w2v_pad = pad_sequences(w2v, maxlen=max_len)

inp = max([max(i) for i in w2v])

from keras.layers import Embedding, SpatialDropout1D, LSTM

model2 = Sequential()
model2.add(Embedding(input_dim=inp + 1, output_dim=128, input_length=w2v_pad.shape[1]))
model2.add(SpatialDropout1D(rate=.1))
model2.add(LSTM(units=300, dropout=.1, recurrent_dropout=.1))
model2.add(Dense(3, activation='softmax'))
model2.summary()

# three mutually exclusive classes with a softmax head need the categorical
# loss, not binary_crossentropy
model2.compile(loss="categorical_crossentropy", metrics=['accuracy'], optimizer='adam')
model2.fit(x=w2v_pad, y=c2, epochs=5)
model2.predict(w2v_pad)
# prd2 = pd.DataFrame(data=np.round(model2.predict(w2v_pad)), columns=idx)
"""
#notes
dense=1 is for continuous; then you don't one-hot encode the y.
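# Hedged illustration of the note above (toy dimensions, not from the original):
# a single linear unit keeps the target continuous and un-encoded, while a
# softmax head requires one-hot targets.
from keras.models import Sequential
from keras.layers import Dense

reg_head = Sequential([Dense(1, input_dim=10)])                        # continuous y, loss='mse'
clf_head = Sequential([Dense(3, activation='softmax', input_dim=10)])  # one-hot y, loss='categorical_crossentropy'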
embedding_matrix = np.concatenate((embedding_matrix_1, embedding_matrix_2,
                                   embedding_matrix_3, embedding_matrix_4), axis=1)
del embedding_matrix_1, embedding_matrix_2, embedding_matrix_3, embedding_matrix_4
gc.collect()
np.shape(embedding_matrix)

# **Bidirectional GRU:**

# In[ ]:

# https://www.kaggle.com/sudalairajkumar/a-look-at-different-embeddings
# https://www.kaggle.com/strideradu/word2vec-and-gensim-go-go-go
inp = Input(shape=(maxlen,))
x = Embedding(max_features, embed_size * 4, weights=[embedding_matrix])(inp)
x = SpatialDropout1D(S_DROPOUT)(x)
x = Bidirectional(CuDNNGRU(128, return_sequences=True))(x)
avg_pool = GlobalAveragePooling1D()(x)
max_pool = GlobalMaxPooling1D()(x)
conc = concatenate([avg_pool, max_pool])
x = Dense(16, activation="relu")(conc)
x = Dropout(DROPOUT)(x)
x = Dense(1, activation="sigmoid")(x)
model = Model(inputs=inp, outputs=x)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# In[ ]:

model.fit(train_X, train_y, batch_size=512, epochs=2, validation_data=(val_X, val_y))
    for col in cont_cols:
        X[col] = dataset[col].values
    return X

# Dictionary of inputs
emb_n = 40
dense_n = 1000

# Build the inputs, embeddings and concatenate them all for each column
emb_inputs = dict((col, Input(shape=[1], name=col)) for col in embids)
cont_inputs = dict((col, Input(shape=[1], name=col)) for col in cont_cols)
emb_model = dict((col, Embedding(embmaxs[col], emb_n)(emb_inputs[col])) for col in embids)
fe = concatenate([(emb_) for emb_ in emb_model.values()])

### Rest of the model
s_dout = SpatialDropout1D(0.1)(fe)
fl1 = Flatten()(s_dout)
# conv_layers = dict((('conv'+str(i), Conv1D(int(200/i), kernel_size=2**i, strides=1, padding='same', name='conv'+str(i))(s_dout)) for i in range(2, 5)))
conv1 = Conv1D(400, kernel_size=4, strides=1, padding='same', name='conv1')(s_dout)
conv2 = Conv1D(200, kernel_size=8, strides=1, padding='same', name='conv2')(conv1)
conv3 = Conv1D(100, kernel_size=16, strides=1, padding='same', name='conv3')(conv2)
conv4 = Conv1D(50, kernel_size=32, strides=1, padding='same', name='conv4')(conv3)
# flatten_layers = dict((('flatten_conv'+str(i), Flatten(name='flatten_conv'+str(i))(conv_layers['conv'+str(i)])) for i in range(2, 5)))
flatten_layer = Flatten(name='flatten_conv4')(conv4)
# concat = concatenate([(f_inp) for f_inp in flatten_layers.values()] + [(c_inp) for c_inp in cont_inputs.values()])
concat = concatenate([(flatten_layer)] + [(c_inp) for c_inp in cont_inputs.values()])
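# The fragment above stops at the concatenation; a hedged completion sketch
# follows (the sigmoid head, binary loss and use of `dense_n` are assumptions,
# not the original author's code). Because every Input carries a name, fit()
# can be fed a dict keyed by those column names.
x = Dense(dense_n, activation='relu')(concat)
out = Dense(1, activation='sigmoid')(x)
model = Model(inputs=list(emb_inputs.values()) + list(cont_inputs.values()), outputs=out)
model.compile(optimizer='adam', loss='binary_crossentropy')
# model.fit({col: X[col] for col in list(embids) + list(cont_cols)}, y, ...)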
    def compile_elmo(self, print_summary=False):
        """
        Compiles a Language Model RNN based on the given parameters
        """

        if self.parameters['token_encoding'] == 'word':
            # Train word embeddings from scratch
            word_inputs = Input(shape=(None,), name='word_indices', dtype='int32')
            embeddings = Embedding(self.parameters['vocab_size'],
                                   self.parameters['hidden_units_size'],
                                   trainable=True, name='token_encoding')
            inputs = embeddings(word_inputs)

            # Token embeddings for Input
            drop_inputs = SpatialDropout1D(self.parameters['dropout_rate'])(inputs)
            lstm_inputs = TimestepDropout(self.parameters['word_dropout_rate'])(drop_inputs)

            # Pass outputs as inputs to apply sampled softmax
            next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
            previous_ids = Input(shape=(None, 1), name='previous_ids', dtype='float32')

        elif self.parameters['token_encoding'] == 'char':
            # Train character-level representation
            word_inputs = Input(shape=(None, self.parameters['token_maxlen'],),
                                dtype='int32', name='char_indices')
            inputs = self.char_level_token_encoder()(word_inputs)

            # Token embeddings for Input
            drop_inputs = SpatialDropout1D(self.parameters['dropout_rate'])(inputs)
            lstm_inputs = TimestepDropout(self.parameters['word_dropout_rate'])(drop_inputs)

            # Pass outputs as inputs to apply sampled softmax
            next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
            previous_ids = Input(shape=(None, 1), name='previous_ids', dtype='float32')

        # Reversed input for backward LSTMs
        re_lstm_inputs = Lambda(function=ELMo.reverse)(lstm_inputs)
        mask = Lambda(function=ELMo.reverse)(drop_inputs)

        # Forward LSTMs
        for i in range(self.parameters['n_lstm_layers']):
            if self.parameters['cuDNN']:
                lstm = CuDNNLSTM(units=self.parameters['lstm_units_size'],
                                 return_sequences=True,
                                 kernel_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                              self.parameters['cell_clip']),
                                 recurrent_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                                 self.parameters['cell_clip']))(lstm_inputs)
            else:
                lstm = LSTM(units=self.parameters['lstm_units_size'],
                            return_sequences=True,
                            activation="tanh",
                            recurrent_activation='sigmoid',
                            kernel_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                         self.parameters['cell_clip']),
                            recurrent_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                            self.parameters['cell_clip']))(lstm_inputs)
            lstm = Camouflage(mask_value=0)(inputs=[lstm, drop_inputs])
            # Projection to hidden_units_size
            proj = TimeDistributed(Dense(self.parameters['hidden_units_size'],
                                         activation='linear',
                                         kernel_constraint=MinMaxNorm(-1 * self.parameters['proj_clip'],
                                                                      self.parameters['proj_clip'])))(lstm)
            # Merge the projected LSTM feature vectors with the previous inputs (residual connection)
            lstm_inputs = add([proj, lstm_inputs], name='f_block_{}'.format(i + 1))
            # Apply variational drop-out between BI-LSTM layers
            lstm_inputs = SpatialDropout1D(self.parameters['dropout_rate'])(lstm_inputs)

        # Backward LSTMs
        for i in range(self.parameters['n_lstm_layers']):
            if self.parameters['cuDNN']:
                re_lstm = CuDNNLSTM(units=self.parameters['lstm_units_size'],
                                    return_sequences=True,
                                    kernel_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                                 self.parameters['cell_clip']),
                                    recurrent_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                                    self.parameters['cell_clip']))(re_lstm_inputs)
            else:
                re_lstm = LSTM(units=self.parameters['lstm_units_size'],
                               return_sequences=True,
                               activation='tanh',
                               recurrent_activation='sigmoid',
                               kernel_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                            self.parameters['cell_clip']),
                               recurrent_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                               self.parameters['cell_clip']))(re_lstm_inputs)
            re_lstm = Camouflage(mask_value=0)(inputs=[re_lstm, mask])
            # Projection to hidden_units_size
            re_proj = TimeDistributed(Dense(self.parameters['hidden_units_size'],
                                            activation='linear',
                                            kernel_constraint=MinMaxNorm(-1 * self.parameters['proj_clip'],
                                                                         self.parameters['proj_clip'])))(re_lstm)
            # Merge the projected LSTM feature vectors with the previous inputs (residual connection)
            re_lstm_inputs = add([re_proj, re_lstm_inputs], name='b_block_{}'.format(i + 1))
            # Apply variational drop-out between BI-LSTM layers
            re_lstm_inputs = SpatialDropout1D(self.parameters['dropout_rate'])(re_lstm_inputs)

        # Reverse the backward LSTMs' outputs to make them forward again
        re_lstm_inputs = Lambda(function=ELMo.reverse, name="reverse")(re_lstm_inputs)

        # Project to Vocabulary with Sampled Softmax
        sampled_softmax = SampledSoftmax(num_classes=self.parameters['vocab_size'],
                                         num_sampled=int(self.parameters['num_sampled']),
                                         tied_to=embeddings if self.parameters['weight_tying'] else None)
        outputs = sampled_softmax([lstm_inputs, next_ids])
        re_outputs = sampled_softmax([re_lstm_inputs, previous_ids])

        self._model = Model(inputs=[word_inputs, next_ids, previous_ids],
                            outputs=[outputs, re_outputs])
        self._model.compile(optimizer=Adagrad(lr=self.parameters['lr'],
                                              clipvalue=self.parameters['clip_value']),
                            loss=None)
        if print_summary:
            self._model.summary()
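# Why `loss=None` above is valid: the SampledSoftmax layer registers its own
# training loss internally through Keras' add_loss mechanism, so compile() has
# nothing left to attach. A minimal self-contained sketch of that pattern
# (a toy layer, not the actual SampledSoftmax implementation):
from keras import backend as K
from keras.layers import Layer

class SelfPenalizingLayer(Layer):
    """Toy layer that contributes its own loss term via self.add_loss."""
    def call(self, inputs):
        # penalise large activations; this loss is picked up at compile time
        self.add_loss(K.mean(K.square(inputs)))
        return inputs
# a model containing such a layer can then be compiled with loss=None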
def CNNRNN_architecture(X_train_word_seq, X_test_word_seq, Y_train, max_seq_len,
                        nb_words, embedding_matrix, embed_dim, output_dir,
                        job_number, batch_size, num_epochs, num_filters, weight_decay):
    num_classes = np.max(Y_train) + 1

    # CNN architecture
    print("training CNN RNN...")
    model = Sequential()
    model.add(Embedding(nb_words, embed_dim, weights=[embedding_matrix],
                        input_length=max_seq_len, trainable=False))
    model.add(SpatialDropout1D(0.2))
    model.add(Conv1D(num_filters, 3, activation='relu', padding='same'))
    model.add(MaxPooling1D(2))
    # model.add(Bidirectional(GRU(128, return_sequences=True, dropout=0.1, recurrent_dropout=0.1)))
    model.add(GlobalMaxPooling1D())
    model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(weight_decay)))
    # labels are one-hot encoded with to_categorical below, i.e. mutually
    # exclusive classes, so use a softmax head with the categorical loss
    # (sigmoid + binary_crossentropy is for genuine multi-label k-hot targets)
    model.add(Dense(num_classes, activation='softmax'))

    adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

    # checkpoint
    filepath = output_dir
    checkpoint = ModelCheckpoint(filepath + "best.h5", monitor='val_accuracy',
                                 verbose=1, save_best_only=True, mode='max')
    model.summary()

    # define callbacks
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.01, patience=5, verbose=1)
    callbacks_list = [early_stopping, checkpoint]

    # to categorical
    Y_train = utils.to_categorical(Y_train, num_classes)

    # model training
    hist = model.fit(X_train_word_seq, Y_train, batch_size=batch_size,
                     epochs=num_epochs, callbacks=callbacks_list,
                     validation_split=0.2, shuffle=True, verbose=2)

    print('X_test_word_seq.shape', X_test_word_seq.shape)
    Y_predict = model.predict(X_test_word_seq)
    print('Y_predict', Y_predict)
    np.save(str(output_dir) + 'Y_predict.npy', Y_predict)
    model.save(str(output_dir) + 'model' + str(job_number))
    return Y_predict
def rnn(embedding_matrix, config):
    if config['rnn'] == 'gru' and config['gpu']:
        encode = Bidirectional(CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode3 = Bidirectional(CuDNNGRU(config['rnn_output_size'], return_sequences=True))
    else:
        encode = Bidirectional(CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(CuDNNLSTM(config['rnn_output_size'] * 2, return_sequences=True))
        encode3 = Bidirectional(CuDNNGRU(config['rnn_output_size'] * 4, return_sequences=True))

    q1 = Input(shape=(config['max_length'],), dtype='int32', name='q1_input')
    q2 = Input((config['max_length'],), dtype='int32', name='q2_input')
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )

    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # bsz, 1, emb_dims
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    q1_encoded = encode(q1_embed)
    q2_encoded = encode(q2_embed)
    q1_encoded = Dropout(0.2)(q1_encoded)
    q2_encoded = Dropout(0.2)(q2_encoded)

    # bidirectional
    # q1_encoded = encode2(q1_encoded)
    # q2_encoded = encode2(q2_encoded)

    # resnet-style shortcuts
    rnn_layer2_input1 = concatenate([q1_embed, q1_encoded])
    rnn_layer2_input2 = concatenate([q2_embed, q2_encoded])
    q1_encoded2 = encode2(rnn_layer2_input1)
    q2_encoded2 = encode2(rnn_layer2_input2)

    # add res shortcut
    res_block1 = add([q1_encoded, q1_encoded2])
    res_block2 = add([q2_encoded, q2_encoded2])
    rnn_layer3_input1 = concatenate([q1_embed, res_block1])
    rnn_layer3_input2 = concatenate([q2_embed, res_block2])
    # rnn_layer3_input1 = concatenate([q1_embed, q1_encoded, q1_encoded2])
    # rnn_layer3_input2 = concatenate([q2_embed, q2_encoded, q2_encoded2])
    q1_encoded3 = encode3(rnn_layer3_input1)
    q2_encoded3 = encode3(rnn_layer3_input2)

    # merged1 = GlobalMaxPool1D()(q1_encoded3)
    # merged2 = GlobalMaxPool1D()(q2_encoded3)
    # q1_encoded = concatenate([q1_encoded, q1_encoded2], axis=-1)
    # q2_encoded = concatenate([q2_encoded, q2_encoded2], axis=-1)
    # merged1 = concatenate([q1_encoded2, q1_embed], axis=-1)
    # merged2 = concatenate([q2_encoded2, q2_embed], axis=-1)
    # TODO add attention rep, maxpooling rep
    q1_encoded3 = concatenate([q1_encoded, q1_encoded2, q1_encoded3])
    q2_encoded3 = concatenate([q2_encoded, q2_encoded2, q2_encoded3])
    merged1 = GlobalMaxPool1D()(q1_encoded3)
    merged2 = GlobalMaxPool1D()(q2_encoded3)
    # avg1 = GlobalAvgPool1D()(q1_encoded3)
    # avg2 = GlobalAvgPool1D()(q2_encoded3)
    # merged1 = concatenate([max1, avg1])
    # merged2 = concatenate([max2, avg2])

    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([merged1, merged2])
    mul_rep = Lambda(lambda x: x[0] * x[1])([merged1, merged2])
    # jaccard_rep = Lambda(lambda x: x[0]*x[1]/(K.sum(x[0]**2,axis=1,keepdims=True)+K.sum(x[1]**2,axis=1,keepdims=True)-
    #                                           K.sum(K.abs(x[0]*x[1]),axis=1,keepdims=True)))([merged1,merged2])
    # merged = Concatenate()([merged1, merged2, mul_rep, sub_rep, jaccard_rep])
    merged = Concatenate()([merged1, merged2, mul_rep, sub_rep])

    # Classifier
    dense = Dropout(config['dense_dropout'])(merged)
    dense = BatchNormalization()(dense)
    dense = Dense(config['dense_dim'], activation='relu')(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = BatchNormalization()(dense)
    predictions = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
                  output_dim=20, input_length=max_len, mask_zero=True)(input)

######################################################
## Option2: Word2Vec Embedding Layer
######################################################
# wv_layer = Embedding(n_tokens,
#                      vec_dimension,
#                      mask_zero=False,
#                      weights=[wv_matrix],
#                      input_length=max_len,
#                      trainable=False)
# model = wv_layer(input)  # embedded_sequences

# Input Dropout
model = SpatialDropout1D(0.1)(model)
model = Bidirectional(LSTM(units=100, return_sequences=True))(model)

# Output
output_drop = Dropout(0.2)(model)
dense = TimeDistributed(Dense(n_tags, activation="softmax"))(output_drop)  # softmax output layer
# dense = TimeDistributed(Dense(100, activation="softmax"))(output_drop)
crf = CRF(n_tags)  # CRF layer
# crf = CRF(n_tags, sparse_target=True)  # CRF layer
out = crf(dense)  # output
# out = CRF(n_tags)(dense)

### 3. Build Model
model = Model(inputs=input, outputs=out)
batch_size = 32
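# Hedged compile sketch: assuming the CRF above comes from keras_contrib, the
# model is typically compiled against the layer's own loss and accuracy.
model.compile(optimizer="rmsprop", loss=crf.loss_function, metrics=[crf.accuracy])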
from keras.layers import SpatialDropout1D

my_input = Input(shape=(None, ))
embedding = Embedding(input_dim=embedding_matrix.shape[0],
                      input_length=max_seq_len,
                      output_dim=word_vector_dim,
                      trainable=True)(my_input)
x = Conv1D(filters=nb_filters, kernel_size=filter_size_a, activation='relu')(embedding)
x = SpatialDropout1D(drop_rate)(x)
x = MaxPooling1D(pool_size=5)(x)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
prob = Dense(6, activation='softmax')(x)

model = Model(my_input, prob)
model.compile(loss='categorical_crossentropy', optimizer=my_optimizer, metrics=['accuracy'])
model.fit(x_train,
networkcore_emb = sparse.load_npz("model/weibo_coreembedding.npz").todense()
embeddedc = Embedding(len(words) + 1, actors_size,
                      embeddings_initializer=Constant(networkcore_emb),
                      input_length=seqlen, mask_zero=False,
                      trainable=True)(seqsb)

dropout = Dropout(rate=Dropoutrate)(seqsa)
middle = Dense(Hidden, activation='relu',
               kernel_regularizer=regularizers.l2(Regularization))(dropout)
batchNorm = BatchNormalization()(middle)

dropoutb = SpatialDropout1D(rate=Dropoutrate)(embedded)
blstm = Bidirectional(CuDNNGRU(Hidden, return_sequences=False), merge_mode='sum')(dropoutb)
batchNormb = BatchNormalization()(blstm)

dropoutc = SpatialDropout1D(rate=Dropoutrate)(embeddedc)
# five identical Conv1D + MaxPooling1D stages
mpool = dropoutc
for _ in range(5):
    conv = Conv1D(filters=nfilters, kernel_size=kernelSize)(mpool)
    mpool = MaxPooling1D()(conv)
def get_av_pos_cnn():
    filter_nums = 325
    drop_rate = 0.5

    input_layer = Input(shape=(MAX_SEQUENCE_LENGTH, ), name='Onehot')
    input_layer_2 = Input(shape=(MAX_SEQUENCE_LENGTH, ), name='POS')
    embedding_layer = Embedding(VOCAB_SIZE, EMBEDDING_SIZE,
                                weights=[embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=False)(input_layer)
    embedding_layer2 = Embedding(50, 30, input_length=MAX_SEQUENCE_LENGTH,
                                 trainable=True)(input_layer_2)
    embedding_layer = concatenate([embedding_layer, embedding_layer2], axis=2)
    embedded_sequences = SpatialDropout1D(0.25)(embedding_layer)

    conv_0 = Conv1D(filter_nums, 1, kernel_initializer="normal", padding="valid",
                    activation="relu")(embedded_sequences)
    conv_1 = Conv1D(filter_nums, 2, kernel_initializer="normal", padding="valid",
                    activation="relu")(embedded_sequences)
    conv_2 = Conv1D(filter_nums, 3, kernel_initializer="normal", padding="valid",
                    activation="relu")(embedded_sequences)
    conv_3 = Conv1D(filter_nums, 4, kernel_initializer="normal", padding="valid",
                    activation="relu")(embedded_sequences)

    attn_0 = Attention(MAX_SEQUENCE_LENGTH)(conv_0)
    avg_0 = GlobalAveragePooling1D()(conv_0)
    maxpool_0 = GlobalMaxPooling1D()(conv_0)
    maxpool_1 = GlobalMaxPooling1D()(conv_1)
    attn_1 = Attention(MAX_SEQUENCE_LENGTH)(conv_1)
    avg_1 = GlobalAveragePooling1D()(conv_1)
    maxpool_2 = GlobalMaxPooling1D()(conv_2)
    attn_2 = Attention(MAX_SEQUENCE_LENGTH)(conv_2)
    avg_2 = GlobalAveragePooling1D()(conv_2)
    maxpool_3 = GlobalMaxPooling1D()(conv_3)
    attn_3 = Attention(MAX_SEQUENCE_LENGTH)(conv_3)
    avg_3 = GlobalAveragePooling1D()(conv_3)

    # the Keras 1 merge(..., mode='concat') API is gone in Keras 2; use concatenate
    v0_col = concatenate([maxpool_0, maxpool_1, maxpool_2, maxpool_3], axis=1)
    v1_col = concatenate([attn_0, attn_1, attn_2, attn_3], axis=1)
    v2_col = concatenate([avg_1, avg_2, avg_0, avg_3], axis=1)
    merged_tensor = concatenate([v0_col, v1_col, v2_col], axis=1)

    output = Dropout(0.7)(merged_tensor)
    output = Dense(units=144)(output)
    output = Activation('relu')(output)
    # output = Dropout(0.5)(output)
    output = Dense(units=6, activation='sigmoid')(output)

    model = Model(inputs=[input_layer, input_layer_2], outputs=output)
    model.compile(loss='binary_crossentropy', optimizer=adam_optimizer, metrics=['accuracy'])
    return model
def get_model():
    nclass = 5
    inp = Input(shape=(3000, 1))
    img_1 = Convolution1D(16, kernel_size=5, activation=activations.relu, padding="valid")(inp)
    img_1 = Convolution1D(16, kernel_size=5, activation=activations.relu, padding="valid")(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = SpatialDropout1D(rate=0.01)(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = SpatialDropout1D(rate=0.01)(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = SpatialDropout1D(rate=0.01)(img_1)
    img_1 = Convolution1D(256, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = Convolution1D(256, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = GlobalMaxPool1D()(img_1)
    img_1 = Dropout(rate=0.01)(img_1)

    dense_1 = Dropout(rate=0.01)(Dense(64, activation=activations.relu, name="dense_1")(img_1))
    dense_1 = Dropout(rate=0.05)(Dense(64, activation=activations.relu, name="dense_2")(dense_1))
    dense_1 = Dense(nclass, activation=activations.softmax, name="dense_3")(dense_1)

    model = models.Model(inputs=inp, outputs=dense_1)
    opt = optimizers.Adam(0.001)
    model.compile(optimizer=opt, loss=losses.sparse_categorical_crossentropy, metrics=['acc'])
    model.summary()
    return model
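# Hedged smoke test (not in the original): sparse_categorical_crossentropy
# expects integer class labels, so random (N, 3000, 1) inputs with labels in
# [0, 5) are enough to exercise one training step.
import numpy as np

if __name__ == '__main__':
    m = get_model()
    x = np.random.randn(8, 3000, 1).astype('float32')
    y = np.random.randint(0, 5, size=(8,))
    m.train_on_batch(x, y)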
    def cabasc(self):
        def sequence_mask(sequence):
            return K.sign(K.max(K.abs(sequence), 2))

        def sequence_length(sequence):
            return K.cast(K.sum(sequence_mask(sequence), 1), tf.int32)

        input_text = Input(shape=(self.max_len, ))
        input_text_l = Input(shape=(self.max_len, ))
        input_text_r = Input(shape=(self.max_len, ))
        input_aspect = Input(shape=(1, ))
        input_mask = Input(shape=(self.max_len, ))

        word_embedding = Embedding(input_dim=self.max_content_vocab_size,
                                   output_dim=self.content_embed_dim)
        text_embed = SpatialDropout1D(0.2)(word_embedding(input_text))
        text_l_embed = SpatialDropout1D(0.2)(word_embedding(input_text_l))
        text_r_embed = SpatialDropout1D(0.2)(word_embedding(input_text_r))

        asp_embedding = Embedding(input_dim=self.max_aspect_vocab_size,
                                  output_dim=self.aspect_embed_dim)
        aspect_embed = asp_embedding(input_aspect)
        aspect_embed = Flatten()(aspect_embed)  # reshape to 2d

        # regarding aspect string as the first unit
        hidden_l = GRU(self.lstm_units, go_backwards=True, return_sequences=True)(text_l_embed)
        hidden_r = GRU(self.lstm_units, return_sequences=True)(text_r_embed)

        # left context attention
        context_attend_l = TimeDistributed(Dense(1, activation='sigmoid'))(hidden_l)
        # Note: Keras has no `reverse_sequence` equivalent, so fall back to the tf op
        context_attend_l = Lambda(lambda x: tf.reverse_sequence(x, sequence_length(x), 1, 0))(context_attend_l)
        context_attend_l = Lambda(lambda x: K.squeeze(x, -1))(context_attend_l)

        # right context attention
        context_attend_r = TimeDistributed(Dense(1, activation='sigmoid'))(hidden_r)
        context_attend_r = Lambda(lambda x: K.squeeze(x, -1))(context_attend_r)

        # combine context attention
        # aspect_text_embed = subtract([add([text_l_embed, text_r_embed]), text_embed])
        # aspect_text_mask = Lambda(lambda x: sequence_mask(x))(aspect_text_embed)
        # text_mask = Lambda(lambda x: sequence_mask(x))(text_embed)
        # context_mask = subtract([text_mask, aspect_text_mask])
        # aspect_text_mask_half = Lambda(lambda x: x*0.5)(aspect_text_mask)
        # combine_mask = add([context_mask, aspect_text_mask_half])  # 1 for context, 0.5 for aspect
        context_attend = multiply([add([context_attend_l, context_attend_r]), input_mask])

        # apply context attention
        context_attend_expand = Lambda(lambda x: K.expand_dims(x))(context_attend)
        memory = multiply([text_embed, context_attend_expand])

        # sentence-level content attention
        sentence = Lambda(lambda x: K.mean(x, axis=1))(memory)
        final_output = ContentAttention()([memory, aspect_embed, sentence])

        dense_layer = Dense(self.dense_units, activation='relu')(final_output)
        output_layer = Dense(self.n_classes, activation='softmax')(dense_layer)

        return Model([input_text, input_text_l, input_text_r, input_aspect, input_mask],
                     output_layer)
    def build(self):
        depth = [4, 4, 10, 10]
        pooling_type = 'maxpool'
        use_shortcut = False

        input_sent = Input(shape=(self.config.max_len_word,), dtype='int32', name='sent_base')

        weights = np.load(os.path.join(self.config.embedding_path,
                                       self.config.level + '_level',
                                       self.config.embedding_file))
        embedding_layer = Embedding(input_dim=weights.shape[0],
                                    output_dim=weights.shape[-1],
                                    weights=[weights],
                                    name='embedding_layer',
                                    trainable=True)
        sent_embedding = embedding_layer(input_sent)
        text_embed = SpatialDropout1D(0.2)(sent_embedding)

        # first temporal conv layer
        conv_out = Conv1D(filters=64, kernel_size=3, kernel_initializer='he_uniform',
                          padding='same')(text_embed)
        shortcut = conv_out

        # temporal conv block: 64
        for i in range(depth[0]):
            if i < depth[0] - 1:
                shortcut = conv_out
                conv_out = self.conv_block(inputs=conv_out, filters=64,
                                           use_shortcut=use_shortcut, shortcut=shortcut)
            else:
                # shortcut is not used at the last conv block
                conv_out = self.conv_block(inputs=conv_out, filters=64,
                                           use_shortcut=use_shortcut, shortcut=None)

        # down-sampling; shortcut is the second last conv block output
        conv_out = self.dowm_sampling(inputs=conv_out, pooling_type=pooling_type,
                                      use_shortcut=use_shortcut, shortcut=shortcut)
        shortcut = conv_out

        # temporal conv block: 128
        for i in range(depth[1]):
            if i < depth[1] - 1:
                shortcut = conv_out
                conv_out = self.conv_block(inputs=conv_out, filters=128,
                                           use_shortcut=use_shortcut, shortcut=shortcut)
            else:
                # shortcut is not used at the last conv block
                conv_out = self.conv_block(inputs=conv_out, filters=128,
                                           use_shortcut=use_shortcut, shortcut=None)

        # down-sampling
        conv_out = self.dowm_sampling(inputs=conv_out, pooling_type=pooling_type,
                                      use_shortcut=use_shortcut, shortcut=shortcut)
        shortcut = conv_out

        # temporal conv block: 256
        for i in range(depth[2]):
            if i < depth[2] - 1:
                shortcut = conv_out
                conv_out = self.conv_block(inputs=conv_out, filters=256,
                                           use_shortcut=use_shortcut, shortcut=shortcut)
            else:
                # shortcut is not used at the last conv block
                conv_out = self.conv_block(inputs=conv_out, filters=256,
                                           use_shortcut=use_shortcut, shortcut=None)

        # down-sampling
        conv_out = self.dowm_sampling(inputs=conv_out, pooling_type=pooling_type,
                                      use_shortcut=use_shortcut, shortcut=shortcut)

        # temporal conv block: 512
        for i in range(depth[3]):
            if i < depth[3] - 1:
                shortcut = conv_out
                conv_out = self.conv_block(inputs=conv_out, filters=512,
                                           use_shortcut=use_shortcut, shortcut=shortcut)
            else:
                # shortcut is not used at the last conv block
                conv_out = self.conv_block(inputs=conv_out, filters=512,
                                           use_shortcut=use_shortcut, shortcut=None)

        # 8-max pooling
        conv_out = KMaxPooling(k=8)(conv_out)
        flatten = Flatten()(conv_out)

        fc1 = Dense(2048, activation='relu')(flatten)
        sentence_embed = Dense(2048, activation='relu')(fc1)

        dense_layer = Dense(256, activation='relu')(sentence_embed)
        output = Dense(self.config.num_classes, activation='softmax')(dense_layer)

        return input_sent, output
def get_char_embedding_model():
    ## Imports
    from keras.models import Model, Input
    from keras.layers import LSTM, Embedding, Dense, TimeDistributed
    from keras.layers import Bidirectional, concatenate, SpatialDropout1D

    ## The trick here is to wrap the character-level parts in TimeDistributed,
    ## so that the same embedding/LSTM layers are applied to every word's
    ## character sequence in the sentence
    word_in = Input(shape=(constants.MAX_SENT_LEN, ))
    ortho_word_in = Input(shape=(constants.MAX_SENT_LEN, ))

    ## To find word embedding
    emb_word = Embedding(input_dim=n_words + 2, output_dim=20,
                         input_length=constants.MAX_SENT_LEN, mask_zero=True)(word_in)
    ortho_emb_word = Embedding(input_dim=n_ortho_words + 2, output_dim=20,
                               input_length=constants.MAX_SENT_LEN, mask_zero=True)(ortho_word_in)

    ## To find character embedding for characters of that word
    char_in = Input(shape=(constants.MAX_SENT_LEN, constants.MAX_WORD_LEN, ))
    emb_char = TimeDistributed(Embedding(input_dim=n_chars + 2, output_dim=10,
                                         input_length=constants.MAX_WORD_LEN,
                                         mask_zero=True))(char_in)
    ortho_char_in = Input(shape=(constants.MAX_SENT_LEN, constants.MAX_WORD_LEN, ))
    ortho_emb_char = TimeDistributed(Embedding(input_dim=n_ortho_chars + 2, output_dim=10,
                                               input_length=constants.MAX_WORD_LEN,
                                               mask_zero=True))(ortho_char_in)

    ## Character LSTM to get the word encoding by characters
    # char_encoding = TimeDistributed(Conv1D())
    char_encoding = TimeDistributed(LSTM(units=20, return_sequences=False,
                                         recurrent_dropout=0.5))(emb_char)
    ortho_char_encoding = TimeDistributed(LSTM(units=20, return_sequences=False,
                                               recurrent_dropout=0.5))(ortho_emb_char)
    print(char_encoding.shape, ' | ', ortho_char_encoding.shape)

    ## main LSTM
    x = concatenate([char_encoding, emb_word, ortho_char_encoding, ortho_emb_word])
    x = SpatialDropout1D(0.3)(x)
    main_lstm = Bidirectional(LSTM(units=50, return_sequences=True,
                                   recurrent_dropout=0.6))(x)
    out = TimeDistributed(Dense(n_tags + 1, activation="softmax"))(main_lstm)

    model = Model([char_in, word_in, ortho_char_in, ortho_word_in], out)
    return model
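# Hedged shape check (dummy data; `constants`, n_words etc. assumed available):
# the four inputs line up, in the order declared by the Model above, as
# chars -> (batch, MAX_SENT_LEN, MAX_WORD_LEN) and words -> (batch, MAX_SENT_LEN).
import numpy as np

_model = get_char_embedding_model()
_X_word = np.zeros((2, constants.MAX_SENT_LEN), dtype='int32')
_X_char = np.zeros((2, constants.MAX_SENT_LEN, constants.MAX_WORD_LEN), dtype='int32')
_preds = _model.predict([_X_char, _X_word, _X_char.copy(), _X_word.copy()])
# _preds.shape == (2, constants.MAX_SENT_LEN, n_tags + 1)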
    def load_model(self, model):
        '''Loads Keras model and prints its summary.

        # Arguments:
            model: string, model type.

        # Returns:
            model: compiled Keras model
        '''
        # Rename variables for brevity
        V, E, S = self.vocab_size, self.embed_size, self.seq_len

        if model == 'nn':
            model = Sequential([
                Embedding(V, E, input_length=S),
                SpatialDropout1D(0.2),
                Flatten(),
                Dense(100, activation='relu'),
                Dropout(0.7),
                Dense(1, activation='sigmoid')
            ])
        elif model == 'cnn1d':
            model = Sequential([
                Embedding(V, E, input_length=S),
                SpatialDropout1D(0.2),
                Conv1D(64, 5, padding='same', activation='relu'),
                Dropout(0.3),
                MaxPooling1D(),
                Flatten(),
                Dense(100, activation='relu'),
                Dropout(0.7),
                Dense(1, activation='sigmoid')
            ])
        elif model == 'cnn1d_emb':
            model = Sequential([
                Embedding(V, E, input_length=S, weights=[self.emb], trainable=False),
                SpatialDropout1D(0.2),
                Conv1D(128, 5, padding='same', activation='relu'),
                Dropout(0.5),
                MaxPooling1D(),
                Flatten(),
                Dense(100, activation='relu'),
                Dropout(0.7),
                Dense(1, activation='sigmoid')
            ])
        elif model == 'lstm':
            model = Sequential([
                Embedding(V, E, input_length=S, weights=[self.emb], trainable=False),
                LSTM(100),
                Dense(1, activation='sigmoid')
            ])

        model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])
        print(model.summary())
        return model
ytrainres_cat = to_categorical(y_train_res, num_classes=2)
yvalres_cat = to_categorical(y_val_res, num_classes=2)
ytestres_cat = to_categorical(y_test_res, num_classes=2)
print((X_train_res.shape, ytrainres_cat.shape, X_val_res.shape, yvalres_cat.shape,
       X_test_res.shape, ytestres_cat.shape))

# Training

# LSTM Model
n_most_common_words = 1000  # 150
model = Sequential()
# n_most_common_words = size of the vocabulary, emb_dim = dimension of the dense
# embedding, input_length = length of the (constant-size) input sequences
model.add(Embedding(n_most_common_words, emb_dim, input_length=X_train_res.shape[1]))
model.add(SpatialDropout1D(dropout))
model.add(LSTM(LSTM_units, dropout=dropout, recurrent_dropout=dropout))
# the labels are one-hot encoded above, so a softmax head with the categorical
# loss is the matching choice
model.add(Dense(2, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
print(model.summary())

import time
start_time = time.time()
history = model.fit(
    X_train_res,
    ytrainres_cat,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.0,
def RnnVersion3(n_recurrent=50, n_dense=50, word_embedding_matrix=None, n_filters=50,
                dropout_rate=0.2, l2_penalty=0.0001, n_capsule=10, n_routings=5,
                capsule_dim=16):
    K.clear_session()

    def conv_block(x, n, kernel_size):
        x = Conv1D(n, kernel_size, activation='relu')(x)
        x = Conv1D(n_filters, kernel_size, activation='relu')(x)
        x_att = AttentionWithContext()(x)
        x_avg = GlobalAvgPool1D()(x)
        x_max = GlobalMaxPool1D()(x)
        return concatenate([x_att, x_avg, x_max])

    def att_max_avg_pooling(x):
        x_att = AttentionWithContext()(x)
        x_avg = GlobalAvgPool1D()(x)
        x_max = GlobalMaxPool1D()(x)
        return concatenate([x_att, x_avg, x_max])

    input1_ = Input(shape=(170, ), name='input1')
    input2_ = Input(shape=(433, ), name='input2')
    emb = Embedding(21099, 300, trainable=True)(input1_)

    # model 0
    x0 = SpatialDropout1D(dropout_rate)(emb)
    s0 = Bidirectional(
        CuDNNGRU(2 * n_recurrent, return_sequences=True,
                 kernel_regularizer=l2(l2_penalty),
                 recurrent_regularizer=l2(l2_penalty)))(x0)
    x0 = att_max_avg_pooling(s0)

    # model 1
    x1 = SpatialDropout1D(dropout_rate)(emb)
    s1 = Bidirectional(
        CuDNNGRU(2 * n_recurrent, return_sequences=True,
                 kernel_regularizer=l2(l2_penalty),
                 recurrent_regularizer=l2(l2_penalty)))(x1)
    x1 = att_max_avg_pooling(s1)

    # combine sequence output
    x = concatenate([s0, s1])
    # x = att_max_avg_pooling(x)
    x = Bidirectional(
        CuDNNGRU(n_recurrent, return_sequences=True,
                 kernel_regularizer=l2(l2_penalty),
                 recurrent_regularizer=l2(l2_penalty)))(x)
    x = att_max_avg_pooling(x)

    # combine it all
    x = concatenate([x, x0, x1, input2_], name='concatenate')
    x = Dense(1024, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    # fc = Dense(120, activation='relu')(x)
    outputs = Dense(6, activation='softmax')(x)

    model = Model(inputs=[input1_, input2_], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['accuracy'])
    return model
epochs = 25  # 20
weights = True
trainable = True
previous_weights = None
activation = 'sigmoid'
# ======= =======

print('Build model...')
model = Sequential()
if weights:
    model.add(Embedding(max_features, emb_dim, weights=[p.embedding_matrix],
                        trainable=trainable))
else:
    model.add(Embedding(max_features, emb_dim))
model.add(SpatialDropout1D(spatial_dropout))
model.add(Bidirectional(QRNN(emb_dim // 2, window_size=window_size, dropout=dropout,
                             kernel_regularizer=l2(kernel_regularizer),
                             bias_regularizer=l2(bias_regularizer),
                             kernel_constraint=maxnorm(kernel_constraint),
                             bias_constraint=maxnorm(bias_constraint))))
model.add(Dropout(dropout))
model.add(Dense(1, activation=activation))

plot_losses = PlotLosses()
plot_accuracy = PlotAccuracy()
reduce_rate = ReduceLROnPlateau(monitor='val_loss')
callbacks_list = [plot_losses, reduce_rate, plot_accuracy]

if clipnorm:
    optimizer = optimizers.Adam(lr=lr, clipnorm=clipnorm)
else:
def deepmoji_architecture(nb_classes, nb_tokens, maxlen, feature_output=False,
                          embed_dropout_rate=0, final_dropout_rate=0, embed_l2=1E-6,
                          return_attention=False):
    """
    Returns the DeepMoji architecture uninitialized and
    without using the pretrained model weights.

    # Arguments:
        nb_classes: Number of classes in the dataset.
        nb_tokens: Number of tokens in the dataset (i.e. vocabulary size).
        maxlen: Maximum length of a sequence, in tokens.
        feature_output: If True the model returns the penultimate
                        feature vector rather than Softmax probabilities
                        (defaults to False).
        embed_dropout_rate: Dropout rate for the embedding layer.
        final_dropout_rate: Dropout rate for the final Softmax layer.
        embed_l2: L2 regularization for the embedding layer.
        return_attention: If True the model also returns the attention
                          weights over the input tokens.

    # Returns:
        Model with the given parameters.
    """
    # define embedding layer that turns word tokens into vectors
    # an activation function is used to bound the values of the embedding
    model_input = Input(shape=(maxlen,), dtype='int32')
    embed_reg = L1L2(l2=embed_l2) if embed_l2 != 0 else None
    embed = Embedding(input_dim=nb_tokens, output_dim=256, mask_zero=True,
                      input_length=maxlen, embeddings_regularizer=embed_reg,
                      name='embedding')
    x = embed(model_input)
    x = Activation('tanh')(x)

    # entire embedding channels are dropped out instead of the
    # normal Keras embedding dropout, which drops all channels for entire words
    # many of the datasets contain so few words that losing one or more words can alter the emotions completely
    if embed_dropout_rate != 0:
        embed_drop = SpatialDropout1D(embed_dropout_rate, name='embed_drop')
        x = embed_drop(x)

    # skip-connection from embedding to output eases gradient-flow and allows access to lower-level features
    # ordering of the way the merge is done is important for consistency with the pretrained model
    lstm_0_output = Bidirectional(LSTM(512, return_sequences=True), name="bi_lstm_0")(x)
    lstm_1_output = Bidirectional(LSTM(512, return_sequences=True), name="bi_lstm_1")(lstm_0_output)
    x = concatenate([lstm_1_output, lstm_0_output, x])

    # if return_attention is True in AttentionWeightedAverage, an additional tensor
    # representing the weight at each timestep is returned
    weights = None
    x = AttentionWeightedAverage(name='attlayer', return_attention=return_attention)(x)
    if return_attention:
        x, weights = x

    if not feature_output:
        # output class probabilities
        if final_dropout_rate != 0:
            x = Dropout(final_dropout_rate)(x)

        if nb_classes > 2:
            outputs = [Dense(nb_classes, activation='softmax', name='softmax')(x)]
        else:
            outputs = [Dense(1, activation='sigmoid', name='softmax')(x)]
    else:
        # output penultimate feature vector
        outputs = [x]

    if return_attention:
        # add the attention weights to the outputs if required
        outputs.append(weights)

    return Model(inputs=[model_input], outputs=outputs, name="DeepMoji")
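# Hedged usage sketch: with return_attention=True the architecture yields two
# outputs, class probabilities plus one attention weight per timestep. The
# vocabulary size and maxlen below are illustrative assumptions.
import numpy as np

dm = deepmoji_architecture(nb_classes=2, nb_tokens=50000, maxlen=30,
                           return_attention=True)
probs, att_weights = dm.predict(np.zeros((1, 30), dtype='int32'))
# probs: (1, 1) sigmoid score, att_weights: (1, 30) weight per input token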
def main():
    # ################################## Arguments parser ##################################
    parser = argparse.ArgumentParser()
    parser.add_argument('--path', default='data/uci-news-aggregator.csv',
                        help='Path to dataset')
    parser.add_argument('--layer', default='lstm',
                        help='LSTM or GRU training layer')
    parser.add_argument('--epochs', type=int, default=8,
                        help='Number of training epochs')
    args = parser.parse_args()
    print('model_' + args.layer + '_Ep' + str(args.epochs) + '.h5')
    print(args, args.layer, len(sys.argv))

    # ################################## 1. Data Loading ##################################
    # Load data from pickle file
    with open("data/pickle_Xtrain.pkl", 'rb') as file:
        pkl_X_train = pickle.load(file)
    with open("data/pickle_ytrain.pkl", 'rb') as file:
        pkl_y_train = pickle.load(file)
    with open("data/pickle_Xtest.pkl", 'rb') as file:
        pkl_X_test = pickle.load(file)
    with open("data/pickle_ytest.pkl", 'rb') as file:
        pkl_y_test = pickle.load(file)

    X_test = pkl_X_test
    y_test = pkl_y_test
    X_train = pkl_X_train
    y_train = pkl_y_train

    # Load tokenizer from pickle file
    with open('data/tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)

    print("Data:", (X_train.shape, y_train.shape, X_test.shape, y_test.shape))

    # ################################## 2. Training ##################################
    emb_dim = 128
    batch_size = 256
    n_most_common_words = 8000
    epochs = args.epochs

    # TensorBoard
    today = datetime.date.today()
    log_dir = "logs/fit/" + 'model_' + args.layer + '_Ep' + str(args.epochs) + '(' + str(today) + ')'
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
    print("Tensorboard:", log_dir)
    print()

    # Model defining
    model = Sequential()
    model.add(Embedding(n_most_common_words, emb_dim, input_length=X_train.shape[1]))
    model.add(SpatialDropout1D(0.7))
    if args.layer.lower() != 'lstm':
        print('GRU MODEL')
        model.add(GRU(64, dropout=0.7, recurrent_dropout=0.7))
    else:
        print('LSTM MODEL')
        model.add(LSTM(64, dropout=0.7, recurrent_dropout=0.7))
    model.add(Dense(4, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
    print(model.summary())
    print()

    plot_model(model,
               to_file='saved/model_plot_' + args.layer + '_Ep' + str(args.epochs) + '.png',
               show_shapes=True, show_layer_names=True)

    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                        validation_split=0.2,
                        callbacks=[EarlyStopping(monitor='val_loss', patience=7, min_delta=0.0001),
                                   tensorboard_callback])

    # ################################## 3. Save Model ##################################
    model.save('saved/model_' + args.layer + '_Ep' + str(args.epochs) + '.h5')  # creates an HDF5 file
    ModelCheckpoint(
        filepath='./checkpoints/weights.epoch_{epoch:02d}-val_acc_{val_acc:.2f}.h5',
        monitor='loss',
        verbose=0,
        save_best_only=True),
]

CNNBranch = Sequential()
CNNBranch.add(Embedding(len(weights), output_dim=config.dims, weights=[weights],
                        input_length=config.sequence_length))
CNNBranch.add(BatchNormalization())
CNNBranch.add(SpatialDropout1D(rate=config.dropout))
CNNBranch.add(Conv1D(filters=config.nb_filter,
                     kernel_size=config.filter_length,
                     padding='valid',
                     activation='relu',
                     strides=1))
CNNBranch.add(GlobalMaxPooling1D())
CNNBranch.add(BatchNormalization())
CNNBranch.add(Dropout(config.dropout))
CNNBranch.add(Dense(4, activation='softmax'))
CNNBranch.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
CNNBranch.summary()
CNNBranch.fit(X_train,
            # crop_end = seq_len
            seq_batch = sequences[i:i + batch_size, crop_start:crop_end, :]
            CAGE_batch = CAGEs[i:i + batch_size, crop_start:crop_end]
            CAGE_batch = np.reshape(CAGE_batch, (-1, seq_len, 1))
            yield (seq_batch, CAGE_batch)

model = Sequential()
for i in range(5):
    # so now we change from (say) 300 bp long vectors of 4 colors
    # to ~300 bp long vectors of CONVSIZE colors, with each of those colors
    # based on 12 positions in the layer below
    model.add(Convolution1D(CONVSIZE, CONVWIDTH, border_mode='same',
                            input_shape=(CROP_SIZE, NUCNUM)))
    model.add(BatchNormalization())
    model.add(LeakyReLU())
    model.add(SpatialDropout1D(0.2))

model.add(Convolution1D(512, 5, border_mode='same'))
model.add(BatchNormalization())
model.add(LeakyReLU())
model.add(SpatialDropout1D(0.1))
model.add(Convolution1D(1, 1, border_mode='same'))
model.compile(loss='mse', optimizer='nadam')
print('Done compiling!')

model.fit_generator(training_batch_generator(sequences, CAGEs, BATCH_SIZE, CROP_SIZE),
                    samples_per_epoch=CAGEs.shape[0],
                    nb_epoch=20,
                    verbose=1)
print('Done training!')
              num_features)

# Routings = 30
# Num_capsule = 60
# Dim_capsule = 120
Routings = 15
Num_capsule = 30
Dim_capsule = 60

sequence_input = Input(shape=(maxlen, ), dtype='int32')
embedded_sequences = Embedding(input_dim=max_features,
                               output_dim=num_features,
                               input_length=maxlen,
                               weights=[W],
                               trainable=False)(sequence_input)
embedded_sequences = SpatialDropout1D(0.1)(embedded_sequences)
x = Bidirectional(CuDNNGRU(64, return_sequences=True))(embedded_sequences)
x = Bidirectional(CuDNNGRU(64, return_sequences=True))(x)
capsule = Capsule(num_capsule=Num_capsule, dim_capsule=Dim_capsule,
                  routings=Routings, share_weights=True, kernel_size=(3, 1))(x)
# output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule)
capsule = Flatten()(capsule)
capsule = Dropout(0.4)(capsule)
output = Dense(3, activation='softmax')(capsule)

model = Model(inputs=[sequence_input], outputs=output)
rmsprop = optimizers.rmsprop(lr=0.01)
model.compile(loss='categorical_crossentropy',
for word, i in word_index.items():
    if i >= max_features:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

# Build Model
inp = Input(shape=(maxlen, ))
x = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=True)(inp)
x = SpatialDropout1D(0.35)(x)
x = Bidirectional(LSTM(128, return_sequences=True, dropout=0.15, recurrent_dropout=0.15))(x)
x = Conv1D(64, kernel_size=3, padding='valid', kernel_initializer='glorot_uniform')(x)
avg_pool = GlobalAveragePooling1D()(x)
max_pool = GlobalMaxPooling1D()(x)
x = concatenate([avg_pool, max_pool])
out = Dense(6, activation='sigmoid')(x)
model = Model(inp, out)
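# The snippet stops before compilation; a hedged completion mirroring the other
# six-label sigmoid models in this collection (the optimizer choice is assumed):
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.fit(X_train, y_train, batch_size=32, epochs=2, validation_split=0.1)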
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

#----------------------------------
# LSTM model
#----------------------------------
model = Sequential()
model.add(Embedding(input_dim=vocab_size,
                    output_dim=EMBEDDING_DIM,
                    weights=[embedding_matrix],
                    input_length=X_train.shape[1],
                    trainable=True))  # , input_length=4
model.add(SpatialDropout1D(0.5))
model.add(LSTM(30, return_sequences=True, recurrent_dropout=0.5))
model.add(LSTM(30, dropout=0.5, recurrent_dropout=0.5))
model.add(Dense(30, activation='sigmoid'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])  # , Precision(), Recall()
print(model.summary())

#----------------------------------
# Fit model
#----------------------------------
batch_size = 128
print(no_to_text)

# Design NN architecture
# After this you can use the Keras, TensorFlow, or PyTorch framework for your neural network

# KERAS
model = Sequential()
model.add(Embedding(n_unique_words, n_dim,
                    input_length=max_review_lenth))  # converts each word into a vector
# The first argument (n_unique_words) in the embedding layer is the number of distinct
# words in the training set; it sets the size of the one-hot encoding created for each
# word before it is mapped into the embedding space.
# The second argument (n_dim) indicates the size of the embedding vectors.
# The input_length argument, of course, determines the size of each input sequence.
# model.output_shape == (None, max_review_lenth, n_dim), where None is the batch dimension
model.add(SpatialDropout1D(drop_emd))
model.add(Bidirectional(LSTM(n_lstm, dropout=lstm_drpout)))
model.add(Dense(1, activation='sigmoid'))
print(model.summary())

# compiling model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

modelcheckpoint = ModelCheckpoint(filepath=output_dir + "/weights.{epoch:02d}.hdf5")
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
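# Hedged shape check for the comments above (illustrative numbers, not this
# script's real hyperparameters): 10000 words embedded into 64 dimensions over
# 200-step sequences.
from keras.models import Sequential as _Sequential
from keras.layers import Embedding as _Embedding
print(_Sequential([_Embedding(10000, 64, input_length=200)]).output_shape)
# -> (None, 200, 64)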
                    output_dim=embedding_size,
                    embeddings_initializer=keras.initializers.Zeros(),  # 'uniform'
                    embeddings_regularizer=regularizer,
                    mask_zero=True,
                    input_length=maxlen,
                    name='embedding_1'))
'''model.add(Embedding(vocab_size, embedding_size, input_length=maxlen,
                    W_regularizer=regularizer, dropout=p_emb, weights=[embedding],
                    mask_zero=True, name='embedding_1'))'''
model.add(SpatialDropout1D(p_emb, name='dropout_emb_1'))

for i in range(rnn_layers):
    # Keras 2 LSTM signature, for reference:
    # keras.layers.recurrent.LSTM(units, activation='tanh', recurrent_activation='hard_sigmoid',
    #     use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal',
    #     bias_initializer='zeros', unit_forget_bias=True, kernel_regularizer=None,
    #     recurrent_regularizer=None, bias_regularizer=None, activity_regularizer=None,
    #     kernel_constraint=None, recurrent_constraint=None, bias_constraint=None,
    #     dropout=0.0, recurrent_dropout=0.0)
    lstm = LSTM(units=rnn_size,
                return_sequences=True,
                kernel_regularizer=regularizer,
                recurrent_regularizer=regularizer,
                bias_regularizer=regularizer,
                dropout=p_W,
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)
X = pad_sequences(X)
X[:2]

# Next, I compose the LSTM network. Note that **embed_dim**, **lstm_out**, **batch_size**
# and **droupout_x** are hyperparameters: their values here are intuitive starting points
# and should be tuned to achieve good results. Please also note that I am using softmax
# as the activation function. The reason is that the network uses categorical crossentropy
# as its loss, and softmax is the matching activation for that.

# In[7]:

embed_dim = 128
lstm_out = 196

model = Sequential()
model.add(Embedding(max_fatures, embed_dim, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

# Hereby I declare the train and test datasets.

# In[8]:

Y = pd.get_dummies(data['sentiment']).values
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20,