def get_test_model_gru_stateful_optional(stateful):
    """Return a small fitted test model exercising GRU layer variants.

    Builds, per input, a stacked GRU branch, a stacked Bidirectional-GRU
    branch, and a cuDNN-compatible GRU pair (tanh/sigmoid, reset_after=True),
    compiles with MSE/Nadam, and fits briefly on dummy data.

    Args:
        stateful: bool forwarded to every GRU layer's ``stateful`` argument.

    Returns:
        The fitted keras ``Model``.
    """
    input_shapes = [
        (17, 4),
        (1, 10)
    ]
    # stateful RNNs need a fixed batch size, hence batch_shape with size 1
    stateful_batch_size = 1
    inputs = [Input(batch_shape=(stateful_batch_size,) + s) for s in input_shapes]
    outputs = []
    for inp in inputs:
        # plain GRU stack: sequence-returning layer into vector-returning layer
        gru_sequences = GRU(
            stateful=stateful,
            units=8,
            recurrent_activation='relu',
            reset_after=True,
            return_sequences=True,
            use_bias=True
        )(inp)
        gru_regular = GRU(
            stateful=stateful,
            units=3,
            recurrent_activation='sigmoid',
            reset_after=True,
            return_sequences=False,
            use_bias=False
        )(gru_sequences)
        outputs.append(gru_regular)
        # bidirectional stack covering other activation / reset_after combos
        gru_bidi_sequences = Bidirectional(
            GRU(
                stateful=stateful,
                units=4,
                recurrent_activation='hard_sigmoid',
                reset_after=False,
                return_sequences=True,
                use_bias=True
            )
        )(inp)
        gru_bidi = Bidirectional(
            GRU(
                stateful=stateful,
                units=6,
                recurrent_activation='sigmoid',
                reset_after=True,
                return_sequences=False,
                use_bias=False
            )
        )(gru_bidi_sequences)
        outputs.append(gru_bidi)
        # configuration matching the cuDNN-compatible ("GPU") GRU kernel
        gru_gpu_regular = GRU(
            stateful=stateful,
            units=3,
            activation='tanh',
            recurrent_activation='sigmoid',
            reset_after=True,
            use_bias=True
        )(inp)
        gru_gpu_bidi = Bidirectional(
            GRU(
                stateful=stateful,
                units=3,
                activation='tanh',
                recurrent_activation='sigmoid',
                reset_after=True,
                use_bias=True
            )
        )(inp)
        outputs.append(gru_gpu_regular)
        outputs.append(gru_gpu_bidi)
    model = Model(inputs=inputs, outputs=outputs, name='test_model_gru')
    model.compile(loss='mse', optimizer='nadam')

    # fit to dummy data: predict once to learn the output shapes, then train
    training_data_size = 2
    data_in = generate_input_data(training_data_size, input_shapes)
    initial_data_out = model.predict(data_in)
    data_out = generate_output_data(training_data_size, initial_data_out)
    model.fit(data_in, data_out, batch_size=stateful_batch_size, epochs=10)
    return model
# Model Building params for multiple models LAYERS = [2, 3] SIZE = [64, 128] LEARNER = [tf.keras.optimizers.RMSprop()] # Build each model and run for 5 epochs to get good idea of possibilities for layer in LAYERS: for s in SIZE: for l in LEARNER: print(f"Building new model: layers:{layer} size:{s} learner:{l}") # Build Model model = Sequential() model.add( Bidirectional( GRU(s, input_shape=(trainX.shape[1:]), return_sequences=True))) model.add(Dropout(0.2)) model.add(BatchNormalization()) for eachLayer in range(layer - 2): model.add(Bidirectional(GRU(s, return_sequences=True))) model.add(Dropout(0.2)) model.add(BatchNormalization()) model.add(Bidirectional(GRU(s, return_sequences=False))) model.add(Dropout(0.2)) model.add(BatchNormalization()) model.add(Dense(32, activation='relu')) model.add(Dropout(0.2))
def train(data, batch_size, epoch, maxlen, output_dir, selected_layer):
    """Train a binary text classifier with a BERT, BiLSTM or CNN encoder.

    Args:
        data: DataFrame with 'document' (text) and 'label' (0/1) columns.
        batch_size: training batch size.
        epoch: number of training epochs.
        maxlen: target token-sequence length (pad/truncate).
        output_dir: directory the model (and BERT checkpoints) are saved to.
        selected_layer: one of 'bert', 'bilstm', 'cnn'.
    """
    # keep only Korean, English and digits, then run morphological analysis
    cleaned = preprocess(data['document'].values)
    if selected_layer == 'bert':
        # build a tokenizer from the multi_cased model's vocab file
        FullTokenizer = bert.bert_tokenization.FullTokenizer
        tokenizer = FullTokenizer(vocab_file=vocab_file, do_lower_case=False)
        # convert the preprocessed text into BERT's expected input format
        train_tokens = [["[CLS]"] + tokenizer.tokenize(sentence) + ["[SEP]"]
                        for sentence in cleaned]
        train_tokens_ids = [tokenizer.convert_tokens_to_ids(token)
                            for token in train_tokens]
        train_data = pad_sequences(train_tokens_ids, maxlen=maxlen,
                                   dtype="long", truncating="post",
                                   padding="post")
        # declare the input shape fed into bert_layer
        input_1 = Input(shape=(maxlen,), dtype=tf.int32,
                        name="input_word_ids")
        # load the pre-trained BERT model as a keras layer
        bert_params = bert.params_from_pretrained_ckpt(
            './multi_cased_L-12_H-768_A-12')
        bert_layer = bert.BertModelLayer.from_params(bert_params, name="bert")
        # run the input through bert_layer, then flatten to 2-D while
        # preserving the learned weights
        bert_l = bert_layer(input_1)
        flatten = Flatten()(bert_l)
        layer = flatten
    else:
        # for cnn/bilstm: cap the tokenizer vocabulary (voca_size) and fit it
        voca_size = 1000000
        tokenizer = Tokenizer(num_words=voca_size)
        tokenizer.fit_on_texts(cleaned)
        # persist the tokenizer as JSON for fast reload at eval/predict time
        tokenizer_json = tokenizer.to_json()
        with open('tokenizer.json', 'w', encoding='utf-8') as f:
            f.write(json.dumps(tokenizer_json, ensure_ascii=False))
        # integer-encode the preprocessed text and pad to maxlen
        train_data = tokenizer.texts_to_sequences(cleaned)
        train_data = pad_sequences(train_data, padding='post', maxlen=maxlen)
        ## declare the input shape for the cnn/bilstm layers
        input_1 = Input(shape=(maxlen,))
        # load the pre-trained GloVe embedding model (frozen weights)
        vocab_size = len(tokenizer.word_index) + 1
        embedding_dim = 100
        embedding_matrix = pretrained_embedding_load(
            'glove.txt', vocab_size=vocab_size,
            num_demension=embedding_dim, tokenizer=tokenizer)
        embedding_layer = Embedding(output_dim=embedding_dim,
                                    input_dim=vocab_size,
                                    weights=[embedding_matrix],
                                    input_length=maxlen,
                                    trainable=False)(input_1)
    if selected_layer == 'bilstm':
        # feed the embedding layer into a bidirectional LSTM
        bilstm1 = Bidirectional(
            LSTM(256, dropout=0.3, recurrent_dropout=0.3))(embedding_layer)
        layer = bilstm1
    elif selected_layer == 'cnn':
        filter_sizes = 3  # convolution filter size: looks at 3 words at once
        num_filters = 512  # number of filters
        # add a channel axis so the tensor matches what Conv2D expects
        reshape = Reshape((maxlen, embedding_dim, 1))(embedding_layer)
        # build the CNN via convolution and pooling layers
        conv1 = Conv2D(num_filters,
                       kernel_size=(filter_sizes, embedding_dim),
                       padding='valid', kernel_initializer='normal',
                       activation='relu')(reshape)
        maxpool1 = MaxPool2D(pool_size=(maxlen - filter_sizes + 1, 1),
                             strides=(1, 1), padding='valid')(conv1)
        # flatten to 2-D for the binary classification head
        flatten = Flatten()(maxpool1)
        layer = flatten
    # labels as a numpy array
    label = np.array(data['label'])
    # relu dense layer shrinks the hidden representation
    dense_layer = Dense(16, activation='relu')(layer)
    # dropout to reduce overfitting
    drop = Dropout(rate=0.1)(dense_layer)
    # binary classification: single sigmoid output unit
    output = Dense(1, activation='sigmoid')(drop)
    model = Model(inputs=input_1, outputs=output)
    # binary classification: binary_crossentropy loss with the Adam optimizer
    model.compile(loss='binary_crossentropy',
                  optimizer=tf.optimizers.Adam(lr=0.00001),
                  metrics=['accuracy'])
    print(model.summary())
    if selected_layer == 'bert':
        # callback that checkpoints the model weights during training
        checkpointName = os.path.join(output_dir, "bert_model.ckpt")
        cp_callback = ModelCheckpoint(filepath=checkpointName,
                                      save_weights_only=True, verbose=1)
        model.fit(x=train_data, y=label, batch_size=batch_size,
                  epochs=epoch, verbose=1, validation_split=0.2,
                  callbacks=[cp_callback])
    else:
        model.fit(x=train_data, y=label, batch_size=batch_size,
                  epochs=epoch, verbose=1, validation_split=0.2)
    model.save(output_dir)
# NOTE(review): this chunk begins mid-function — the `def` line of
# get_sequence(n_timesteps) sits above the visible region.  Labels flip
# from 0 to 1 once the cumulative sum of X passes a quarter of the steps.
    limit = n_timesteps / 4.0
    y = np.array([0 if x < limit else 1 for x in np.cumsum(X)])
    # reshape to (batch=1, timesteps, features=1) as LSTM layers expect
    X = X.reshape(1, n_timesteps, 1)
    y = y.reshape(1, n_timesteps, 1)
    return X, y


n_units = 20
n_timesteps = 4

model = Sequential()
# BiLSTM over the whole sequence; TimeDistributed applies the sigmoid
# classifier independently at every timestep
model.add(
    Bidirectional(
        LSTM(n_units, return_sequences=True,
             input_shape=(n_timesteps, 1))))
model.add(TimeDistributed(Dense(1, activation='sigmoid')))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# train on a freshly generated random sequence each iteration
for spoch in range(1000):
    X, y = get_sequence(n_timesteps)
    model.fit(X, y, epochs=1, batch_size=1, verbose=2)

# evaluate on one more random sequence
# NOTE(review): Sequential.predict_classes was removed in TF 2.6 —
# confirm the TensorFlow version this targets
X, y = get_sequence(n_timesteps)
yhat = model.predict_classes(X, verbose=0)
for i in range(n_timesteps):
    print('실젯값 : ', y[0, i], '예측값 : ', yhat[0, i])
# NOTE(review): chunk begins inside an n-gram-building loop — its `for`
# headers are above the visible region.  Each prefix of a tokenized line
# becomes one training sequence.
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

# pad sequences (pre-padding so the target word stays in the last column)
max_sequence_len = max([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences,
                                         maxlen=max_sequence_len,
                                         padding='pre'))

# create predictors and label: last token of each sequence is the target
predictors, label = input_sequences[:, :-1], input_sequences[:, -1]
label = ku.to_categorical(label, num_classes=total_words)

model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_len-1))  #(# Your Embedding Layer)
model.add(Bidirectional(LSTM(150, return_sequences=True)))  #(# An LSTM Layer)
model.add(Dropout(0.2))  #(# A dropout layer)
model.add(LSTM(100))  #(# Another LSTM Layer)
# NOTE(review): total_words/2 is a float unit count — Keras coerces it, but
# total_words // 2 would be clearer
model.add(Dense(total_words/2, activation='relu'))  #(# A Dense Layer including regularizers)
model.add(Dense(total_words, activation='softmax'))  #(# A Dense Layer)
# Pick an optimizer
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])  #(# Pick a loss function and an optimizer)
print(model.summary())
history = model.fit(predictors, label, epochs=100, verbose=1)

import matplotlib.pyplot as plt

# NOTE(review): the 'acc' history key is TF1-era; TF2 logs 'accuracy' —
# verify against the installed Keras version
acc = history.history['acc']
loss = history.history['loss']
epochs = range(len(acc))
# TensorBoard logging plus best-checkpoint saving on validation loss
callbackslist = [
    TensorBoard(log_dir='logs'),
    ModelCheckpoint(filepath='Model.h5',
                    monitor='val_loss',
                    save_best_only=True)
]

# image branch: 2048-d feature vector (presumably a CNN-encoder output —
# TODO confirm against the feature extraction code)
input1 = Input(shape=(2048, ))
imodel1 = Dropout(0.5)(input1)
imodel2 = Dense(512, activation='relu')(imodel1)

# text branch: 34-step token sequence -> frozen embedding -> stacked BiLSTMs
input2 = Input(shape=(34, ))
tmodel1 = Embedding(vocabulary_size, 50, mask_zero=True,
                    trainable=False)(input2)
tmodel2 = Dropout(0.4)(tmodel1)
tmodel3 = Bidirectional(LSTM(256, return_sequences=True))(tmodel2)
tmodel4 = Dropout(0.4)(tmodel3)
tmodel5 = Bidirectional(LSTM(256, return_sequences=False))(tmodel4)

# decoder: merge both 512-d branches and predict over the vocabulary
decoder1 = Add()([imodel2, tmodel5])
decoder2 = Dense(256, activation='relu')(decoder1)
outputs = Dense(vocabulary_size, activation='softmax')(decoder2)
model = Model(inputs=[input1, input2], outputs=outputs)
model.summary()
# NOTE(review): assumes model.layers[1] is the Embedding layer — verify the
# functional-model layer ordering before injecting emb_matrix
model.layers[1].set_weights([emb_matrix])
# NOTE(review): binary_crossentropy with a softmax over the vocabulary is an
# unusual pairing — categorical_crossentropy is the conventional choice;
# confirm this is intentional
model.compile(loss='binary_crossentropy', optimizer='adam')
model.fit([TX2, TX1], TY, epochs=10,
def build_model_hpconfig(args):
    """Build and compile a Keras model for hyperparameter tuning.

    Architecture: embedded protein sequence + profile features, three
    parallel 1-D conv blocks (BN, ReLU, dropout, max-pool), two stacked
    recurrent layers (LSTM or GRU, optionally bidirectional) concatenated
    with the conv features, then dense layers to an 8-class softmax.

    Args:
        args: parsed argparse namespace carrying all hyperparameters.

    Returns:
        Compiled keras Model, or None if args.recurrent_layer is neither
        'lstm' nor 'gru'.
    """
    # parse and assign hyperparameter variables from argparse
    conv1_filters = int(args.conv1_filters)
    conv2_filters = int(args.conv2_filters)
    conv3_filters = int(args.conv3_filters)
    window_size = int(args.window_size)
    kernel_regularizer = args.kernel_regularizer
    max_pool_size = int(args.pool_size)
    conv_dropout = float(args.conv_dropout)
    conv1d_initializer = args.conv_weight_initializer
    recurrent_layer1 = int(args.recurrent_layer1)
    recurrent_layer2 = int(args.recurrent_layer2)
    recurrent_dropout = float(args.recurrent_dropout)
    after_recurrent_dropout = float(args.after_recurrent_dropout)
    recurrent_recurrent_dropout = float(args.recurrent_recurrent_dropout)
    recurrent_initalizer = args.recurrent_weight_initializer
    optimizer = args.optimizer
    learning_rate = float(args.learning_rate)
    bidirection = args.bidirection
    recurrent_layer = str(args.recurrent_layer)
    dense_dropout = float(args.dense_dropout)
    dense_1 = int(args.dense_1)
    dense_initializer = args.dense_weight_initializer
    train_data = str(args.train_input_data)

    # main input is the length of the amino acid in the protein sequence (700,)
    main_input = Input(shape=(700, ), dtype='float32', name='main_input')
    # Embedding Layer used as input to the neural network
    embed = Embedding(output_dim=21, input_dim=21, input_length=700)(main_input)
    # secondary input is the protein profile features
    auxiliary_input = Input(shape=(700, 21), name='aux_input')
    # get shape of input layers
    print("Protein Sequence shape: ", main_input.get_shape())
    print("Protein Profile shape: ", auxiliary_input.get_shape())
    # concatenate input layers
    concat = Concatenate(axis=-1)([embed, auxiliary_input])

    # 3x 1D Convolutional Hidden Layers with BatchNormalization, Dropout and
    # MaxPooling.
    # BUGFIX: the original reassigned the Dropout *output tensor* back to
    # `conv_dropout`, clobbering the float dropout rate before the second and
    # third conv blocks used it; the tensors now go to conv_drop1/2/3.
    conv_layer1 = Conv1D(conv1_filters, window_size,
                         kernel_regularizer=kernel_regularizer,
                         padding='same',
                         kernel_initializer=conv1d_initializer)(concat)
    batch_norm = BatchNormalization()(conv_layer1)
    conv_act = activations.relu(batch_norm)
    conv_drop1 = Dropout(conv_dropout)(conv_act)
    max_pool_1D_1 = MaxPooling1D(pool_size=max_pool_size, strides=1,
                                 padding='same')(conv_drop1)

    # NOTE(review): conv block 2 has no kernel_regularizer, unlike blocks 1
    # and 3 — confirm whether that asymmetry is intentional
    conv_layer2 = Conv1D(conv2_filters, window_size, padding='same',
                         kernel_initializer=conv1d_initializer)(concat)
    batch_norm = BatchNormalization()(conv_layer2)
    conv_act = activations.relu(batch_norm)
    conv_drop2 = Dropout(conv_dropout)(conv_act)
    max_pool_1D_2 = MaxPooling1D(pool_size=max_pool_size, strides=1,
                                 padding='same')(conv_drop2)

    conv_layer3 = Conv1D(conv3_filters, window_size,
                         kernel_regularizer=kernel_regularizer,
                         padding='same',
                         kernel_initializer=conv1d_initializer)(concat)
    batch_norm = BatchNormalization()(conv_layer3)
    conv_act = activations.relu(batch_norm)
    conv_drop3 = Dropout(conv_dropout)(conv_act)
    max_pool_1D_3 = MaxPooling1D(pool_size=max_pool_size, strides=1,
                                 padding='same')(conv_drop3)

    # concat pooling layers
    conv_features = Concatenate(axis=-1)(
        [max_pool_1D_1, max_pool_1D_2, max_pool_1D_3])
    print("Shape of convolutional output: ", conv_features.get_shape())
    conv_features = Dense(600, activation='relu')(conv_features)

    ######## Recurrent Layers ########
    if (recurrent_layer == 'lstm'):
        if (bidirection):
            print('Entering LSTM Layers')
            # Creating Bidirectional LSTM layers
            lstm_f1 = Bidirectional(
                LSTM(recurrent_layer1,
                     return_sequences=True,
                     activation='tanh',
                     recurrent_activation='sigmoid',
                     dropout=recurrent_dropout,
                     recurrent_dropout=recurrent_recurrent_dropout,
                     kernel_initializer=recurrent_initalizer))(conv_features)
            lstm_f2 = Bidirectional(
                LSTM(recurrent_layer2,
                     return_sequences=True,
                     activation='tanh',
                     recurrent_activation='sigmoid',
                     dropout=recurrent_dropout,
                     recurrent_dropout=recurrent_recurrent_dropout,
                     kernel_initializer=recurrent_initalizer))(lstm_f1)
            # concatenate LSTM with convolutional layers
            concat_features = Concatenate(axis=-1)(
                [lstm_f1, lstm_f2, conv_features])
            concat_features = Dropout(after_recurrent_dropout)(concat_features)
            print('Concatenated LSTM layers')
        else:
            # Creating unidirectional LSTM Layers
            lstm_f1 = LSTM(
                recurrent_layer1,
                return_sequences=True,
                activation='tanh',
                recurrent_activation='sigmoid',
                dropout=recurrent_dropout,
                recurrent_dropout=recurrent_recurrent_dropout,
                kernel_initializer=recurrent_initalizer)(conv_features)
            lstm_f2 = LSTM(
                recurrent_layer2,
                return_sequences=True,
                activation='tanh',
                recurrent_activation='sigmoid',
                dropout=recurrent_dropout,
                recurrent_dropout=recurrent_recurrent_dropout,
                kernel_initializer=recurrent_initalizer)(lstm_f1)
            # concatenate LSTM with convolutional layers
            concat_features = Concatenate(axis=-1)(
                [lstm_f1, lstm_f2, conv_features])
            concat_features = Dropout(after_recurrent_dropout)(concat_features)
    elif (recurrent_layer == 'gru'):
        if (bidirection):
            # Creating Bidirectional GRU layers
            gru_f1 = Bidirectional(
                GRU(recurrent_layer1,
                    return_sequences=True,
                    activation='tanh',
                    recurrent_activation='sigmoid',
                    dropout=recurrent_dropout,
                    recurrent_dropout=recurrent_recurrent_dropout,
                    kernel_initializer=recurrent_initalizer))(conv_features)
            gru_f2 = Bidirectional(
                GRU(recurrent_layer2,
                    return_sequences=True,
                    activation='tanh',
                    recurrent_activation='sigmoid',
                    dropout=recurrent_dropout,
                    recurrent_dropout=recurrent_recurrent_dropout,
                    kernel_initializer=recurrent_initalizer))(gru_f1)
            # concatenate GRU with convolutional layers
            concat_features = Concatenate(axis=-1)(
                [gru_f1, gru_f2, conv_features])
            concat_features = Dropout(after_recurrent_dropout)(concat_features)
        else:
            # Creating unidirectional GRU Layers
            gru_f1 = GRU(
                recurrent_layer1,
                return_sequences=True,
                activation='tanh',
                recurrent_activation='sigmoid',
                dropout=recurrent_dropout,
                recurrent_dropout=recurrent_recurrent_dropout,
                kernel_initializer=recurrent_initalizer)(conv_features)
            # BUGFIX: the second GRU previously reused recurrent_layer1 for
            # its units (copy-paste); recurrent_layer2 matches the LSTM branch
            gru_f2 = GRU(
                recurrent_layer2,
                return_sequences=True,
                activation='tanh',
                recurrent_activation='sigmoid',
                dropout=recurrent_dropout,
                recurrent_dropout=recurrent_recurrent_dropout,
                kernel_initializer=recurrent_initalizer)(gru_f1)
            # concatenate GRU with convolutional layers
            concat_features = Concatenate(axis=-1)(
                [gru_f1, gru_f2, conv_features])
            concat_features = Dropout(after_recurrent_dropout)(concat_features)
    else:
        print('Only LSTM and GRU recurrent layers are used in this model')
        return

    # Dense Fully-Connected DNN layers
    fc_dense1 = Dense(dense_1, activation='relu',
                      kernel_initializer=dense_initializer)(concat_features)
    fc_dense1_dropout = Dropout(dense_dropout)(fc_dense1)
    # Final Output layer with 8 nodes for the 8 output classifications
    main_output = Dense(8, activation='softmax',
                        name='main_output')(fc_dense1_dropout)

    # create model from inputs and outputs
    model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output])

    # Set optimizer to be used with the model, default is Adam
    if optimizer == 'adam':
        optimizer = Adam(learning_rate=learning_rate, name='adam')
    elif optimizer == 'sgd':
        # NOTE(review): SGD ignores args.learning_rate — confirm intentional
        optimizer = SGD(lr=0.01, momentum=0.0, nesterov=False, name='SGD')
    elif optimizer == 'rmsprop':
        optimizer = RMSprop(learning_rate=learning_rate, centered=True,
                            name='RMSprop')
    elif optimizer == 'adagrad':
        optimizer = Adagrad(learning_rate=learning_rate, name='Adagrad')
    elif optimizer == 'adamax':
        optimizer = Adamax(learning_rate=learning_rate, name='Adamax')
    else:
        optimizer = Adam(learning_rate=learning_rate, name='adam')

    # compile model using optimizer and the categorical crossentropy loss
    model.compile(optimizer=optimizer,
                  loss={'main_output': 'categorical_crossentropy'},
                  metrics=[
                      'accuracy',
                      MeanSquaredError(),
                      FalseNegatives(),
                      FalsePositives(),
                      TrueNegatives(),
                      TruePositives(),
                      MeanAbsoluteError(),
                      Recall(),
                      Precision()
                  ])
    # get summary of model including its layers and num parameters
    model.summary()
    return model
def doaV0(self):
    """Build the DOA (direction-of-arrival) estimation network, version 0.

    CNN feature extractor (6 conv blocks) -> bidirectional GRU ->
    self-attention -> per-frame dense head with tanh outputs reshaped to
    (batch, time, doa_outputs).

    Returns:
        tf.keras.Model named "Doa_net_v0".
    """
    inputs = tf.keras.Input(self.input_shape)
    # params hold a keep-probability; Dropout takes a drop rate
    drop_rate = 1. - self.params['dropout_keep_prob_cnn']

    # conv blocks 1+2: two 3x3 convs, then 5x2 pooling
    x = Conv2D(name='conv1', filters=64, kernel_size=(3, 3),
               strides=(1, 1), padding='same')(inputs)
    x = BatchNormalization(name='bn1', center=True, scale=True,
                           trainable=True)(x)
    x = tf.keras.activations.relu(x)
    x = Conv2D(name='conv2', filters=64, kernel_size=(3, 3),
               strides=(1, 1), padding='same')(x)
    x = BatchNormalization(name='bn2', center=True, scale=True,
                           trainable=True)(x)
    x = tf.keras.activations.relu(x)
    x = MaxPool2D(name='maxpool2', pool_size=(5, 2), strides=(5, 2),
                  padding='same')(x)
    x = Dropout(rate=drop_rate)(x)

    # conv blocks 3-6: progressively wider filters, pooling only the last axis
    x = Conv2D(name='conv3', filters=128, kernel_size=(3, 3),
               strides=(1, 1), padding='same')(x)
    x = BatchNormalization(name='bn3', center=True, scale=True,
                           trainable=True)(x)
    x = tf.keras.activations.relu(x)
    x = MaxPool2D(name='maxpool3', pool_size=(1, 2), strides=(1, 2),
                  padding='valid')(x)
    x = Dropout(rate=drop_rate)(x)
    x = Conv2D(name='conv4', filters=128, kernel_size=(3, 3),
               strides=(1, 1), padding='same')(x)
    x = BatchNormalization(name='bn4', center=True, scale=True,
                           trainable=True)(x)
    x = tf.keras.activations.relu(x)
    x = MaxPool2D(name='maxpool4', pool_size=(1, 2), strides=(1, 2),
                  padding='valid')(x)
    x = Dropout(rate=drop_rate)(x)
    x = Conv2D(name='conv5', filters=256, kernel_size=(3, 3),
               strides=(1, 1), padding='same')(x)
    x = BatchNormalization(name='bn5', center=True, scale=True,
                           trainable=True)(x)
    x = tf.keras.activations.relu(x)
    x = MaxPool2D(name='maxpool5', pool_size=(1, 2), strides=(1, 2),
                  padding='valid')(x)
    x = Dropout(rate=drop_rate)(x)
    x = Conv2D(name='conv6', filters=256, kernel_size=(3, 3),
               strides=(1, 1), padding='same')(x)
    x = BatchNormalization(name='bn6', center=True, scale=True,
                           trainable=True)(x)
    x = tf.keras.activations.relu(x)
    x = MaxPool2D(name='maxpool6', pool_size=(1, 2), strides=(1, 2),
                  padding='valid')(x)
    x = Dropout(rate=drop_rate)(x)

    # fold the remaining spatial axis into channels: (batch, time, 2*256)
    x = tf.reshape(x, [-1, self.out_shape_doa[0], 2 * 256])
    # layer name typo 'bidirecrtionalGRU' is kept — renaming would break
    # existing checkpoints
    x = Bidirectional(GRU(units=self.params['rnn_hidden_size'],
                          return_sequences=True),
                      name='bidirecrtionalGRU')(x)
    x = SelfAttention(attention_size=self.params['attention_size'])(x)
    # flatten timesteps into the batch axis for the per-frame dense head
    x = tf.reshape(x, [-1, 2 * self.params['rnn_hidden_size']])
    drop_rate_dnn = 1. - self.params['dropout_keep_prob_dnn']
    # -------------DOA----------------
    x = Dense(self.params['dnn_size'], activation='relu',
              name='dense_relu_doa1')(x)
    x = Dropout(rate=drop_rate_dnn)(x)
    x = Dense(self.params['dnn_size'], activation='relu',
              name='dense_relu_doa2')(x)
    x = Dropout(rate=drop_rate_dnn)(x)
    x = Dense(self.out_shape_doa[-1], name='dense_doa3')(x)
    # tanh bounds the DOA outputs to [-1, 1]
    x = tf.keras.activations.tanh(x)
    x = tf.reshape(x, shape=[-1, self.out_shape_doa[0],
                             self.out_shape_doa[1]], name='output_doa')
    model = tf.keras.Model(
        inputs=inputs, outputs=x, name="Doa_net_v0")
    return model
def sedV0(self, *args, **kwargs):
    """Build the SED (sound event detection) network, version 0.

    Same CNN -> BiGRU -> self-attention trunk as doaV0, but the head uses
    sigmoid activations for multi-label event detection, reshaped to
    (batch, time, n_events).

    Returns:
        tf.keras.Model named "Sed_net_v0".
    """
    out_shape_sed = self.out_shape_sed
    params = self.params
    inputs = tf.keras.Input(self.input_shape)
    # params hold a keep-probability; Dropout takes a drop rate
    drop_rate = 1. - params['dropout_keep_prob_cnn']

    # conv blocks 1+2: two 3x3 convs, then 5x2 pooling
    x = Conv2D(name='conv1', filters=64, kernel_size=(3, 3),
               strides=(1, 1), padding='same')(inputs)
    x = BatchNormalization(name='bn1', center=True, scale=True,
                           trainable=True)(x)
    x = tf.keras.activations.relu(x)
    x = Conv2D(name='conv2', filters=64, kernel_size=(3, 3),
               strides=(1, 1), padding='same')(x)
    x = BatchNormalization(name='bn2', center=True, scale=True,
                           trainable=True)(x)
    x = tf.keras.activations.relu(x)
    x = MaxPool2D(name='maxpool2', pool_size=(5, 2), strides=(5, 2),
                  padding='same')(x)
    x = Dropout(rate=drop_rate)(x)

    # conv blocks 3-6: progressively wider filters, pooling only the last axis
    x = Conv2D(name='conv3', filters=128, kernel_size=(3, 3),
               strides=(1, 1), padding='same')(x)
    x = BatchNormalization(name='bn3', center=True, scale=True,
                           trainable=True)(x)
    x = tf.keras.activations.relu(x)
    x = MaxPool2D(name='maxpool3', pool_size=(1, 2), strides=(1, 2),
                  padding='valid')(x)
    x = Dropout(rate=drop_rate)(x)
    x = Conv2D(name='conv4', filters=128, kernel_size=(3, 3),
               strides=(1, 1), padding='same')(x)
    x = BatchNormalization(name='bn4', center=True, scale=True,
                           trainable=True)(x)
    x = tf.keras.activations.relu(x)
    x = MaxPool2D(name='maxpool4', pool_size=(1, 2), strides=(1, 2),
                  padding='valid')(x)
    x = Dropout(rate=drop_rate)(x)
    x = Conv2D(name='conv5', filters=256, kernel_size=(3, 3),
               strides=(1, 1), padding='same')(x)
    x = BatchNormalization(name='bn5', center=True, scale=True,
                           trainable=True)(x)
    x = tf.keras.activations.relu(x)
    x = MaxPool2D(name='maxpool5', pool_size=(1, 2), strides=(1, 2),
                  padding='valid')(x)
    x = Dropout(rate=drop_rate)(x)
    x = Conv2D(name='conv6', filters=256, kernel_size=(3, 3),
               strides=(1, 1), padding='same')(x)
    x = BatchNormalization(name='bn6', center=True, scale=True,
                           trainable=True)(x)
    x = tf.keras.activations.relu(x)
    x = MaxPool2D(name='maxpool6', pool_size=(1, 2), strides=(1, 2),
                  padding='valid')(x)
    x = Dropout(rate=drop_rate)(x)

    # fold the remaining spatial axis into channels: (batch, time, 2*256)
    x = tf.reshape(x, [-1, out_shape_sed[0], 2 * 256])
    # layer name typo 'bidirecrtionalGRU' is kept — renaming would break
    # existing checkpoints
    x = Bidirectional(GRU(units=params['rnn_hidden_size'],
                          return_sequences=True),
                      name='bidirecrtionalGRU')(x)
    x = SelfAttention(attention_size=params['attention_size'])(x)
    # flatten timesteps into the batch axis for the per-frame dense head
    x = tf.reshape(x, [-1, 2 * params['rnn_hidden_size']])
    drop_rate_dnn = 1. - params['dropout_keep_prob_dnn']
    # -------------SED----------------
    x_sed = Dense(params['dnn_size'], activation='relu',
                  name='dense_relu_sed1')(x)
    x_sed = Dropout(rate=drop_rate_dnn)(x_sed)
    x_sed = Dense(params['dnn_size'], activation='relu',
                  name='dense_relu_sed2')(x_sed)
    x_sed = Dropout(rate=drop_rate_dnn)(x_sed)
    x_sed = Dense(out_shape_sed[-1], name='dense_sed3')(x_sed)
    # sigmoid gives independent per-event probabilities (multi-label)
    x_sed = tf.keras.activations.sigmoid(x_sed)
    x_sed = tf.reshape(x_sed, shape=[-1, out_shape_sed[0],
                                     out_shape_sed[1]], name='output_sed')
    model = tf.keras.Model(
        inputs=inputs, outputs=x_sed, name="Sed_net_v0")
    return model
def train(self, dataset='all'):
    """Train a BiLSTM sentiment model with ktrain's one-cycle policy.

    Loads data, builds an Embedding -> BiLSTM -> sigmoid model, wraps it
    in a ktrain learner, and fits for up to 20 epochs with early stopping
    and per-epoch validation reports saved under logs/rnn/.

    Args:
        dataset: forwarded to self._get_data to select the data split.

    Returns:
        The trained keras model.
    """
    self.device_calibration()
    X_train, X_test, y_train, y_test, X, y = self._get_data(
        dataset, 'tensor')
    trn, val, preproc = ktrain.text.texts_from_array(X_train,
                                                     y_train,
                                                     X_test,
                                                     y_test,
                                                     maxlen=26)
    model = tf.keras.Sequential([
        Embedding(30000, 15),
        Dropout(0.2),
        Bidirectional(LSTM(15)),
        Dense(1, activation='sigmoid')
    ])
    print(model.summary())
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
                  optimizer=tf.keras.optimizers.Adam(1e-4),
                  metrics=[
                      f1,
                      Recall(name='recall'),
                      Precision(name='precision'), 'accuracy'
                  ])
    print(model.summary())

    # callback: write a validation report CSV at the end of each epoch
    class validate(tf.keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs=None):
            learner.validate(print_report=False,
                             save_path='logs/rnn/e' + str(epoch + 1) +
                             '.csv',
                             class_names=preproc.get_classes())

    learner = ktrain.get_learner(model,
                                 train_data=trn,
                                 val_data=val,
                                 batch_size=100)
    # NOTE(review): this second compile replaces the optimizer/metrics set
    # above (drops f1/recall/precision) — confirm it is intentional
    learner.model.compile(metrics=['accuracy'],
                          loss=tf.keras.losses.BinaryCrossentropy(),
                          optimizer='adam')
    learner.set_weight_decay(0.01)
    learner.fit_onecycle(1e-4,
                         20,
                         callbacks=[
                             tf.keras.callbacks.EarlyStopping(
                                 patience=5,
                                 monitor='val_loss',
                                 mode='min',
                                 restore_best_weights=True),
                             validate()
                         ])
    self.y_train = y_train
    self.y_test = y_test
    self.X_test = X_test
    return model
def fit(
    self,
    X_train,
    y_train,
    maxlen = 100,
    learning_rate = 1e-3,
    batch_size = 8,
    dropout = 0.3,
    units = 124,
    wdir = 'checkpoints/',
):
    '''Train a new binary classifier (Embedding -> BiLSTM -> GlobalMaxPool
    -> sigmoid) on raw texts.

    Preprocesses and tokenizes X_train, builds the model with frozen
    pre-trained embeddings, trains with early stopping / LR reduction /
    checkpointing, plots the learning curves, and pickles the tokenizer.

    Args:
        X_train: iterable of raw text strings.
        y_train: binary labels.
        maxlen: minimum padded sequence length (grown to the longest text).
        learning_rate: Adam learning rate.
        batch_size: training batch size.
        dropout: dropout rate inside the LSTM.
        units: LSTM units per direction.
        wdir: checkpoint directory, created if missing.
    '''
    # create directory to save model files to
    cwd = os.getcwd()
    wdir = os.path.join(cwd, wdir)
    if not os.path.exists(wdir):
        os.makedirs(wdir)

    print('Preprocessing text')
    X_train = [self.preprocess(t) for t in X_train]
    # ensure padding length covers the longest (whitespace-split) text
    new_maxlen = max([len(i.split()) for i in X_train])
    maxlen = max(maxlen, new_maxlen)
    self.max_len = maxlen

    # KERAS TOKENIZER
    print('Tokenizing text')
    self.tokenizer = Tokenizer(
        num_words=6500,
        lower=True,
        oov_token='oov',
        filters='"#$%&()*+,-./:;<=>@[\\]^_`{|}~\t\n',  # removed '!' and '?'
    )
    self.tokenizer.fit_on_texts(X_train)
    X_train = self.tokenizer.texts_to_sequences(X_train)
    self.vocab_size = len(self.tokenizer.word_index) + 1
    print('Vocabulary size:', self.vocab_size)
    X_train = pad_sequences(X_train, padding='post', maxlen=maxlen)

    # select which pre-trained embedding set to load
    switch = 2
    embedding_matrix, EMBED_SIZE = self.get_embeddings(switch)
    optimizer = Adam
    emb = self.embeddings_switch[switch]
    time_stamp = time.strftime("%Y%m%dT%H%M")
    optimizer_name = optimizer.__module__.split('.')[-1].capitalize()
    params = f'\nEmbeddings={emb}, LR={learning_rate}, batch_size={batch_size}, dropout={dropout}, units={units}, optimizer={optimizer_name}'
    print('Classifier parameters:', params)
    print('Timestamp:', time_stamp)

    # model: frozen embeddings -> BiLSTM -> global max pool -> sigmoid
    deep_inputs = Input(shape=(maxlen,))
    embedding_layer = Embedding(self.vocab_size, EMBED_SIZE,
                                weights=[embedding_matrix],
                                trainable=False)(deep_inputs)
    LSTM_1 = Bidirectional(LSTM(
        units, dropout=dropout, return_sequences=True
    ))(embedding_layer)
    gmp1d = GlobalMaxPool1D()(LSTM_1)
    dense_layer = Dense(1, activation='sigmoid')(gmp1d)
    self.model = Model(inputs=deep_inputs, outputs=dense_layer)
    # NOTE(review): `lr=` is the deprecated alias of `learning_rate=` in
    # tf.keras optimizers — confirm the targeted TF version still accepts it
    self.model.compile(
        loss='binary_crossentropy',
        optimizer=optimizer(lr=learning_rate),
        metrics=['accuracy']
    )

    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=5,
        restore_best_weights=True,
        verbose=2,
    )
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss",
        patience=2,
        factor=0.2,
        min_lr=5e-5,
        verbose=2,
    )
    # checkpoint every epoch, filename encodes epoch and val metrics
    filepath = wdir + time_stamp + '-epoch{epoch:02d}-val_accu_{val_accuracy:.2f}-val_loss_{val_loss:.2f}.hdf5'
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath,
        verbose=0,
    )

    history = self.model.fit(
        X_train, y_train,
        batch_size=batch_size,
        epochs=21,
        verbose=2,
        validation_split=0.2,
        callbacks=[early_stop, reduce_lr, checkpoint]
    )

    # learning-curve plots: accuracy then loss
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

    # persist the fitted tokenizer for inference-time reuse
    with open(f'{wdir}/{time_stamp}_tokenizer.pkl', 'wb') as f:
        pickle.dump(self.tokenizer, f, protocol=pickle.HIGHEST_PROTOCOL)
def execute(weeks): np.random.seed(RANDOM_SEED) # Parsing data contributions = [] for week in weeks: for day in week['contributionDays']: contributions.append(day['contributionCount']) days = np.arange(0, len(contributions), 1) df = pd.DataFrame(dict(contributions=contributions), index=days, columns=['contributions']) # df = df.sort_values('Date') # logger.info(df.shape) # Normalization scaler = MinMaxScaler() contributions = df.contributions.values.reshape(-1, 1) scaled_contributions = scaler.fit_transform(contributions) # logger.info(np.isnan(scaled_contributions).any()) # scaled_contributions = scaled_contributions[~np.isnan(scaled_contributions)] # scaled_contributions = scaled_contributions.reshape(-1, 1) # logger.info(np.isnan(scaled_contributions).any()) # Preprocessing X_train, y_train, X_test, y_test = preprocess(scaled_contributions, SEQ_LEN, train_split=0.98) # logger.info(y_test) # logger.info(X_train.shape) # logger.info(X_test.shape) # Model model = keras.Sequential() model.add( Bidirectional(LSTM(WINDOW_SIZE, return_sequences=True, activation='tanh', recurrent_activation='sigmoid'), input_shape=(WINDOW_SIZE, X_train.shape[-1]))) model.add(Dropout(rate=DROPOUT)) model.add( Bidirectional( LSTM((WINDOW_SIZE * 2), return_sequences=True, activation='tanh', recurrent_activation='sigmoid'))) model.add(Dropout(rate=DROPOUT)) model.add(Bidirectional(LSTM(WINDOW_SIZE, return_sequences=False))) model.add(Dense(units=1)) model.add(Activation('linear')) # Training model.compile(loss='mean_squared_error', optimizer='adam') model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, shuffle=False, validation_split=0.1) model.evaluate(X_test, y_test) # Prediction y_hat = model.predict(X_test) y_hat_inverse = scaler.inverse_transform(y_hat) return y_hat_inverse.tolist()
# Load MNIST, scale pixels to [0, 1], and treat each image as a
# 28-step sequence of 28 features for the recurrent model
mnist = datasets.mnist
(x_train, t_train), (x_test, t_test) = mnist.load_data()
x_train = (x_train.reshape(-1, 28, 28) / 255).astype(np.float32)
x_test = (x_test.reshape(-1, 28, 28) / 255).astype(np.float32)
x_train, x_val, t_train, t_val = \
    train_test_split(x_train, t_train, test_size=0.2)

'''
2. Build the model
'''
model = Sequential()
# BiLSTM over the 28 image rows; forward/backward outputs are concatenated
model.add(
    Bidirectional(LSTM(25,
                       activation='tanh',
                       recurrent_activation='sigmoid',
                       kernel_initializer='glorot_normal',
                       recurrent_initializer='orthogonal'),
                  merge_mode='concat'))
model.add(
    Dense(10, kernel_initializer='glorot_normal', activation='softmax'))

'''
3. Train the model
'''
# sparse loss: t_train holds integer class labels, not one-hot vectors
optimizer = optimizers.Adam(learning_rate=0.001,
                            beta_1=0.9,
                            beta_2=0.999,
                            amsgrad=True)
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
def get_test_model_lstm_stateful():
    """Return a fitted test model mixing stateful and non-stateful LSTMs.

    For each of the first two inputs it builds: a stacked LSTM pair, a
    state-returning LSTM, and a stacked Bidirectional-LSTM pair — with
    statefulness alternating per input via the parity expressions.  Two
    further LSTMs consume explicit initial_state tensors from inputs 3-5.

    Returns:
        The fitted keras ``Model``.
    """
    # stateful RNNs need a fixed batch size, hence batch_shape with size 1
    stateful_batch_size = 1
    input_shapes = [
        (17, 4),
        (1, 10),
        (None, 4),
        (12,),
        (12,)
    ]
    inputs = [Input(batch_shape=(stateful_batch_size,) + s) for s in input_shapes]
    outputs = []
    for in_num, inp in enumerate(inputs[:2]):
        # parity of in_num flips statefulness between the two inputs
        stateful = bool((in_num + 1) % 2)
        lstm_sequences = LSTM(
            stateful=stateful,
            units=8,
            recurrent_activation='relu',
            return_sequences=True,
            name='lstm_sequences_' + str(in_num) + '_st-' + str(stateful)
        )(inp)
        stateful = bool((in_num) % 2)
        lstm_regular = LSTM(
            stateful=stateful,
            units=3,
            recurrent_activation='sigmoid',
            return_sequences=False,
            name='lstm_regular_' + str(in_num) + '_st-' + str(stateful)
        )(lstm_sequences)
        outputs.append(lstm_regular)
        # LSTM that also exposes its final hidden and cell states
        stateful = bool((in_num + 1) % 2)
        lstm_state, state_h, state_c = LSTM(
            stateful=stateful,
            units=3,
            recurrent_activation='sigmoid',
            return_state=True,
            name='lstm_state_return_' + str(in_num) + '_st-' + str(stateful)
        )(inp)
        outputs.append(lstm_state)
        outputs.append(state_h)
        outputs.append(state_c)
        # bidirectional stack mirroring the plain LSTM pair above
        stateful = bool((in_num + 1) % 2)
        lstm_bidi_sequences = Bidirectional(
            LSTM(
                stateful=stateful,
                units=4,
                recurrent_activation='hard_sigmoid',
                return_sequences=True,
                name='bi-lstm1_' + str(in_num) + '_st-' + str(stateful)
            )
        )(inp)
        stateful = bool((in_num) % 2)
        lstm_bidi = Bidirectional(
            LSTM(
                stateful=stateful,
                units=6,
                recurrent_activation='linear',
                return_sequences=False,
                name='bi-lstm2_' + str(in_num) + '_st-' + str(stateful)
            )
        )(lstm_bidi_sequences)
        outputs.append(lstm_bidi)
    # LSTMs seeded with explicit initial_state tensors (inputs 4 and 5)
    initial_state_stateful = LSTM(
        units=12, return_sequences=True, stateful=True, return_state=True,
        name='initial_state_stateful')(inputs[2],
                                       initial_state=[inputs[3], inputs[4]])
    outputs.extend(initial_state_stateful)
    initial_state_not_stateful = LSTM(
        units=12, return_sequences=False, stateful=False, return_state=True,
        name='initial_state_not_stateful')(inputs[2],
                                           initial_state=[inputs[3], inputs[4]])
    outputs.extend(initial_state_not_stateful)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='mean_squared_error', optimizer='nadam')

    # fit to dummy data: predict once to learn the output shapes, then train
    training_data_size = stateful_batch_size
    data_in = generate_input_data(training_data_size, input_shapes)
    initial_data_out = model.predict(data_in)
    data_out = generate_output_data(training_data_size, initial_data_out)
    model.fit(data_in, data_out, batch_size=stateful_batch_size, epochs=10)
    return model
# NER targets: map each word's tag (third element of the word tuple) to an
# index, then pad every sentence with the 'O' (outside) tag
y = [[tag2idx[w[2]] for w in s] for s in sentences]
y = pad_sequences(maxlen=max_len, sequences=y, padding="post",
                  value=tag2idx["O"])
x_train, x_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1)

# Embedding -> BiGRU -> per-token tag scores; the softmax runs in float32
# via the Activation layer (dtype="float32") for mixed-precision safety
input_word = Input(shape=(max_len, ))
model = Embedding(input_dim=num_words, output_dim=50,
                  input_length=max_len)(input_word)
model = SpatialDropout1D(0.1)(model)
model = Bidirectional(
    GRU(units=100, return_sequences=True, recurrent_dropout=0.1))(model)
model = TimeDistributed(Dense(num_tags))(model)
out = Activation("softmax", dtype="float32", name="predictions")(model)
model = Model(input_word, out)
model.summary()
# sparse loss: y holds integer tag indices, not one-hot vectors
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
early_stopping = EarlyStopping(
    monitor="val_accuracy",
    min_delta=0,
    patience=1,
    verbose=0,
    mode="max",
# NOTE(review): chunk begins mid-call — the opening of the first Conv1D
# (operating on in_sequence) sits above the visible region.
                  kernel_initializer='glorot_normal',
                  strides=1)(in_sequence)
embedded = Dropout(0.1)(embedded)
embedded = MaxPooling1D(pool_size=pool_length)(embedded)

# remaining conv blocks: one per configured filter count/size pair
for i in range(1, len(nb_filter)):
    embedded = Conv1D(filters=nb_filter[i],
                      kernel_size=filter_length[i],
                      padding='valid',
                      activation='relu',
                      kernel_initializer='glorot_normal',
                      strides=1)(embedded)
    embedded = Dropout(0.1)(embedded)
    embedded = MaxPooling1D(pool_size=pool_length)(embedded)

# collapse the conv features into one vector with a BiLSTM
bi_lstm_seq = \
    Bidirectional(LSTM(64,
                       return_sequences=False,
                       dropout=0.15,
                       recurrent_dropout=0.15,
                       implementation=0))(embedded)
label = Dropout(0.3)(bi_lstm_seq)
label = Dense(64, activation='relu')(label)
label = Dense(2, activation='sigmoid')(label)

# sentence encoder
labeler = Model(inputs=in_sequence, outputs=label)
labeler.summary()

# load positive training sequences (lower-cased) from the FASTA file
positives = []
pos_labels = []
for line in open(
        '/Users/emzodls/Dropbox/Lab/Warwick/RiPP_nnets/final_train_sets/positives_all.fa'
):
    if not line.startswith('>'):
        positives.append(line.strip().lower())
# Self-attention demo on random data: integer token sequences and one-hot
# per-timestep labels; Embedding -> Bi-LSTM(128) -> Position_Embedding -> two
# stacked multi-head Attention(8 heads, 16 dims) layers -> per-timestep
# Dense(class_num, sigmoid), compiled with categorical_crossentropy.
# NOTE(review): Position_Embedding and Attention are project-local layers not
# visible here; a sigmoid output paired with categorical_crossentropy looks
# inconsistent (softmax is the usual pairing) — confirm intent.
#128 10 1(0~999) x_train = np.random.randint(word_num, size=(train_size, max_length)) #128 1(0~1) y_train = to_categorical(np.random.randint(class_num, size=(train_size, max_length)),class_num) x_val = np.random.randint(word_num, size=(val_size, max_length)) y_val = to_categorical(np.random.randint(class_num, size=(val_size, max_length)),class_num) # print("x_train.shape={}".format(x_train.shape)) # print("y_train.shape={}".format(y_train.shape)) S_inputs = Input(shape=(max_length,), dtype='int32') # print(K.int_shape(S_inputs)) embeddings = Embedding(word_num, emb_size)(S_inputs) # print(K.int_shape(embeddings)) lstm_seq = Bidirectional(LSTM(128,return_sequences = True))(embeddings) lstm_seq = Position_Embedding()(lstm_seq) # print(K.int_shape(lstm_seq)) O_seq = Attention(8, 16)([lstm_seq, lstm_seq, lstm_seq]) O_seq = Attention(8, 16)([O_seq, O_seq, O_seq]) # print(K.int_shape(O_seq)) # O_seq = GlobalAveragePooling1D()(O_seq) # print(K.int_shape(O_seq)) # O_seq = Dropout(0.5)(O_seq) # outputs = Dense(1, activation='sigmoid')(O_seq) # print(K.int_shape(outputs)) outputs = TimeDistributed(Dense(class_num, activation='sigmoid'))(O_seq) # print(K.int_shape(outputs)) model = Model(inputs=S_inputs, outputs=outputs) print(model.summary()) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Text-generation tutorial cell: prose explains switching from binary to
# categorical cross-entropy (hundreds of word classes instead of 0/1 labels),
# then trains Embedding -> Bi-LSTM(20) -> softmax over the vocabulary for 200
# epochs via the project-local savefit() helper, which returns (model, history).
# Building an RNN to train our text generation model will be very similar to the # sentiment models you've built previously. The only real change necessary is to # make sure to use Categorical instead of Binary Cross Entropy as the loss # function - we could use Binary before since the sentiment was only 0 or 1, but # now there are hundreds of categories. # From there, we should also consider using *more* epochs than before, as text # generation can take a little longer to converge than sentiment analysis, *and* # we aren't working with all that much data yet. I'll set it at 200 epochs here # since we're only use part of the dataset, and training will tail off quite a # bit over that many epochs. model = Sequential() model.add(Embedding(total_words, 64, input_length=max_sequence_len - 1)) model.add(Bidirectional(LSTM(20))) model.add(Dense(total_words, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) model, history = savefit(model, input_sequences, one_hot_labels, epochs=200, verbose=0) ### View the Training Graph import matplotlib.pyplot as plt
# NERCRF model builder: word embeddings plus character features (per-word
# char embeddings -> Conv1D(128,3) -> GlobalMaxPooling1D, via TimeDistributed)
# are concatenated, passed through two Bi-LSTM layers with dropout, projected
# by Dense to target_label_dims, and decoded by a CRF layer pinned to CPU.
# In 'pad' mode an extra sequence-lengths input is appended; use_cudnn forces
# crf_mode='reg'. Compiles with Adam(0.001, clipnorm=5) and viterbi accuracy.
# NOTE(review): the assignment `features = ` at the end of the first chunk is
# continued by `concatenate(...)` at the start of the second — one statement
# split across chunks by the formatting damage.
def build(self, word_length, target_label_dims, word_vocab_size, char_vocab_size, word_embedding_dims=100, char_embedding_dims=16, word_lstm_dims=20, tagger_lstm_dims=200, dropout=0.5, crf_mode='pad'): """ Build a NERCRF model Args: word_length (int): max word length in characters target_label_dims (int): number of entity labels (for classification) word_vocab_size (int): word vocabulary size char_vocab_size (int): character vocabulary size word_embedding_dims (int): word embedding dimensions char_embedding_dims (int): character embedding dimensions word_lstm_dims (int): character LSTM feature extractor output dimensions tagger_lstm_dims (int): word tagger LSTM output dimensions dropout (float): dropout rate crf_mode (string): CRF operation mode, select 'pad'/'reg' for supplied sequences in input or full sequence tagging. ('reg' is forced when use_cudnn=True) """ self.word_length = word_length self.target_label_dims = target_label_dims self.word_vocab_size = word_vocab_size self.char_vocab_size = char_vocab_size self.word_embedding_dims = word_embedding_dims self.char_embedding_dims = char_embedding_dims self.word_lstm_dims = word_lstm_dims self.tagger_lstm_dims = tagger_lstm_dims self.dropout = dropout self.crf_mode = crf_mode assert crf_mode in ('pad', 'reg'), 'crf_mode is invalid' # build word input words_input = Input(shape=(None, ), name='words_input') embedding_layer = Embedding(self.word_vocab_size, self.word_embedding_dims, name='word_embedding') word_embeddings = embedding_layer(words_input) # create word character embeddings word_chars_input = Input(shape=(None, self.word_length), name='word_chars_input') char_embedding_layer = Embedding( self.char_vocab_size, self.char_embedding_dims, name='char_embedding')(word_chars_input) char_embeddings = TimeDistributed( Conv1D(128, 3, padding='same', activation='relu'))(char_embedding_layer) char_embeddings = TimeDistributed( GlobalMaxPooling1D())(char_embeddings) # create the final feature vectors features = 
concatenate([word_embeddings, char_embeddings], axis=-1) # encode using a bi-LSTM features = Dropout(self.dropout)(features) bilstm = Bidirectional( self._rnn_cell(self.tagger_lstm_dims, return_sequences=True))(features) bilstm = Bidirectional( self._rnn_cell(self.tagger_lstm_dims, return_sequences=True))(bilstm) bilstm = Dropout(self.dropout)(bilstm) bilstm = Dense(self.target_label_dims)(bilstm) inputs = [words_input, word_chars_input] if self.use_cudnn: self.crf_mode = 'reg' with tf.device('/cpu:0'): crf = CRF(self.target_label_dims, mode=self.crf_mode, name='ner_crf') if self.crf_mode == 'pad': sequence_lengths = Input(batch_shape=(None, 1), dtype='int32') predictions = crf([bilstm, sequence_lengths]) inputs.append(sequence_lengths) else: predictions = crf(bilstm) # compile the model model = tf.keras.Model(inputs=inputs, outputs=predictions) model.compile(loss={'ner_crf': crf.loss}, optimizer=tf.keras.optimizers.Adam(0.001, clipnorm=5.), metrics=[crf.viterbi_accuracy]) self.model = model
# IMDB sentiment classifier that reuses the embedding layer of a pretrained
# skip-gram model (loaded from skipgram_model.h5, then frozen via
# layers[1].trainable = False) in front of a Bi-LSTM(64) and a sigmoid output;
# trains with early stopping on val_accuracy.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) x_train = sequence.pad_sequences(x_train, maxlen=max_length) x_test = sequence.pad_sequences(x_test, maxlen=max_length) # skipgram model load model_loaded = load_model('skipgram_model.h5') # earlystopping callback earlystopping = EarlyStopping(patience=10, monitor='val_accuracy') # LSTM model input_x_LSTM = Input(batch_shape=(None, max_length)) Embedding_LSTM = model_loaded.layers[2]( input_x_LSTM) # reuse the pretrained embedding layer of model1 as-is biLSTM_LSTM = Bidirectional(LSTM(64))(Embedding_LSTM) Output_LSTM = Dense(1, activation='sigmoid')(biLSTM_LSTM) model_LSTM = Model(input_x_LSTM, Output_LSTM) model_LSTM.layers[1].trainable = False model_LSTM.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) hist_LSTM = model_LSTM.fit(x_train, y_train, validation_data=[x_test, y_test], batch_size=100, epochs=100, callbacks=[earlystopping])
# POS tagger pipeline: split sentences/targets, build a frozen-embedding
# lookup model (saved for the batch generator), then a two-layer Bi-LSTM
# tagger over pre-embedded (MAX_SEQ_LEN, 50) inputs.
trainX, valX, trainY, valY = train_test_split(
    sentences, targets_final, test_size=0.2, shuffle=True)

# prepare model for generating embeddings (frozen pretrained matrix)
inp = Input(shape=(MAX_SEQ_LEN,))
emb = Embedding(input_dim=VOCAB_SIZE, output_dim=50,
                weights=[embedding_matrix], trainable=False,
                input_length=MAX_SEQ_LEN)(inp)
embedding = Model(inp, emb)
save_model(embedding, "embedding_model.h5")

# Model for identifying tags of each word
inp = Input(shape=(MAX_SEQ_LEN, 50))
drop = Dropout(0.1)(inp)
# two bidirectional LSTM layers
lstm1 = LSTM(50, return_sequences=True, recurrent_dropout=0.1)
seq1 = Bidirectional(lstm1)(drop)
lstm2 = LSTM(50, return_sequences=True, recurrent_dropout=0.1)
seq2 = Bidirectional(lstm2)(seq1)
# TIME_DISTRIBUTED -> ( MAX_SEQ_LEN, 50 ) -> (MAX_SEQ_LEN, POS_SIZE)
# FIX: categorical_crossentropy expects a per-timestep probability
# distribution over POS_SIZE classes, so the output activation must be
# softmax; the original "relu" produces unnormalized non-negative scores.
tags = TimeDistributed(Dense(POS_SIZE, activation="softmax"))(seq2)
model = Model(inp, tags)
model.compile(optimizer="rmsprop", loss="categorical_crossentropy",
              metrics=["accuracy"])


# batch generator for model training
def getBatch(sentences, targets, batch_size=128):
    """Yield (embedded_x, y) batches, embedding each slice on the fly.

    Args:
        sentences: indexable sequence of padded token-id rows.
        targets: indexable sequence of one-hot tag rows, aligned with sentences.
        batch_size: rows per yielded batch (last batch may be smaller).
    """
    # FIX: ceiling division — the original `range(n + 1)` yielded an empty
    # trailing batch whenever len(sentences) was an exact multiple of
    # batch_size.
    n = -(-len(sentences) // batch_size)
    for i in range(n):
        x = sentences[i * batch_size:(i + 1) * batch_size]
        x = embedding.predict(x)  # embed lazily to keep memory bounded
        y = targets[i * batch_size:(i + 1) * batch_size]
        yield x, y
# Audio-stream separation network ("Looking to Listen"-style): fifteen
# Conv2D + BatchNorm + ReLU blocks over (298, 257, 2) spectrograms with a
# growing dilation schedule (time-only 1..32, then square 2x2..32x32, final
# 1x1 conv to 8 channels), TimeDistributed(Flatten) to (298, 257*8), a
# 400-unit Bi-LSTM, three Dense(600, relu) layers, and a sigmoid mask output
# reshaped to (298, 257, 2, num_speakers). Prints per-layer output shapes
# (skipping BN/activation layers) plus model.summary(), compiles with
# adam/mse, and returns the model.
def convolution_model(num_speakers=2): # == Audio convolution layers == model = Sequential() # # Implicit input layer # inputs = Input(shape=(298, 257, 2)) # model.add(inputs) # Convolution layers conv1 = Conv2D(96, kernel_size=(1,7), padding='same', dilation_rate=(1,1), input_shape=(298, 257, 2), name="input_layer") model.add(conv1) model.add(BatchNormalization()) model.add(Activation("relu")) conv2 = Conv2D(96, kernel_size=(7,1), padding='same', dilation_rate=(1,1)) model.add(conv2) model.add(BatchNormalization()) model.add(Activation("relu")) conv3 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(1,1)) model.add(conv3) model.add(BatchNormalization()) model.add(Activation("relu")) conv4 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(2,1)) model.add(conv4) model.add(BatchNormalization()) model.add(Activation("relu")) conv5 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(4,1)) model.add(conv5) model.add(BatchNormalization()) model.add(Activation("relu")) conv6 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(8,1)) model.add(conv6) model.add(BatchNormalization()) model.add(Activation("relu")) conv7 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(16,1)) model.add(conv7) model.add(BatchNormalization()) model.add(Activation("relu")) conv8 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(32,1)) model.add(conv8) model.add(BatchNormalization()) model.add(Activation("relu")) conv9 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(1,1)) model.add(conv9) model.add(BatchNormalization()) model.add(Activation("relu")) conv10 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(2,2)) model.add(conv10) model.add(BatchNormalization()) model.add(Activation("relu")) conv11 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(4,4)) model.add(conv11) model.add(BatchNormalization()) model.add(Activation("relu")) conv12 = Conv2D(96, kernel_size=(5,5), padding='same', 
dilation_rate=(8,8)) model.add(conv12) model.add(BatchNormalization()) model.add(Activation("relu")) conv13 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(16,16)) model.add(conv13) model.add(BatchNormalization()) model.add(Activation("relu")) conv14 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(32,32)) model.add(conv14) model.add(BatchNormalization()) model.add(Activation("relu")) conv15 = Conv2D(8, kernel_size=(1,1), padding='same', dilation_rate=(1,1)) model.add(conv15) model.add(BatchNormalization()) model.add(Activation("relu")) # == AV fused neural network == # AV fusion step(s) model.add(TimeDistributed(Flatten())) # BLSTM new_matrix_length = 400 model.add(Bidirectional(LSTM(new_matrix_length//2, return_sequences=True, input_shape=(298, 257*8)))) # Fully connected layers model.add(Dense(600, activation="relu")) model.add(Dense(600, activation="relu")) model.add(Dense(600, activation="relu")) # Output layer (i.e. complex masks) # outputs = Dense(257*2*num_speakers, activation="relu") outputs = Dense(257*2*num_speakers, activation="sigmoid") # TODO: check if this is more correct (based on the paper) model.add(outputs) outputs_complex_masks = Reshape((298, 257, 2, num_speakers), name="output_layer") model.add(outputs_complex_masks) # Print the output shapes of each model layer for layer in model.layers: name = layer.get_config()["name"] if "batch_normal" in name or "activation" in name: continue print(layer.output_shape, "\t", name) # Alternatively, print the default keras model summary print(model.summary()) # Compile the model before training # model.compile(optimizer='adam', loss='mse') model.compile(optimizer='adam', loss='mse', metrics=['accuracy']) return model
def train(self, vocab_size=None, split_ratio=0.9, num_epochs=5):
    """Train a Bi-LSTM text classifier on the parsed dataframe.

    Concatenates the selected feature columns of each row into one
    lower-cased string, one-hot encodes the row's class, shuffles and splits
    the data, then fits TextVectorization -> Embedding -> Bi-LSTM -> Dense
    softmax, storing the result on self.model.

    Args:
        vocab_size: max tokens for TextVectorization (None = unlimited).
        split_ratio: fraction of the data used for training; the rest is
            used as validation data.
        num_epochs: number of training epochs.
    """
    if self.classes is None:
        print(
            'Classes list is none, did you use parse method before calling train?'
        )
        return
    if self.train_data is None:
        print(
            'Train dataframe is none, did you use parse method before calling train?'
        )
        return

    # ----------- convert train df to numpy array for X and Y -----------
    train_test_data = []
    self.features_ordered = [
        'unsubscribe', 'extensions', 'sender', 'subject', 'text'
    ]
    for i, row in self.train_data.iterrows():
        x = ''
        for col in self.features_ordered:
            if row[col] is not None and len(row[col]) > 0:
                x += str(row[col]).lower() + ' '
        x = x.strip()
        # FIX: list.index() raises ValueError when the value is absent —
        # it never returns -1, so the original `if idx == -1: continue`
        # guard was dead code and an unknown class crashed training.
        try:
            idx = self.classes.index(row['type'])
        except ValueError:
            continue
        y = np.zeros(len(self.classes))
        y[idx] = 1
        train_test_data.append((x, y))
    train_test_data = np.array(train_test_data)

    # ----------- split to train x, y; test x, y-----------
    idx = int(len(train_test_data) * split_ratio)
    np.random.shuffle(train_test_data)
    train = train_test_data[:idx]
    test = train_test_data[idx:]
    train_x = np.array([i[0] for i in train])
    train_y = np.array([i[1] for i in train])
    test_x = np.array([i[0] for i in test])
    test_y = np.array([i[1] for i in test])

    # -------- build model --------
    encoder = TextVectorization(max_tokens=vocab_size)
    encoder.adapt(train_x)
    self.model = Sequential([
        encoder,
        Embedding(input_dim=len(encoder.get_vocabulary()),
                  output_dim=64,
                  mask_zero=True),
        Bidirectional(LSTM(64)),
        Dense(64, activation='relu'),
        Dense(len(self.classes), activation='softmax')
    ])
    self.model.compile(loss='categorical_crossentropy',
                       optimizer='adam',
                       metrics=['accuracy'])

    # -------- train model --------
    self.model.fit(x=train_x,
                   y=train_y,
                   batch_size=64,
                   epochs=num_epochs,
                   validation_data=(test_x, test_y))
# Lyric-generation cell: inspect a few tokenizer vocabulary entries and
# training sequences, then train a word-level next-word predictor
# (Embedding -> Bi-LSTM(150) -> softmax over the vocabulary).
print(tokenizer.word_index['athy'])
print(tokenizer.word_index['one'])
print(tokenizer.word_index['jeremy'])
print(tokenizer.word_index['lanigan'])
print(xs[6])
print(ys[6])
print(xs[5])
print(ys[5])
print(tokenizer.word_index)

model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_len - 1))
model.add(Bidirectional(LSTM(150)))
model.add(Dense(total_words, activation='softmax'))
# FIX: `lr` is deprecated (and removed in recent tf.keras releases);
# the supported keyword is `learning_rate`.
adam = Adam(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
#earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto')
history = model.fit(xs, ys, epochs=100, verbose=1)
#print model.summary()
print(model)  # prints the object repr; model.summary() gives layer details

import matplotlib.pyplot as plt


def plot_graphs(history, string):
    """Plot a single metric curve from a Keras History object."""
    plt.plot(history.history[string])
# CTC line-recognition model: slides window_width-wide patches (stride
# window_stride) across the input image via a Lambda(slide_window), runs a
# LeNet with its last two layers removed on each patch through
# TimeDistributed, stacks two Bi-LSTMs (256 then 128 units), and emits a
# per-window softmax. Wires up CTC loss and CTC decode as additional Lambda
# outputs; input_length is rescaled to the number of windows. Uses CuDNNLSTM
# when more than one local device is listed, plain LSTM otherwise.
def line_lstm_ctc(input_shape, output_shape, window_width=28, window_stride=14): image_height, image_width = input_shape output_length, num_classes = output_shape num_windows = int((image_width - window_width) / window_stride) + 1 if num_windows < output_length: raise ValueError( f'Window width/stride need to generate at least {output_length} windows (currently {num_windows})' ) image_input = Input(shape=input_shape, name='image') y_true = Input(shape=(output_length, ), name='y_true') input_length = Input(shape=(1, ), name='input_length') label_length = Input(shape=(1, ), name='label_length') gpu_present = len(device_lib.list_local_devices()) > 1 lstm_fn = CuDNNLSTM if gpu_present else LSTM # Your code should use slide_window and extract image patches from image_input. # Pass a convolutional model over each image patch to generate a feature vector per window. # Pass these features through one or more LSTM layers. # Convert the lstm outputs to softmax outputs. # Note that lstms expect a input of shape (num_batch_size, num_timesteps, feature_length). 
##### Your code below (Lab 3) image_reshaped = Reshape((image_height, image_width, 1))(image_input) # (image_height, image_width, 1) image_patches = Lambda(slide_window, arguments={ 'window_width': window_width, 'window_stride': window_stride })(image_reshaped) # (num_windows, image_height, window_width, 1) # Make a LeNet and get rid of the last two layers (softmax and dropout) convnet = lenet((image_height, window_width, 1), (num_classes, )) convnet = KerasModel(inputs=convnet.inputs, outputs=convnet.layers[-2].output) convnet_outputs = TimeDistributed(convnet)(image_patches) # (num_windows, 128) lstm_output = Bidirectional(lstm_fn( 256, return_sequences=True))(convnet_outputs) # add additional layer lstm_output = Bidirectional(lstm_fn(128, return_sequences=True))(lstm_output) # (num_windows, 128) softmax_output = Dense(num_classes, activation='softmax', name='softmax_output')(lstm_output) # (num_windows, num_classes) ##### Your code above (Lab 3) input_length_processed = Lambda( lambda x, num_windows=None: x * num_windows, arguments={'num_windows': num_windows})(input_length) ctc_loss_output = Lambda( lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]), name='ctc_loss')( [y_true, softmax_output, input_length_processed, label_length]) ctc_decoded_output = Lambda( lambda x: ctc_decode(x[0], x[1], output_length), name='ctc_decoded')([softmax_output, input_length_processed]) model = KerasModel( inputs=[image_input, y_true, input_length, label_length], outputs=[ctc_loss_output, ctc_decoded_output]) return model
# Seq2seq question-answering model: a frozen, mask-enabled embedding layer
# (weights copied from word_to_vec_map) is shared by encoder and decoder; the
# Bi-LSTM encoder's forward/backward h and c states are concatenated to
# initialize a 2*state_dim decoder LSTM, followed by a per-timestep softmax
# over the vocabulary.
encoder_inputs = Input(shape=(max_qc_len, ), dtype='int32') emb_matrix = np.zeros((vocab_len, emb_dim)) for word, index in word_to_index.items(): if index != 0: emb_matrix[index, :] = word_to_vec_map[word] embedding_layer = Embedding(vocab_len, emb_dim, trainable=False, mask_zero=True) embedding_layer.build((None, )) embedding_layer.set_weights([emb_matrix]) encoder_embeddings = embedding_layer(encoder_inputs) encoder = Bidirectional(LSTM(state_dim, return_state=True), merge_mode='concat')(encoder_embeddings) encoder_outputs, forward_h, forward_c, backward_h, backward_c = encoder state_h = Concatenate()([forward_h, backward_h]) state_c = Concatenate()([forward_c, backward_c]) encoder_states = [state_h, state_c] decoder_inputs = Input(shape=(max_ans_len, )) decoder_embeddings = embedding_layer(decoder_inputs) decoder_lstm = LSTM(state_dim * 2, return_sequences=True, return_state=True) decoder_outputs, _, _ = decoder_lstm(decoder_embeddings, initial_state=encoder_states) outputs = TimeDistributed(Dense(vocab_len, activation='softmax'))(decoder_outputs)
def create_test_model(logdir, time):
    """Train and evaluate a small LSTM binary classifier on pickled data.

    Reads train/evaluation arrays from pickle files named after
    sys.argv[1] and sys.argv[2], trains for int(sys.argv[3]) epochs,
    saves the model under `logdir` (named by its evaluation accuracy and
    `time`), and returns that accuracy.

    Args:
        logdir: directory to save the trained model in (created if missing).
        time: timestamp string embedded in the saved model filename.

    Returns:
        Evaluation accuracy (float) on the held-out EVALUATION set.
    """
    args = sys.argv[1:]

    # FIX: use context managers — the original opened four files and never
    # closed any of them.
    with open(
            "pickles/classifier_" + args[0] + "_network_input_" + args[1] +
            "_normalized.pickle", "rb") as pickle_in:
        X = pickle.load(pickle_in)
    print(X[:3])

    with open(
            "pickles/classifier_" + args[0] + "_network_" + args[1] +
            "_labels.pickle", "rb") as pickle_in:
        y = pickle.load(pickle_in)
    print(y[:500])

    with open(
            "pickles/classifier_" + args[0] + "_network_input_" + args[1] +
            "_normalized_EVALUATION.pickle", "rb") as pickle_in:
        X_test = pickle.load(pickle_in)
    print(X[:3])

    with open(
            "pickles/classifier_" + args[0] + "_network_" + args[1] +
            "_labels_EVALUATION.pickle", "rb") as pickle_in:
        y_test = pickle.load(pickle_in)

    y = np.array(y)
    X = np.array(X)
    y_test = np.array(y_test)
    X_test = np.array(X_test)

    model = Sequential()
    model.add(LSTM(32, input_shape=(X.shape[1:]), return_sequences=True))
    model.add(Dropout(0.1))
    model.add(BatchNormalization())
    model.add(Dense(32))
    model.add(Bidirectional(LSTM(32)))
    model.add(Dropout(0.1))
    model.add(BatchNormalization())
    model.add(Dense(1, activation='sigmoid'))  # softmax

    # opt = tf.keras.optimizers.Adam(lr=0.002, decay=1e-6)
    # y = tf.keras.utils.to_categorical(y)

    # Compile model: single sigmoid unit -> binary cross-entropy.
    model.compile(
        loss='binary_crossentropy',  # categorical_crossentropy'
        optimizer='rmsprop',
        metrics=['accuracy'],
    )
    model.fit(
        X,
        y,
        epochs=int(args[2]),
        batch_size=128,
    )

    val_loss, val_acc = model.evaluate(X_test, y_test)
    print(val_loss)
    print(val_acc)

    if not os.path.exists(logdir):
        os.makedirs(logdir)
    model.save(logdir + "/" + str(val_acc) + "_trained_model_" + time + ".h5")
    return val_acc
# CRNN for OCR on (32, 128, 1) images: seven conv blocks whose pooling
# schedule collapses the height axis to 1, a Lambda that squeezes that axis,
# two Bi-LSTM(256) layers, and a softmax over len(self.char_list) + 1 classes
# (the extra class is the CTC blank). Builds the inference model (cnn_lstm_)
# and a training model whose single output is the Lambda-wrapped CTC loss
# over [outputs, labels, input_length, label_length]; returns both.
# NOTE(review): the `return` statement is split across the two chunks by the
# formatting damage — it returns (training_model, cnn_lstm_).
def CNN_BiLSTM(self, max_label_len): # Model architecture input_ = Input(shape=(32, 128, 1)) # CNN conv2d_1 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(input_) maxpool_2d_1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv2d_1) conv2d_2 = Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same')(maxpool_2d_1) maxpool_2d_2 = MaxPool2D(pool_size=(2, 2), strides=2)(conv2d_2) conv2d_3 = Conv2D(filters=256, kernel_size=(3, 3), activation='relu', padding='same')(maxpool_2d_2) conv2d_4 = Conv2D(filters=256, kernel_size=(3, 3), activation='relu', padding='same')(conv2d_3) maxpool_2d_3 = MaxPool2D(pool_size=(2, 1))(conv2d_4) conv2d_5 = Conv2D(filters=512, kernel_size=(3, 3), activation='relu', padding='same')(maxpool_2d_3) batch_norm_5 = BatchNormalization()(conv2d_5) conv2d_6 = Conv2D(filters=512, kernel_size=(3, 3), activation='relu', padding='same')(batch_norm_5) batch_norm_6 = BatchNormalization()(conv2d_6) maxpool_2d_4 = MaxPool2D(pool_size=(2, 1))(batch_norm_6) conv2d_7 = Conv2D(filters=512, kernel_size=(2, 2), activation='relu')(maxpool_2d_4) squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv2d_7) blstm1 = Bidirectional(LSTM(256, return_sequences=True, dropout=0.2))(squeezed) # = BatchNormalization()(blstm1) blstm2 = Bidirectional(LSTM(256, return_sequences=True, dropout=0.2))(blstm1) #blstm3 = Bidirectional(LSTM(256,return_sequences=True,dropout=0.2))(blstm2) outputs = Dense(len(self.char_list) + 1, activation='softmax')(blstm2) #(31,63) cnn_lstm_ = Model(input_, outputs) # LSTM layer inputs labels = Input(name='the_labels', shape=[max_label_len], dtype='float32') input_length = Input(name='input_length', shape=[1], dtype='int64') label_length = Input(name='label_length', shape=[1], dtype='int64') loss_out = Lambda(self.CTC_LOSS, output_shape=(1, ), name='ctc')( [outputs, labels, input_length, label_length]) training_model = Model( inputs=[input_, labels, input_length, label_length], outputs=loss_out) return 
training_model, cnn_lstm_
# Audio-visual speech separation model ("Looking to Listen"-style): the AS
# stream applies fifteen dilated Conv2D(96/8) + BN + ReLU blocks to a
# (298, 257, 2) spectrogram and reshapes to (298, 8*257); the shared VS
# Sequential stack convolves each speaker's (75, 1, 1792) visual features and
# bilinearly upsamples them to 298 frames; the streams are concatenated per
# timestep, fused by a Bi-LSTM(400, merge_mode='sum') and three Dense(600,
# relu) layers, and projected to a (298, 257, 2, people_num) complex mask.
# NOTE(review): tf.image.resize_bilinear is a TF1 API; under TF2 this would
# need tf.image.resize(..., method='bilinear') — confirm the target TF
# version (align_corners handling also differs between the two).
def AV_model(people_num=2): def UpSampling2DBilinear(size): return Lambda(lambda x: tf.image.resize_bilinear(x, size, align_corners=True)) def sliced(x, index): return x[:, :, :, index] # --------------------------- AS start --------------------------- audio_input = Input(shape=(298, 257, 2)) print('as_0:', audio_input.shape) as_conv1 = Convolution2D(96, kernel_size=(1, 7), strides=(1, 1), padding='same', dilation_rate=(1, 1), name='as_conv1')(audio_input) as_conv1 = BatchNormalization()(as_conv1) as_conv1 = ReLU()(as_conv1) print('as_1:', as_conv1.shape) as_conv2 = Convolution2D(96, kernel_size=(7, 1), strides=(1, 1), padding='same', dilation_rate=(1, 1), name='as_conv2')(as_conv1) as_conv2 = BatchNormalization()(as_conv2) as_conv2 = ReLU()(as_conv2) print('as_2:', as_conv2.shape) as_conv3 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(1, 1), name='as_conv3')(as_conv2) as_conv3 = BatchNormalization()(as_conv3) as_conv3 = ReLU()(as_conv3) print('as_3:', as_conv3.shape) as_conv4 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(2, 1), name='as_conv4')(as_conv3) as_conv4 = BatchNormalization()(as_conv4) as_conv4 = ReLU()(as_conv4) print('as_4:', as_conv4.shape) as_conv5 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(4, 1), name='as_conv5')(as_conv4) as_conv5 = BatchNormalization()(as_conv5) as_conv5 = ReLU()(as_conv5) print('as_5:', as_conv5.shape) as_conv6 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(8, 1), name='as_conv6')(as_conv5) as_conv6 = BatchNormalization()(as_conv6) as_conv6 = ReLU()(as_conv6) print('as_6:', as_conv6.shape) as_conv7 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(16, 1), name='as_conv7')(as_conv6) as_conv7 = BatchNormalization()(as_conv7) as_conv7 = ReLU()(as_conv7) print('as_7:', as_conv7.shape) as_conv8 = Convolution2D(96, kernel_size=(5, 5), 
strides=(1, 1), padding='same', dilation_rate=(32, 1), name='as_conv8')(as_conv7) as_conv8 = BatchNormalization()(as_conv8) as_conv8 = ReLU()(as_conv8) print('as_8:', as_conv8.shape) as_conv9 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(1, 1), name='as_conv9')(as_conv8) as_conv9 = BatchNormalization()(as_conv9) as_conv9 = ReLU()(as_conv9) print('as_9:', as_conv9.shape) as_conv10 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(2, 2), name='as_conv10')(as_conv9) as_conv10 = BatchNormalization()(as_conv10) as_conv10 = ReLU()(as_conv10) print('as_10:', as_conv10.shape) as_conv11 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(4, 4), name='as_conv11')(as_conv10) as_conv11 = BatchNormalization()(as_conv11) as_conv11 = ReLU()(as_conv11) print('as_11:', as_conv11.shape) as_conv12 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(8, 8), name='as_conv12')(as_conv11) as_conv12 = BatchNormalization()(as_conv12) as_conv12 = ReLU()(as_conv12) print('as_12:', as_conv12.shape) as_conv13 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(16, 16), name='as_conv13')(as_conv12) as_conv13 = BatchNormalization()(as_conv13) as_conv13 = ReLU()(as_conv13) print('as_13:', as_conv13.shape) as_conv14 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(32, 32), name='as_conv14')(as_conv13) as_conv14 = BatchNormalization()(as_conv14) as_conv14 = ReLU()(as_conv14) print('as_14:', as_conv14.shape) as_conv15 = Convolution2D(8, kernel_size=(1, 1), strides=(1, 1), padding='same', dilation_rate=(1, 1), name='as_conv15')(as_conv14) as_conv15 = BatchNormalization()(as_conv15) as_conv15 = ReLU()(as_conv15) print('as_15:', as_conv15.shape) AS_out = Reshape((298, 8 * 257))(as_conv15) print('AS_out:', AS_out.shape) # --------------------------- AS end --------------------------- # 
--------------------------- VS_model start --------------------------- VS_model = Sequential() VS_model.add(Convolution2D(256, kernel_size=(7, 1), strides=(1, 1), padding='same', dilation_rate=(1, 1), name='vs_conv1')) VS_model.add(BatchNormalization()) VS_model.add(ReLU()) VS_model.add(Convolution2D(256, kernel_size=(5, 1), strides=(1, 1), padding='same', dilation_rate=(1, 1), name='vs_conv2')) VS_model.add(BatchNormalization()) VS_model.add(ReLU()) VS_model.add(Convolution2D(256, kernel_size=(5, 1), strides=(1, 1), padding='same', dilation_rate=(2, 1), name='vs_conv3')) VS_model.add(BatchNormalization()) VS_model.add(ReLU()) VS_model.add(Convolution2D(256, kernel_size=(5, 1), strides=(1, 1), padding='same', dilation_rate=(4, 1), name='vs_conv4')) VS_model.add(BatchNormalization()) VS_model.add(ReLU()) VS_model.add(Convolution2D(256, kernel_size=(5, 1), strides=(1, 1), padding='same', dilation_rate=(8, 1), name='vs_conv5')) VS_model.add(BatchNormalization()) VS_model.add(ReLU()) VS_model.add(Convolution2D(256, kernel_size=(5, 1), strides=(1, 1), padding='same', dilation_rate=(16, 1), name='vs_conv6')) VS_model.add(BatchNormalization()) VS_model.add(ReLU()) VS_model.add(Reshape((75, 256, 1))) VS_model.add(UpSampling2DBilinear((298, 256))) VS_model.add(Reshape((298, 256))) # --------------------------- VS_model end --------------------------- video_input = Input(shape=(75, 1, 1792, people_num)) AVfusion_list = [AS_out] for i in range(people_num): single_input = Lambda(sliced, arguments={'index': i})(video_input) VS_out = VS_model(single_input) AVfusion_list.append(VS_out) AVfusion = concatenate(AVfusion_list, axis=2) AVfusion = TimeDistributed(Flatten())(AVfusion) print('AVfusion:', AVfusion.shape) lstm = Bidirectional(LSTM(400, input_shape=(298, 8 * 257), return_sequences=True), merge_mode='sum')(AVfusion) print('lstm:', lstm.shape) fc1 = Dense(600, name="fc1", activation='relu', kernel_initializer=he_normal(seed=27))(lstm) print('fc1:', fc1.shape) fc2 = Dense(600, 
name="fc2", activation='relu', kernel_initializer=he_normal(seed=42))(fc1) print('fc2:', fc2.shape) fc3 = Dense(600, name="fc3", activation='relu', kernel_initializer=he_normal(seed=65))(fc2) print('fc3:', fc3.shape) complex_mask = Dense(257 * 2 * people_num, name="complex_mask", kernel_initializer=glorot_uniform(seed=87))(fc3) print('complex_mask:', complex_mask.shape) complex_mask_out = Reshape((298, 257, 2, people_num))(complex_mask) print('complex_mask_out:', complex_mask_out.shape) AV_model = Model(inputs=[audio_input, video_input], outputs=complex_mask_out) # # compile AV_model # AV_model.compile(optimizer='adam', loss='mse') return AV_model
# Builds a multi-input/multi-output LSTM exercise model covering
# return_sequences, return_state, Bidirectional wrappers, cuDNN-compatible
# configurations (tanh/sigmoid/use_bias), and an LSTM fed an explicit
# initial_state from two extra (12,) inputs; then briefly fits on random
# data generated by the module's generate_input_data / generate_output_data
# helpers and returns the model.
def get_test_model_lstm(): """Returns a test model for Long Short-Term Memory (LSTM) layers.""" input_shapes = [ (17, 4), (1, 10), (None, 4), (12,), (12,) ] inputs = [Input(shape=s) for s in input_shapes] outputs = [] for inp in inputs[:2]: lstm_sequences = LSTM( units=8, recurrent_activation='relu', return_sequences=True )(inp) lstm_regular = LSTM( units=3, recurrent_activation='sigmoid', return_sequences=False )(lstm_sequences) outputs.append(lstm_regular) lstm_state, state_h, state_c = LSTM( units=3, recurrent_activation='sigmoid', return_state=True )(inp) outputs.append(lstm_state) outputs.append(state_h) outputs.append(state_c) lstm_bidi_sequences = Bidirectional( LSTM( units=4, recurrent_activation='hard_sigmoid', return_sequences=True ) )(inp) lstm_bidi = Bidirectional( LSTM( units=6, recurrent_activation='linear', return_sequences=False ) )(lstm_bidi_sequences) outputs.append(lstm_bidi) lstm_gpu_regular = LSTM( units=3, activation='tanh', recurrent_activation='sigmoid', use_bias=True )(inp) lstm_gpu_bidi = Bidirectional( LSTM( units=3, activation='tanh', recurrent_activation='sigmoid', use_bias=True ) )(inp) outputs.append(lstm_gpu_regular) outputs.append(lstm_gpu_bidi) outputs.extend(LSTM(units=12, return_sequences=True, return_state=True)(inputs[2], initial_state=[inputs[3], inputs[4]])) model = Model(inputs=inputs, outputs=outputs, name='test_model_lstm') model.compile(loss='mse', optimizer='nadam') # fit to dummy data training_data_size = 2 data_in = generate_input_data(training_data_size, input_shapes) initial_data_out = model.predict(data_in) data_out = generate_output_data(training_data_size, initial_data_out) model.fit(data_in, data_out, epochs=10) return model