def sequential_autoencoder(self, x, num_layers, timesteps = 5):
    """Build an LSTM autoencoder plus the encoder half that shares its layers.

    `x` is still the raw 2-D data ([m, n]: m dates, n features); only its
    feature count `x.shape[1]` is read here. The network itself consumes
    windows of `timesteps` rows, so the raw data has to be rearranged
    (see create_variable_for_model) before it is fed to the model.

    `num_layers` is handed to the encoder LSTM as its first argument.
    Returns the compiled pair (autoencoder, encoder).
    """
    n_features = x.shape[1]

    # Input: one window of `timesteps` rows
    window = Input(shape = (timesteps, n_features))

    # Encoder
    code = LSTM(num_layers, activation = 'tanh')(window)

    # Decoder: repeat the code once per timestep, then map back to the features
    repeated_code = RepeatVector(timesteps)(code)
    reconstruction = LSTM(n_features, return_sequences = True, activation = 'tanh')(repeated_code)

    # Models
    autoencoder_model = Model(window, reconstruction)
    autoencoder_model.compile(loss = 'mean_squared_error', optimizer = 'adam')

    encoder_model = Model(window, code)
    encoder_model.compile(loss = 'mean_squared_error', optimizer = 'adam')

    return autoencoder_model, encoder_model
def model1(X_train):
    """Return a compiled two-layer LSTM network with one sigmoid output unit.

    Only the shape of `X_train` is used: dimensions 1 and 2 define the
    input shape of the network.
    """
    input_shape = (X_train.shape[1], X_train.shape[2])
    sequence_in = Input(shape=input_shape)

    # The first LSTM keeps the full sequence so the second one can consume it
    hidden = LSTM(10, activation='relu', return_sequences=True)(sequence_in)
    hidden = LSTM(10, activation='relu')(hidden)
    prediction = Dense(1, activation='sigmoid')(hidden)

    network = Model(inputs=sequence_in, outputs=prediction)
    network.compile(loss='mean_squared_error', optimizer='adam')
    return network
def __build_model(input_shape, embeddings_layer, output_size):
    """Assemble and compile the embedding -> 2x LSTM -> dense softmax classifier.

    `embeddings_layer` is applied to an int32 input of shape `input_shape`;
    the final softmax has `output_size` units.
    """
    sentence_indices = Input(input_shape, dtype='int32')
    embedded = embeddings_layer(sentence_indices)

    # Recurrent part: the second LSTM collapses the sequence to one vector
    sequence_features = LSTM(256, return_sequences=True)(embedded)
    sequence_features = Dropout(0.5)(sequence_features)
    sentence_vector = LSTM(256, return_sequences=False)(sequence_features)
    sentence_vector = Dropout(0.5)(sentence_vector)

    # Classification head
    dense_features = Dense(512)(sentence_vector)
    dense_features = Dropout(0.3)(dense_features)
    logits = Dense(output_size)(dense_features)
    probabilities = Activation('softmax')(logits)

    classifier = Model(inputs=sentence_indices, outputs=probabilities)
    classifier.compile(loss='categorical_crossentropy',
                       optimizer='adam',
                       metrics=['accuracy'])
    return classifier
def _train(self):
    """Train, evaluate and return the two-layer LSTM classifier.

    Data comes from `self._load_data()` and is divided by `split_data` into
    train / validation / test parts. The result of `fit` is stored in
    `self.train_history` and the result of `evaluate` on the test part in
    `self.evaluate_history`.
    """
    samples, targets = self._load_data()
    (train_data, train_label,
     validate_data, validate_label,
     test_data, test_label) = split_data(samples, targets, to_categorical=True)

    # Input of shape (100, 3) in, 5 softmax units out
    sequence_in = Input(shape=(100, 3))
    hidden = LSTM(32, return_sequences=True)(sequence_in)
    hidden = LSTM(32)(hidden)
    class_probs = Dense(5, activation=softmax)(hidden)

    classifier = Model(inputs=[sequence_in], outputs=[class_probs])
    classifier.compile(optimizer=RMSprop(lr=0.01),
                       loss=categorical_crossentropy,
                       metrics=[categorical_accuracy])
    classifier.summary()

    # Cut the learning rate by 10x when the monitored accuracy stalls for 3 epochs
    lr_on_plateau = callbacks.ReduceLROnPlateau(monitor="categorical_accuracy",
                                                factor=0.1,
                                                patience=3)

    self.train_history = classifier.fit(train_data,
                                        train_label,
                                        validation_data=(validate_data, validate_label),
                                        batch_size=self.BATCH_SIZE,
                                        epochs=self.EPOCHS,
                                        callbacks=[lr_on_plateau])
    self.evaluate_history = classifier.evaluate(test_data,
                                                test_label,
                                                batch_size=self.BATCH_SIZE)
    return classifier
# model model = concatenate([conv1, conv3, conv5], axis=-1) # LSTM Layer model = LSTM(32, return_sequences=True)(model) model = LSTM(32, return_sequences=False)(model) output = Dense(3, activation='softmax')(model) model = Model(input1, output) model.summary() # Optimizer = optimizers.Adam(lr = 0.00001) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) callback_list = [ ReduceLROnPlateau( # Model의 Val_loss를 Monitoring monitor='val_loss', # Callback 호출시 Learning rate를 1/10으로 줄임 factor=0.1, # Val_loss가 5 Epoch동안 개선되지 않을 경우 CallBack 호출 patience=5), EarlyStopping( # Monitoring Index monitor='val_loss', # 5 Epoch보다 더 길게(즉 6 Epoch 동안) 정확도 향상이 없을 경우 Early Stop patience=5)
target_vectors[:, :, 5:], axis=2)

# Width of the network output: 6 values per mixture component plus ONE_HOT_LEN
output_size = GAUSSIAN_MIXTURE_COMPONENTS * 6 + ONE_HOT_LEN
Loss = GaussianMixtureLoss(GAUSSIAN_MIXTURE_COMPONENTS, max_points)

inputs = Input(shape=(max_points, INPUT_VECTOR_LEN))
model = LSTM(LSTM_SIZE, activation='relu', return_sequences=True)(inputs)

# Stack REPEAT_DEEP_ARCH further sequence-returning LSTM layers on top of the first
for layer in range(REPEAT_DEEP_ARCH):
    model = LSTM(LSTM_SIZE, activation='relu', return_sequences=True)(model)

model = Dense(DENSE_SIZE, activation='relu')(model)
model = Dense(output_size)(model)
model = Model(inputs, model)
model.compile(
    loss=Loss.geom_gaussian_mixture_loss,
    optimizer=OPTIMIZER)
model.summary()

# Callbacks: TensorBoard logs go to a directory named after the run's
# timestamp and script name
tb_callback = TensorBoard(log_dir='./tensorboard_log/' + TIMESTAMP + ' ' + SCRIPT_NAME, write_graph=False)
decypher = DecypherAll(gmm_size=GAUSSIAN_MIXTURE_COMPONENTS, plot_dir=PLOT_DIR)

# Keep only the `.history` attribute of what fit() returns
history = model.fit(
    x=training_vectors,
    y=target_vectors,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=TRAIN_VALIDATE_SPLIT,
    callbacks=[decypher, tb_callback]).history

# Report the validation loss of the last recorded epoch
notify(TIMESTAMP, SCRIPT_NAME, 'validation loss of ' + str(history['val_loss'][-1]))
# Load the geometry vectors; their shape fixes the network's input size
loaded = np.load(DATA_FILE)
training_vectors = loaded['input_geoms']
(data_points, max_points, GEO_VECTOR_LEN) = training_vectors.shape

# Bring coordinates and distance in roughly the same scale
means = localized_mean(training_vectors)
training_vectors = localized_normal(training_vectors, means, 1e4)
target_vectors = loaded['centroid_distance'][:, 0, :]

# One LSTM over the point sequence followed by a 2-unit dense read-out
inputs = Input(name='Input', shape=(max_points, GEO_VECTOR_LEN))
latent = LSTM(LATENT_SIZE, activation='relu')(inputs)
readout = Dense(2)(latent)

model = Model(inputs, readout)
model.compile(loss=univariate_gaussian_loss, optimizer=OPTIMIZER)
model.summary()

# Training callbacks: TensorBoard logging, sample decoding, early stopping
tensorboard_dir = './tensorboard_log/' + TIMESTAMP + ' ' + SCRIPT_NAME
callbacks = [
    TensorBoard(log_dir=tensorboard_dir, write_graph=False),
    DecypherAll(lambda x: str(x)),
    EarlyStopping(patience=40, min_delta=1e-4),
]

fit_result = model.fit(
    x=training_vectors,
    y=target_vectors,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=TRAIN_VALIDATE_SPLIT,
    callbacks=callbacks,
)
history = fit_result.history
# Training configuration
EPOCHS = 200
BATCH_SIZE = 512
TRAINING_SIZE = 100000
TRAIN_VALIDATE_SPLIT = 0.2

# Synthetic data: the same 5-value vector repeated over 11 steps, and that
# sequence repeated TRAINING_SIZE times
input_2d = np.repeat(
    np.repeat([[[0.2, 15, 0, 0, 0]]], 11, axis=1),
    TRAINING_SIZE,
    axis=0,
)
(data_points, max_points, vector_len) = input_2d.shape

inputs = Input(name='Input', shape=(max_points, vector_len))
recurrent = LSTM(vector_len, return_sequences=True)(inputs)
# The dense layer is required for input values exceeding 1e0
projected = Dense(vector_len)(recurrent)

model = Model(inputs, projected)
model.compile(loss=bivariate_gaussian_loss, optimizer=Adam(lr=0.01))
model.summary()

# Training callbacks: TensorBoard logging, sample decoding, early stopping
tensorboard_dir = './tensorboard_log/' + TIMESTAMP
callbacks = [
    TensorBoard(log_dir=tensorboard_dir, write_graph=False),
    DecypherAll(lambda x: str(x)),
    EarlyStopping(patience=20),
]

# The input doubles as the target
fit_result = model.fit(
    x=input_2d,
    y=input_2d,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=TRAIN_VALIDATE_SPLIT,
    callbacks=callbacks,
)
history = fit_result.history
# Copy this script into configs/ under a name built from the run's timestamp
# and the script name
copyfile(__file__, 'configs/' + TIMESTAMP + ' ' + SCRIPT_NAME)

loaded = np.load(DATA_FILE)
training_vectors = loaded['point_sequence']
(set_size, GEO_VECTOR_LEN) = training_vectors.shape
# Insert a length-1 middle axis so the data matches the (1, GEO_VECTOR_LEN)
# input shape declared below
training_vectors = np.reshape(training_vectors, (set_size, 1, GEO_VECTOR_LEN))
target_vectors = loaded['intersection_surface']

# LSTM -> Dense -> LSTM -> Dense -> single-unit output, trained with MSE
inputs = Input(shape=(1, GEO_VECTOR_LEN))
model = LSTM(LATENT_SIZE, activation='relu', return_sequences=True)(inputs)
model = Dense(32, activation='relu')(model)
model = LSTM(LATENT_SIZE, activation='relu')(model)
model = Dense(32, activation='relu')(model)
model = Dense(1)(model)
model = Model(inputs, model)
model.compile(loss='mse', optimizer=OPTIMIZER)
model.summary()

# Callbacks: TensorBoard (with histograms and graph) and a per-epoch logger
# that prints to stdout
tb_callback = TensorBoard(log_dir='./tensorboard_log/' + TIMESTAMP + ' ' + SCRIPT_NAME, histogram_freq=1, write_graph=True)
epoch_callback = EpochLogger(input_func=GeoVectorizer.decypher,
                             target_func=lambda x: str(x),
                             predict_func=lambda x: str(x),
                             aggregate_func=None,
                             stdout=True)

model.fit(x=training_vectors,
          y=target_vectors,
          epochs=EPOCHS,
def main(argv):
    """Run the LSTM in prediction mode or in train/evaluate mode.

    Prediction mode (``len(argv) == 2``):
        ``argv[0]`` is the input file, loaded with ``load_``. The model stored
        in ``model/LSTM.json`` / ``model/LSTM.h5`` is restored and applied to
        the first array returned by ``load_`` (reshaped to one step per
        sample). The probabilities are written to ``<prefix>_temp_LSTM.csv``.
        ``argv[1]`` is not read; only the argument count selects this mode.

    Training mode (any other length):
        ``argv[0]`` is the decision threshold (0.35 when ``argv`` is empty).
        The train / validation / test file names are read from ``files.csv``,
        a model is fitted with ``model_fit``, loss and f-beta curves are saved
        to ``plot3.png`` / ``plot4.png``, predictions are written to
        ``LSTM.csv`` and ``LSTM_valid.csv`` and the model is saved under
        ``model/``.

    :param argv: Command-line arguments without the program name.
    :return: DataFrame with the predicted probabilities.
    """
    global PATH_IN, PATH_SCRIPT, PATH_OUT
    PATH_IN, PATH_SCRIPT, PATH_OUT = get_path()

    if len(argv) == 2:
        from keras.models import model_from_json

        fileX = argv[0]
        X_minmax, X_meanvar, t = load_(fileX)

        # Context manager so the handle is released even if read() raises
        with open("model/LSTM.json", 'r') as json_file:
            loaded_model_json = json_file.read()
        lstm_model = model_from_json(loaded_model_json)

        # load weights into new model
        lstm_model.compile(loss='binary_crossentropy', optimizer='adamax', metrics=['accuracy'])
        lstm_model.load_weights("model/LSTM.h5")
        # Report("Loaded LSTM from disk")

        # One step per sample: (samples, features) -> (samples, 1, features)
        single_step_input = np.reshape(X_minmax, (X_minmax.shape[0], 1, X_minmax.shape[1]))
        res = pd.DataFrame(lstm_model.predict_proba(single_step_input))

        # NOTE(review): the prefix is everything before the FIRST dot, so a
        # path such as "./data/x.csv" yields an empty prefix -- confirm that
        # callers only pass plain file names.
        res.to_csv(str(fileX.split('.')[0]) + '_temp_LSTM.csv', index=False)
        return res

    if len(argv) == 0:
        argv = [0.35]
    THRESHOLD = float(argv[0])

    ##### get files names ###
    names = pd.read_csv('files.csv')
    fileX_train = literal_eval(names['fileX_train'][0])
    fileY_train = literal_eval(names['fileY_train'][0])

    fileX_valid = literal_eval(names['fileX_valid'][0])
    fileY_valid = literal_eval(names['fileY_valid'][0])

    fileX_test = literal_eval(names['fileX_test'][0])
    fileY_test = literal_eval(names['fileY_test'][0])

    X_train, Y_train, _ = load(fileX_train, fileY_train)
    X_valid, Y_valid, _ = load(fileX_valid, fileY_valid)
    X_test, Y_test, t = load(fileX_test, fileY_test)

    model, hist = model_fit(X_train, Y_train, X_valid, Y_valid)

    import matplotlib.pyplot as plt

    # Training curves: blue = training, red = validation
    plt.plot(hist.losses, 'b', hist.val_losses, 'r')
    plt.savefig('plot3.png')
    plt.close()

    # NOTE(review): this figure is left open after saving -- confirm whether
    # plot_res below is meant to start from a fresh figure.
    plt.plot(hist.fbeta, 'b', hist.val_fbeta, 'r')
    plt.savefig('plot4.png')

    pred = model.predict_proba(X_test)
    pred = np.clip(pred, 0, 1)
    # Binarise the first output column with the requested threshold
    testPredict = [1 if i[0] > THRESHOLD else 0 for i in pred]

    # plot results
    plot_res(t, testPredict, Y_test)

    pred_valid = model.predict_proba(X_valid)
    res_valid = pd.DataFrame(pred_valid)
    res_valid.to_csv('LSTM_valid.csv', index=False)

    res = pd.DataFrame(pred)
    res.to_csv('LSTM.csv', index=False)

    # serialize architecture to JSON
    model_json = model.to_json()
    with open("model/LSTM.json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights("model/LSTM.h5")
    return res
def CNN_model(filename, train, X_train, X_test, word2ind, maxWords,
              y_train, y_test, ind2label, maxChar, char2ind,
              validation=False, X_valid=None, y_valid=None,
              pretrained_embedding="", word_embedding_size=100, char_embedding_size=50,
              lstm_hidden=32, nbr_epochs=25, batch_size=128, dropout=0.5, optimizer='rmsprop',
              early_stopping_patience=-1,
              folder_path="CNN_results", print_to_file = True,
              gen_confusion_matrix=False, return_model = False
              ):
    """
        Build, train and test the CNN-CNN-LSTM Keras model. Works for multi-tasking learning.
        The model architecture looks like:

            - CNN character-level representation
            - CNN word-level + character representation
            - LSTM
            - Softmax for prediction (one softmax output per task)

        :param filename: Name of the run; used for the output sub-folder and as the base name of every generated file
        :param train: Boolean if the model must be trained or not. If False, the model's weights are expected to be stored in "folder_path/filename/filename.h5"
        :param X_train: Data to train the model. It must be a list of word and character indices.
        :param X_test: Data to test the model. It must be a list of word and character indices.
        :param word2ind: Dictionary containing all words in the training data and a unique integer per word
        :param maxWords: Maximum number of words in a sequence
        :param y_train: Labels to train the model, one entry per task
        :param y_test: Labels to test the model, one entry per task
        :param ind2label: One label dictionary per task (indexed by task number), each mapping the labels of that task to unique integers
        :param maxChar: The maximum numbers of characters in a word. The character-level branch is always built by this function.
        :param char2ind: A dictionary where each character is mapped into a unique integer
        :param validation: Boolean. If true, the validation score will be computed from 'X_valid' and 'y_valid'
        :param X_valid: Optional. Validation dataset
        :param y_valid: Optional. Validation dataset labels, one entry per task
        :param pretrained_embedding: Use the pretrained word embeddings.
                                     Pretrained vectors must be located here: "dataset/pretrained_vectors"
                                     Three values:
                                            - "":    Do not use pre-trained word embeddings (Default)
                                            - False: Use the pre-trained embedding vectors as the weights in the Embedding layer (layer is not trainable)
                                            - True:  Use the pre-trained embedding vectors as weight initialisers. The Embedding layer will still be trained.
        :param word_embedding_size: Size of the pre-trained word embedding to use (100 or 300)
        :param char_embedding_size: size of the character-level word representations
        :param lstm_hidden: Dimensionality of the LSTM output space
        :param nbr_epochs: Number of epochs to train the model
        :param batch_size: Size of batches while training the model
        :param dropout: Rate to apply for each Dropout layer in the model
        :param optimizer: Optimizer to use while compiling the model
        :param early_stopping_patience: Number of continuous tolerated epochs without improvement during training. -1 uses 'nbr_epochs' as patience.
        :param folder_path: Path to the directory storing all to-be-generated files
        :param print_to_file: if True redirects the printings to a file ("folder_path/filename/filename.txt"), if False std_out is kept
        :param gen_confusion_matrix: Boolean value. Generated confusion matrices or not.
        :param return_model: if True returns the Keras model object, otherwise return best results

        :return: The classification scores of the best epoch (None when 'train' is False).
                 If return_model = True, returns model object for further computation
    """
    print("====== {0} start ======".format(filename))
    end_string = "====== {0} end ======".format(filename)

    os.makedirs(folder_path+"/"+filename, exist_ok=True)
    filepath = folder_path+"/"+filename+"/"+filename

    # Set print outputs file
    if print_to_file:
        log_file, stdout_original = setPrintToFile("{0}.txt".format(filepath))

    # Everything below may raise (training, file access, ...). The finally
    # clause makes sure the print redirection is always undone.
    try:
        nbr_words = len(word2ind)+1
        nbr_chars = len(char2ind)+1
        best_results = None

        # Embedding - Characters
        character_input = Input((maxWords,maxChar,))
        embed_char_out = TimeDistributed(Embedding(nbr_chars, char_embedding_size))(character_input)
        conv1d_out = TimeDistributed(Convolution1D(filters = 50, kernel_size = 3, strides=1, activation="relu", padding='same'))(embed_char_out)
        pool_out = TimeDistributed(MaxPooling1D(pool_size=2, strides=1, padding='same'))(conv1d_out)
        char_enc = TimeDistributed(Flatten())(pool_out)

        # Embedding - Words
        word_input = Input((maxWords,))
        if pretrained_embedding=="":
            word_emb = Embedding(nbr_words, word_embedding_size)(word_input)
        else:
            # Retrieve embeddings from word2vec
            embedding_matrix = word2VecEmbeddings(word2ind, word_embedding_size)
            word_emb = Embedding(nbr_words, word_embedding_size, weights=[embedding_matrix], trainable=pretrained_embedding, mask_zero=False)(word_input)

        # Model inputs
        inputs = [word_input, character_input]

        # Full word representation: w_full = w_char + w_embed
        word_full = concatenate([char_enc, word_emb])

        # encode words
        conv1d_w1_out = Convolution1D(filters = 800, kernel_size = 5, strides=1, activation="relu", padding='same')(word_full)
        drop_w1_layer = Dropout(dropout, noise_shape=None, seed=None)(conv1d_w1_out)
        conv1d_w2_out = Convolution1D(filters = 800, kernel_size = 5, strides=1, activation="relu", padding='same')(drop_w1_layer)
        # pool size out?
        word_enc = MaxPooling1D(pool_size=2, strides=1, padding='same')(conv1d_w2_out)

        # LSTM layer
        lstm_out = LSTM(lstm_hidden, return_sequences=True, dropout=dropout)(word_enc)

        # Output - Softmax: one output layer per task, sized from that task's label dictionary
        outputs = [Dense(len(task_labels)+1, activation='softmax')(lstm_out) for task_labels in ind2label]
        model_loss = ['categorical_crossentropy'] * len(outputs)

        # Model
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(loss=model_loss, metrics=None, optimizer=get_optimizer(optimizer))
        # summary() prints by itself and returns None, so it must not be wrapped in print()
        model.summary(line_length=150)
        print("\n\n\n\n")

        # Training Callbacks:
        value_to_monitor = 'val_f1'
        best_model_weights_path = "{0}.h5".format(filepath)

        #    1) Classification scores
        classification_scores = Classification_Scores([X_train, y_train], ind2label, best_model_weights_path)

        #    2) EarlyStopping (a patience of -1 means: be patient for the whole training)
        patience = nbr_epochs if early_stopping_patience == -1 else early_stopping_patience
        early_stopping = EarlyStopping(monitor=value_to_monitor, patience=patience, mode='max')

        training_callbacks = [classification_scores, early_stopping]

        # Train
        if train:
            # Train the model. Keras's method argument 'validation_data' is referred as 'testing data' in this code.
            # Keras documents 'validation_data' as a tuple, hence (X_test, y_test) rather than a list.
            hist = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=nbr_epochs, batch_size=batch_size, callbacks=training_callbacks, verbose=2)

            best_epoch = np.argmax(hist.history[value_to_monitor])

            print()
            print('-------------------------------------------')
            print("Best F1 score:", early_stopping.best, " (epoch number {0})".format(1+best_epoch))

            # Save Training scores
            save_model_training_scores("{0}".format(filepath), hist, classification_scores)

            # Print best testing classification report
            print(classification_scores.test_report[best_epoch])

            # Best epoch results
            best_results = model_best_scores(classification_scores, best_epoch)

        # HACK: optmizer weight length issue
        # https://github.com/keras-team/keras/issues/4044
        import h5py
        with h5py.File(best_model_weights_path, 'a') as f:
            if 'optimizer_weights' in f.keys():
                del f['optimizer_weights']

        # Load weights from best training epoch into model
        save_load_utils.load_all_weights(model, best_model_weights_path)

        # Create confusion matrices
        if gen_confusion_matrix:
            for i, y_target in enumerate(y_test):
                # Compute predictions, flatten
                # NOTE(review): unlike the validation loop below, no 'nbrTask' is passed here -- confirm this is intended for multi-task models.
                predictions, target = compute_predictions(model, X_test, y_target, ind2label[i])
                # Generate confusion matrices
                save_confusion_matrix(target, predictions, list(ind2label[i].values()), "{0}_task_{1}_confusion_matrix_test".format(filepath,str(i+1)))

        # Validation dataset
        if validation:
            print()
            print("Validation dataset")
            print("======================")

            # Compute classification report
            for i, y_target in enumerate(y_valid):
                # Compute predictions, flatten
                predictions, target = compute_predictions(model, X_valid, y_target, ind2label[i], nbrTask=i)

                # Only for multi-task
                if len(y_train) > 1:
                    print("For task "+str(i+1)+"\n")
                    print("====================================================================================")

                print("")
                print("With padding into account")
                print(metrics.flat_classification_report([target], [predictions], digits=4))
                print("")
                print('----------------------------------------------')
                print("")
                print("Without the padding:")
                print(metrics.flat_classification_report([target], [predictions], digits=4, labels=list(ind2label[i].values())))

                # Generate confusion matrices
                save_confusion_matrix(target, predictions, list(ind2label[i].values()), "{0}_task_{1}_confusion_matrix_validation".format(filepath,str(i+1)))
    finally:
        # Close file
        if print_to_file:
            closePrintToFile(log_file, stdout_original)

    print(end_string)

    # Returns model itself for further computation, otherwise best results
    if return_model:
        return model
    return best_results
# `Add` is a layer class: it has to be instantiated first and then called on
# the list of tensors. `Add([model, model])` passed the list to the layer
# constructor instead, so `merge` was never a tensor.
# NOTE(review): both operands are `model`; presumably the tensor of the second
# input branch was meant for one of them -- confirm against the code above.
merge = Add()([model, model])
merge = Dense(8)(merge)
output = Dense(1)(merge)

# Two separate heads, each Dense(4) -> Dense(1), on top of the shared output
output_1 = Dense(4)(output)
output_1 = Dense(1)(output_1)

output_2 = Dense(4)(output)
output_2 = Dense(1)(output_2)

model = Model(inputs=[input, input2], outputs=[output_1, output_2])
model.summary()
# quit()

model.compile(loss='mse', optimizer='adam', metrics=['mse'])
model.fit([x, x2], [y, y2], epochs=100, batch_size=1)
res = model.evaluate([x, x2], [y, y2], batch_size=1)

from keras.callbacks import EarlyStopping
# callback = EarlyStopping(monitor='loss', patience=20, mode='auto')
# # callback = EarlyStopping(monitor='acc', patience=20, mode='max')
# model2.compile(loss='mse', optimizer='adam', metrics=['mse'])
# model2.fit(x_, y, epochs=1000, batch_size=1, callbacks=[callback])
# loss2, mse2 = model2.evaluate(x_, y, batch_size=1, verbose=1)

# x_input = np.array([[6.5, 7.5, 8.5], [50, 60, 70], [70, 80, 90], [100, 110, 120]])
# x = x_input.reshape(-1, 3, 1)
# y_pred = model.predict(x)