def eval_on_dataset(model, dataset_path, vocab_dict, num_classes,
                    max_input_length, steps, batch_size=100):
    """Evaluate ``model`` on ``steps`` batches drawn from ``dataset_path``.

    Bug fix: ``model`` was previously read as an implicit global while the
    caller in ``main_func`` passes it as the first positional argument
    (7 args against the old 6-parameter signature -> TypeError). It is now
    an explicit first parameter, matching that call site.

    Parameters
    ----------
    model : keras.models.Model
        Compiled model exposing ``evaluate_generator``.
    dataset_path : str
        CSV file handed to ``create_batch_generator``.
    vocab_dict : dict
        Token -> integer index mapping for the generator.
    num_classes : int
        Number of output classes; only used for the chance-accuracy printout.
    max_input_length : int
        Maximum token sequence length per sample.
    steps : int
        Number of generator batches to evaluate.
    batch_size : int, optional
        Samples per generator batch (default 100); also used to report the
        total sample count in the timing printout.

    Returns
    -------
    (scores, elapsed_time)
        ``scores`` is whatever ``model.evaluate_generator`` returns
        (loss first, then the compiled metrics, given the printout below);
        ``elapsed_time`` is a ``datetime.timedelta`` of wall-clock time.
    """
    start_time = datetime.datetime.now()
    _generator = create_batch_generator(dataset_path, vocab_dict, num_classes,
                                        max_input_length, batch_size)
    scores = model.evaluate_generator(_generator, steps)
    elapsed_time = datetime.datetime.now() - start_time
    print('Evaluation time on %d samples: %s' %
          (steps * batch_size, str(elapsed_time)))
    print("Loss: %1.4f. Accuracy: %.2f%% (Chance: %0.2f%%)" %
          (scores[0], scores[1] * 100, 100.0 / num_classes))
    return scores, elapsed_time
model = keras.models.load_model(model_checkpoint_path, custom_objects=_cust_objects) ## Run predictions if True: max_to_pred = 1000 pred_res = np.zeros([max_to_pred, num_classes]) act_res = np.zeros(max_to_pred) all_text = [] all_titles = [] print('{0}: Predicting on {1} samples'.format(datetime.datetime.now(), max_to_pred)) pred_generator = create_batch_generator(test_path, vocab_dict, num_classes, max_input_length, batch_size, return_raw_text=False, return_title=True) num_predded = 0 for pred_inputs in pred_generator: X_pred, y_true, obj_title = pred_inputs #all_text += raw_text all_titles += obj_title y_preds = model.predict(X_pred) offset = num_predded num_predded += X_pred.shape[0] pred_res[offset:offset + y_preds.shape[0], :] = y_preds act_res[offset:offset + y_true.shape[0]] = np.argmax(y_true,
embedding_trainable=embedding_trainable) model.compile(loss=loss_, optimizer=optimizer_, metrics=log_metrics) #-----------------------# print('Model summary') print(model.summary()) ## Training if initial_epoch < epochs: training_start_time = datetime.datetime.now() print('{0}: Starting training at epoch {1}/{2}'.format( training_start_time, initial_epoch, epochs)) train_generator = create_batch_generator(train_path, vocab_dict, num_classes, max_input_length, batch_size) history = model.fit_generator(train_generator, batches_per_epoch, epochs, callbacks=_callbacks, initial_epoch=initial_epoch) training_end_time = datetime.datetime.now() print('{0}: Training finished at epoch {1}'.format( training_end_time, epochs)) training_time = training_end_time - training_start_time print('{0} elapsed to train {1} epochs'.format(str(training_time), epochs - initial_epoch)) ## Evaluation of final model
def main_func(max_input_length, batch_size, batches_per_epoch, epochs, loss_,
              optimizer_, _config, do_final_eval=True):
    """Build or resume the DBpedia text classifier, train it, and optionally evaluate it.

    Flow: set up paths and TensorBoard/checkpoint callbacks, build the vocab
    from a word2vec model, load class labels, resume from the newest checkpoint
    if one exists (otherwise build and compile a fresh model), train with
    ``fit_generator``, then (optionally) time a final test-set evaluation.

    Args:
        max_input_length: Maximum token sequence length per sample.
        batch_size: Samples per generator batch.
        batches_per_epoch: Training steps per epoch (fit_generator arg).
        epochs: Absolute epoch index to train up to (not an increment).
        loss_: Loss passed to ``model.compile`` when building a new model.
        optimizer_: Optimizer passed to ``model.compile`` when building a new model.
        _config: Sacred config dict; currently unused (see commented-out
            key-injection block below).
        do_final_eval: When True, run ``eval_on_dataset`` on the test set.

    Returns:
        None; results are printed and models/logs are written to disk.
    """
    # Bring these keys into general namespace
    # Note that '_config' variable name subject to change
    model_tag = 'cnn_lstm_denovo_trainable_embed'
    train_path = './dbpedia_csv/train_shuf.csv'
    test_path = './dbpedia_csv/test_shuf.csv'
    class_labels = './dbpedia_csv/classes.txt'
    # NOTE(review): google_word2vec is never read in this function — presumably
    # consumed by create_vocab_model in an earlier revision; confirm before removing.
    google_word2vec = '/home/denys/word2vec-GoogleNews-vectors/GoogleNews-vectors-negative300.bin.gz'
    #string_keys = ['model_tag', 'train_path', 'test_path', 'class_labels', 'google_word2vec']
    # for key in string_keys:
    #     exec('%s = "%s"' % (key, _config[key]))
    # print(train_path)
    # Dynamically created logging directories
    log_dir = './keras_logs_%s' % model_tag
    train_log_dir = '%s/train' % log_dir
    val_log_dir = '%s/val' % log_dir
    custom_log_dir = '%s/custom' % log_dir
    model_dir = 'models_%s' % model_tag
    # Checkpoint filename template; Keras substitutes {epoch:02d} at save time.
    model_path = os.path.join(model_dir,
                              'word2vec_%s_{epoch:02d}.hdf5' % model_tag)
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
    # Logging
    # Create callback and logging objects
    log_metrics = ['categorical_accuracy', 'categorical_crossentropy',
                   brier_pred, brier_true]
    model_saver = keras.callbacks.ModelCheckpoint(model_path, verbose=1)
    # Log savers which play reasonably well with Keras: train vs. validation
    # scalars are routed to separate TensorBoard runs by regex on metric name.
    train_tboard_logger = FilterTensorBoard(log_dir=train_log_dir,
                                            write_graph=False,
                                            write_images=False,
                                            log_regex=r'^(?!val).*')
    val_tboard_logger = FilterTensorBoard(log_dir=val_log_dir,
                                          write_graph=False,
                                          write_images=False,
                                          log_regex=r"^val")
    #Custom saver
    custom_tboard_saver = TensorBoardMod(log_dir=custom_log_dir,
                                         histogram_freq=0, write_graph=False,
                                         write_images=False, save_logs=False)
    _callbacks = [model_saver, train_tboard_logger, val_tboard_logger,
                  custom_tboard_saver]
    # Parameters fed using Sacred
    vocab_model = create_vocab_model()
    ## Main training and testing
    # assumes vocab_model is a gensim word2vec-style model exposing .syn0
    # (embedding matrix) and .vocab (word -> entry with .index) — TODO confirm
    embedding_matrix = vocab_model.syn0
    vocab_dict = {word: vocab_model.vocab[word].index
                  for word in vocab_model.vocab.keys()}
    vocab_size = len(vocab_dict)
    #Load class label dictionary (one label per line; line number = class index)
    class_ind_to_label = {}
    with open(class_labels, 'r') as cfi:
        for ind, line in enumerate(cfi):
            class_ind_to_label[ind] = line.rstrip()
    num_classes = len(class_ind_to_label)
    ## Create or load the model
    last_epoch, model_checkpoint_path = find_last_checkpoint(model_dir)
    initial_epoch = 0
    if model_checkpoint_path is not None:
        # Resume: custom metric functions must be passed by name so Keras can
        # deserialize the compiled model.
        print('Loading epoch {0:d} from {1:s}'.format(last_epoch,
                                                      model_checkpoint_path))
        _cust_objects = {'brier_skill': brier_skill,
                         'brier_pred': brier_pred,
                         'brier_true': brier_true}
        model = keras.models.load_model(model_checkpoint_path,
                                        custom_objects=_cust_objects)
        initial_epoch = last_epoch + 1
    else:
        print('Building new model')
        #----------------------#
        model = build_lstm_model(vocab_size, num_outputs=num_classes,
                                 embedding_matrix=embedding_matrix)
        model.compile(loss=loss_, optimizer=optimizer_, metrics=log_metrics)
        #-----------------------#
    print('Model summary')
    print(model.summary())
    ## Custom tensorflow logging
    # Placeholder for the true values
    # NOTE(review): reaches into Keras internals (model.model._feed_targets);
    # this private attribute is version-sensitive — verify against the pinned
    # Keras version before upgrading.
    y_true = model.model._feed_targets[0]
    # This is the final softmax output layer of the model
    y_pred = model.outputs[0]
    create_batch_pairwise_metrics(y_true, y_pred)
    ## Training — skipped entirely when the checkpoint already reached `epochs`
    if initial_epoch < epochs:
        training_start_time = datetime.datetime.now()
        print('{0}: Starting training at epoch {1}/{2}'.format(
            training_start_time, initial_epoch, epochs))
        train_generator = create_batch_generator(train_path, vocab_dict,
                                                 num_classes,
                                                 max_input_length, batch_size)
        # NOTE(review): this validation batch is drawn and its sizes printed,
        # but it is never passed to fit_generator (see the commented-out
        # validation_data line below) — dead setup unless re-enabled.
        val_size = 1000
        val_generator = create_batch_generator(test_path, vocab_dict,
                                               num_classes, max_input_length,
                                               val_size)
        val_X, val_y = next(val_generator)
        print(len(val_X))
        print(len(val_y[0]))
        model.fit_generator(train_generator, batches_per_epoch, epochs,
                            callbacks=_callbacks,
                            initial_epoch=initial_epoch, verbose=1)
        #validation_data = (val_X, val_y)
        training_end_time = datetime.datetime.now()
        print('{0}: Training finished at epoch {1}'.format(
            training_end_time, epochs))
        training_time = training_end_time - training_start_time
        print('{0} elapsed to train {1} epochs'.format(str(training_time),
                                                       epochs - initial_epoch))
    ## Evaluation of final model
    if do_final_eval:
        num_test_samples = 1000
        # Round the requested sample count down to a whole number of batches.
        num_test_steps = num_test_samples // batch_size
        num_test_samples = num_test_steps * batch_size
        print('{0}: Starting testing on {1} samples'.format(
            datetime.datetime.now(), num_test_samples))
        test_scores, test_time = eval_on_dataset(model, test_path, vocab_dict,
                                                 num_classes,
                                                 max_input_length,
                                                 num_test_steps, batch_size)
        time_per_sample = test_time.total_seconds() / num_test_samples
        print("Seconds per sample: %2.2e sec" % time_per_sample)