def test_data_preprocess(self):
    """Verify that preprocessing produces arrays whose shapes match the
    number of files on disk, the requested resize, and the tag count."""
    target_size = (16, 16)
    rgb_channels = 3  # RGB

    paths = data_helper.get_jpeg_data_files_paths()
    train_jpeg_dir, test_jpeg_dir, test_jpeg_additional, train_csv_file = paths

    # All expected dataset locations must exist before preprocessing
    for folder in (train_jpeg_dir, test_jpeg_dir):
        assert os.path.exists(folder), "The {} folder does not exist".format(folder)
    for file_path in (test_jpeg_additional, train_csv_file):
        assert os.path.exists(file_path), "The {} file does not exist".format(file_path)

    x_train, y_train, y_map = data_helper.preprocess_train_data(
        train_jpeg_dir, train_csv_file, img_resize=target_size)
    x_test, _ = data_helper.preprocess_test_data(test_jpeg_dir, img_resize=target_size)
    x_test_add, _ = data_helper.preprocess_test_data(test_jpeg_additional, img_resize=target_size)

    # Count the distinct tags appearing anywhere in the training csv
    tag_strings = pd.read_csv(train_csv_file)['tags'].values
    labels_count = len(set(chain.from_iterable(tags.split(" ") for tags in tag_strings)))

    train_files_count = len(os.listdir(train_jpeg_dir))
    test_files_count = len(os.listdir(test_jpeg_dir))
    test_add_file_count = len(os.listdir(test_jpeg_additional))

    # One row per image, resized spatial dims, RGB channels
    assert x_train.shape == (train_files_count, *target_size, rgb_channels)
    assert x_test.shape == (test_files_count, *target_size, rgb_channels)
    assert x_test_add.shape == (test_add_file_count, *target_size, rgb_channels)
    # One one-hot-style column per distinct tag
    assert y_train.shape == (train_files_count, labels_count)
plt.savefig('Accuracy.png', transparent=False)

# Inspect the fbeta score obtained during training
# In[13]:
print("fbeta_score je - ", fbeta_score)

# Preprocess the test data; drop the (large) training matrices first
# In[14]:
del x_train, y_train
gc.collect()

x_test, x_test_filename = data_helper.preprocess_test_data(test_jpeg_dir, img_resize)
# Predict labels for the standard test images
predictions = classifier.predict(x_test)

# Run predictions on the additional dataset (updated on 05/05/2017 on Kaggle)
# In[15]:
del x_test
gc.collect()

x_test, x_test_filename_additional = data_helper.preprocess_test_data(test_jpeg_additional, img_resize)
new_predictions = classifier.predict(x_test)

del x_test
epochs_arr = [20, 5, 5] learn_rates = [0.001, 0.0001, 0.00001] for learn_rate, epochs in zip(learn_rates, epochs_arr): tmp_train_losses, tmp_val_losses, fbeta_score = classifier.train_model(x_train, y_train, learn_rate, epochs, batch_size, validation_split_size=validation_split_size, train_callbacks=[checkpoint]) train_losses += tmp_train_losses val_losses += tmp_val_losses classifier.load_weights("weights.best.hdf5") print("Weights loaded") print (fbeta_score) del x_train, y_train gc.collect() x_test, x_test_filename = data_helper.preprocess_test_data(test_jpeg_dir, img_resize) # Predict the labels of our x_test images predictions = classifier.predict(x_test) del x_test gc.collect() x_test, x_test_filename_additional = data_helper.preprocess_test_data(test_jpeg_additional, img_resize) new_predictions = classifier.predict(x_test) del x_test gc.collect() predictions = np.vstack((predictions, new_predictions)) x_test_filename = np.hstack((x_test_filename, x_test_filename_additional)) print("Predictions shape: {}\nFiles name shape: {}\n1st predictions entry:\n{}".format(predictions.shape, x_test_filename.shape,