def test_data_preprocess(self):
    """Verify that preprocessing produces arrays whose shapes match the
    number of files on disk, the requested resize, and the tag count."""
    target_size = (16, 16)
    rgb_channels = 3  # RGB

    paths = data_helper.get_jpeg_data_files_paths()
    train_jpeg_dir, test_jpeg_dir, test_jpeg_additional, train_csv_file = paths

    # All expected dataset locations must exist before preprocessing
    for folder in (train_jpeg_dir, test_jpeg_dir):
        assert os.path.exists(folder), "The {} folder does not exist".format(folder)
    for file_path in (test_jpeg_additional, train_csv_file):
        assert os.path.exists(file_path), "The {} file does not exist".format(file_path)

    x_train, y_train, y_map = data_helper.preprocess_train_data(
        train_jpeg_dir, train_csv_file, img_resize=target_size)
    x_test, _ = data_helper.preprocess_test_data(test_jpeg_dir, img_resize=target_size)
    x_test_add, _ = data_helper.preprocess_test_data(test_jpeg_additional, img_resize=target_size)

    # Count the distinct tags appearing anywhere in the training csv
    tag_strings = pd.read_csv(train_csv_file)['tags'].values
    labels_count = len(set(chain.from_iterable(tags.split(" ") for tags in tag_strings)))

    train_files_count = len(os.listdir(train_jpeg_dir))
    test_files_count = len(os.listdir(test_jpeg_dir))
    test_add_file_count = len(os.listdir(test_jpeg_additional))

    # One row per image, resized spatial dims, RGB channels
    assert x_train.shape == (train_files_count, *target_size, rgb_channels)
    assert x_test.shape == (test_files_count, *target_size, rgb_channels)
    assert x_test_add.shape == (test_add_file_count, *target_size, rgb_channels)
    # One one-hot-style column per distinct tag
    assert y_train.shape == (train_files_count, labels_count)
plt.savefig('Accuracy.png', transparent=False)

# Inspect the fbeta score obtained during training
# In[13]:
print("fbeta_score je - ", fbeta_score)

# Preprocess the test data; drop the (large) training matrices first
# In[14]:
del x_train, y_train
gc.collect()

x_test, x_test_filename = data_helper.preprocess_test_data(test_jpeg_dir, img_resize)
# Predict labels for the standard test images
predictions = classifier.predict(x_test)

# Run predictions on the additional dataset (updated on 05/05/2017 on Kaggle)
# In[15]:
del x_test
gc.collect()

x_test, x_test_filename_additional = data_helper.preprocess_test_data(test_jpeg_additional, img_resize)
new_predictions = classifier.predict(x_test)

del x_test
epochs_arr = [20, 5, 5] learn_rates = [0.001, 0.0001, 0.00001] for learn_rate, epochs in zip(learn_rates, epochs_arr): tmp_train_losses, tmp_val_losses, fbeta_score = classifier.train_model(x_train, y_train, learn_rate, epochs, batch_size, validation_split_size=validation_split_size, train_callbacks=[checkpoint]) train_losses += tmp_train_losses val_losses += tmp_val_losses classifier.load_weights("weights.best.hdf5") print("Weights loaded") print (fbeta_score) del x_train, y_train gc.collect() x_test, x_test_filename = data_helper.preprocess_test_data(test_jpeg_dir, img_resize) # Predict the labels of our x_test images predictions = classifier.predict(x_test) del x_test gc.collect() x_test, x_test_filename_additional = data_helper.preprocess_test_data(test_jpeg_additional, img_resize) new_predictions = classifier.predict(x_test) del x_test gc.collect() predictions = np.vstack((predictions, new_predictions)) x_test_filename = np.hstack((x_test_filename, x_test_filename_additional)) print("Predictions shape: {}\nFiles name shape: {}\n1st predictions entry:\n{}".format(predictions.shape, x_test_filename.shape,