Beispiel #1
0
class BandStatisticsTestCase(unittest.TestCase):
    """Tests for ``BandStatistics`` run against the ``test_data`` directory."""

    def setUp(self):
        """Create a prepared ``GIS`` helper and the ``BandStatistics`` under test."""
        here = os.path.abspath(os.path.dirname(__file__))
        self.root = here + '/test_data'
        self.verbose = False
        self.gh = GIS(self.root + '/root')
        self.gh.validate_gis()
        self.gh.validate_tif()
        self.gh.define_output()
        self.cbm = BandStatistics(self.gh, self.verbose)

    def test__init__(self):
        """The constructor stores the GIS helper and the verbosity flag."""
        self.assertEqual(self.cbm.gh, self.gh)
        self.assertEqual(self.cbm.verbose, self.verbose)

    def test_calc_band_mean_and_stddev(self):
        """The statistics file is recreated and equals the stored reference."""
        generated = self.root + '/root/gis/env_bio_mean_std.txt'
        reference = self.root + '/band_statistics/env_bio_mean_std.txt'

        # Start from a state in which the output file does not exist.
        os.remove(generated)
        self.assertFalse(os.path.isfile(generated))

        self.cbm.calc_band_mean_and_stddev()

        result = pd.read_csv(generated, delimiter='\t')
        truth = pd.read_csv(reference, delimiter='\t')
        self.assertTrue(os.path.isfile(generated))
        self.assertEqual(list(result.columns), ['band', 'mean', 'std_dev'])
        self.assertEqual(result.to_numpy().tolist(),
                         truth.to_numpy().tolist())
Beispiel #2
0
class RasterStackTestCase(unittest.TestCase):
    """Tests for ``RasterStack`` run against the ``test_data`` directory."""

    def setUp(self):
        """Create a prepared ``GIS`` helper and the ``RasterStack`` under test."""
        self.root = (os.path.abspath(os.path.join(os.path.dirname(__file__))) +
                     '/test_data').replace('\\', '/')
        self.gh = GIS(self.root + '/root')
        self.gh.validate_gis()
        self.gh.validate_tif()
        self.gh.define_output()
        self.verbose = False
        self.crs = RasterStack(self.gh, self.verbose)

    def test__init__(self):
        """The constructor stores the GIS helper."""
        self.assertEqual(self.crs.gh, self.gh)

    def test_create_raster_stack(self):
        """The stack is recreated and its bands 1-4 equal the first four layers."""
        stack_path = self.root + '/root/gis/stack/stacked_env_variables.tif'

        os.remove(stack_path)
        self.assertFalse(os.path.isfile(stack_path))
        self.crs.create_raster_stack()
        self.assertTrue(os.path.isfile(stack_path))

        layers = self.gh.variables[:4]
        self.assertEqual(len(layers), 4)

        # Context managers close every dataset even when a comparison fails,
        # instead of relying on a trailing close() that a failure would skip.
        with rasterio.open(stack_path) as stacked:
            for band, layer_path in enumerate(layers, start=1):
                with rasterio.open(layer_path) as layer:
                    np.testing.assert_array_equal(layer.read(1),
                                                  stacked.read(band))
Beispiel #3
0
class PredictorTestCase(unittest.TestCase):
    """Tests for ``Predictor`` run against the ``test_data`` directory."""

    def setUp(self):
        """Create the occurrence, GIS and config helpers and the ``Predictor``."""
        self.root = os.path.abspath(os.path.join(
            os.path.dirname(__file__))) + '/test_data'
        self.oh = Occurrences(self.root + '/root')
        self.oh.validate_occurrences()
        self.oh.species_dictionary()
        self.gh = GIS(self.root + '/root')
        self.gh.validate_gis()
        self.gh.validate_tif()
        self.gh.define_output()
        self.ch = Config(self.root + '/root', self.oh, self.gh)
        self.ch.search_config()
        self.ch.read_yaml()
        self.verbose = False
        self.p = Predictor(self.oh, self.gh, self.ch, self.verbose)

    def test__init__(self):
        """The constructor stores the three helpers and the verbosity flag."""
        self.assertEqual(self.p.oh, self.oh)
        self.assertEqual(self.p.gh, self.gh)
        self.assertEqual(self.p.ch, self.ch)
        self.assertEqual(self.p.verbose, self.verbose)

    def test_prep_prediction_data(self):
        """The returned array equals the stack and the indices mark the map minimum."""
        myarray, index_minb1 = self.p.prep_prediction_data()
        myarray_truth = gdal.Open(
            self.root +
            '/root/gis/stack/stacked_env_variables.tif').ReadAsArray()

        # Read the band inside a ``with`` block so the dataset handle is
        # released as soon as the data is in memory.
        with rasterio.open(self.root +
                           '/root/gis/layers/empty_land_map.tif') as src:
            empty_map = src.read(1)
        index_minb1_truth = np.where(empty_map == np.min(empty_map))

        self.assertEqual(myarray.tolist(), myarray_truth.tolist())
        self.assertEqual([x.tolist() for x in index_minb1],
                         [x.tolist() for x in index_minb1_truth])

    def notest_predict_distribution(self):
        """Compare the predicted band with the stored ``new_band.npz`` array.

        The ``notest_`` prefix keeps unittest's default loader from
        collecting this method.
        """
        myarray, index_minb1 = self.p.prep_prediction_data()
        new_band = self.p.predict_distribution(self.oh.name[0], myarray,
                                               index_minb1)
        with np.load(self.root + '/predictor/new_band.npz') as data:
            # The archive is read through its first key only.
            new_band_truth = data[list(data.keys())[0]]
            np.testing.assert_array_equal(new_band, new_band_truth)
Beispiel #4
0
class PresenceMapTestCase(unittest.TestCase):
    """Tests for ``PresenceMap`` run against the ``test_data`` directory."""

    def setUp(self):
        """Create the occurrence and GIS helpers and the ``PresenceMap``."""
        self.root = os.path.abspath(os.path.join(
            os.path.dirname(__file__))) + '/test_data'
        self.oh = Occurrences(self.root + '/root')
        self.oh.validate_occurrences()
        self.oh.species_dictionary()
        self.gh = GIS(self.root + '/root')
        self.gh.validate_gis()
        self.gh.validate_tif()
        self.gh.define_output()
        self.verbose = False

        self.cpm = PresenceMap(self.oh, self.gh, self.verbose)

    def test__init__(self):
        """The constructor stores both helpers and the verbosity flag."""
        self.assertEqual(self.cpm.oh, self.oh)
        self.assertEqual(self.cpm.gh, self.gh)
        self.assertEqual(self.cpm.verbose, self.verbose)

    def test_create_presence_map(self):
        """Presence maps are recreated and equal the stored reference rasters.

        The existing maps are moved aside under a ``true_`` prefix for the
        duration of the test and are put back afterwards, whether or not the
        test passes.
        """
        presence_dir = self.root + '/root/gis/layers/non-scaled/presence/'
        species = ['arachis_duranensis', 'solanum_bukasovii']
        generated = {
            name: presence_dir + name + '_presence_map.tif'
            for name in species
        }
        backup = {
            name: presence_dir + 'true_' + name + '_presence_map.tif'
            for name in species
        }
        truth = {
            name: self.root + '/presence_map/' + name + '_presence_map.tif'
            for name in species
        }

        try:
            for name in species:
                shutil.move(generated[name], backup[name])
            for name in species:
                self.assertFalse(os.path.isfile(generated[name]))

            self.cpm.create_presence_map()

            for name in species:
                self.assertTrue(os.path.isfile(generated[name]))
                # ``with`` closes both rasters even if the comparison fails.
                with rasterio.open(generated[name]) as result, \
                        rasterio.open(truth[name]) as expected:
                    self.assertEqual(result.read(1).tolist(),
                                     expected.read(1).tolist())
        finally:
            # Always restore the original fixtures so a failing run cannot
            # leave the test data in a state that breaks later runs.
            for name in species:
                if os.path.isfile(backup[name]):
                    if os.path.isfile(generated[name]):
                        os.remove(generated[name])
                    shutil.move(backup[name], generated[name])
Beispiel #5
0
class PredictionDataTestCase(unittest.TestCase):
    """Tests for ``PredictionData`` run against the ``test_data`` directory."""

    def setUp(self):
        """Create a prepared ``GIS`` helper and the ``PredictionData`` under test."""
        here = os.path.abspath(os.path.dirname(__file__))
        self.root = here + '/test_data'
        self.verbose = False
        self.gh = GIS(self.root + '/root')
        self.gh.validate_gis()
        self.gh.validate_tif()
        self.gh.define_output()
        self.cpd = PredictionData(self.gh, self.verbose)

    def test__init__(self):
        """The constructor stores the GIS helper and the verbosity flag."""
        self.assertEqual(self.cpd.gh, self.gh)
        self.assertEqual(self.cpd.verbose, self.verbose)

    def test_prepare_prediction_df(self):
        """Each of the six returned values equals its stored reference."""
        prepared = self.cpd.prepare_prediction_df()
        lon, lat, row, col, myarray, mean_std = prepared

        expected_lon = np.load(self.root + '/prediction_data/lon.npy')
        expected_lat = np.load(self.root + '/prediction_data/lat.npy')
        expected_row = np.load(self.root + '/prediction_data/row.npy')
        expected_col = np.load(self.root + '/prediction_data/col.npy')
        stack = gdal.Open(self.root +
                          '/root/gis/stack/stacked_env_variables.tif')
        expected_array = stack.ReadAsArray()
        expected_mean_std = np.load(self.root +
                                    '/prediction_data/mean_std.npy')

        self.assertEqual(lon.tolist(), expected_lon.tolist())
        self.assertEqual(lat.tolist(), expected_lat.tolist())
        # row and col are compared directly, without a tolist() conversion.
        self.assertEqual(row, expected_row.tolist())
        self.assertEqual(col, expected_col.tolist())
        self.assertEqual(myarray.tolist(), expected_array.tolist())
        self.assertEqual(mean_std.tolist(), expected_mean_std.tolist())

    def test_create_prediction_df(self):
        """Both output files are recreated and equal the stored references."""
        array_path = self.root + '/root/gis/world_prediction_array.npy'
        table_path = self.root + '/root/gis/world_prediction_row_col.csv'

        # Start from a state in which neither output file exists.
        os.remove(array_path)
        os.remove(table_path)
        self.assertFalse(os.path.isfile(array_path))
        self.assertFalse(os.path.isfile(table_path))

        self.cpd.create_prediction_df()

        self.assertTrue(os.path.isfile(array_path))
        produced_array = np.load(array_path)
        expected_array = np.load(
            self.root + '/prediction_data/world_prediction_array.npy')
        self.assertEqual(produced_array.tolist(), expected_array.tolist())

        self.assertTrue(os.path.isfile(table_path))
        produced_table = pd.read_csv(table_path)
        expected_table = pd.read_csv(
            self.root + '/prediction_data/world_prediction_row_col.csv')
        self.assertEqual(produced_table.to_numpy().tolist(),
                         expected_table.to_numpy().tolist())
Beispiel #6
0
class TrainingDataTestCase(unittest.TestCase):
    """Tests for ``TrainingData`` run against the ``test_data`` directory."""

    def setUp(self):
        """Create the occurrence and GIS helpers and the ``TrainingData``."""
        self.root = os.path.abspath(os.path.join(
            os.path.dirname(__file__))) + '/test_data'
        self.oh = Occurrences(self.root + '/root')
        self.oh.validate_occurrences()
        self.oh.species_dictionary()
        self.gh = GIS(self.root + '/root')
        self.gh.validate_gis()
        self.gh.validate_tif()
        self.gh.define_output()
        self.verbose = False

        self.ctd = TrainingData(self.oh, self.gh, self.verbose)

    def test__init__(self):
        """The constructor stores both helpers and the verbosity flag."""
        self.assertEqual(self.ctd.oh, self.oh)
        self.assertEqual(self.ctd.gh, self.gh)
        self.assertEqual(self.ctd.verbose, self.verbose)

    def test_prep_training_df(self):
        """Each value returned for the first species equals its stored reference."""
        stack_path = self.root + '/root/gis/stack/stacked_env_variables.tif'
        inRas = gdal.Open(stack_path)

        # ``with`` closes the rasterio dataset even when an assertion fails.
        with rasterio.open(stack_path) as src:
            (spec, ppa, long, lati, row, col, myarray,
             mean_std) = self.ctd.prep_training_df(src, inRas,
                                                   self.oh.name[0])
            ppa_truth = np.load(self.root + '/training_data/ppa.npy')
            long_truth = np.load(self.root + '/training_data/long.npy')
            lati_truth = np.load(self.root + '/training_data/lati.npy')
            row_truth = np.load(self.root + '/training_data/row.npy')
            col_truth = np.load(self.root + '/training_data/col.npy')
            mean_std_truth = np.load(self.root +
                                     '/training_data/mean_std.npy')

            self.assertEqual(spec, self.oh.name[0])
            self.assertEqual(ppa.to_numpy().tolist(), ppa_truth.tolist())
            self.assertEqual(long.tolist(), long_truth.tolist())
            self.assertEqual(lati.tolist(), lati_truth.tolist())
            self.assertEqual(row, row_truth.tolist())
            self.assertEqual(col, col_truth.tolist())
            self.assertEqual(myarray.tolist(),
                             inRas.ReadAsArray().tolist())
            self.assertEqual(mean_std.tolist(), mean_std_truth.tolist())

    def test_create_training_df(self):
        """Both species dataframes are recreated and equal the stored references."""
        generated_a = (self.root +
                       '/root/spec_ppa_env/arachis_duranensis_env_dataframe.csv')
        generated_b = (self.root +
                       '/root/spec_ppa_env/solanum_bukasovii_env_dataframe.csv')

        os.remove(generated_a)
        os.remove(generated_b)
        self.assertFalse(os.path.isfile(generated_a))
        self.assertFalse(os.path.isfile(generated_b))

        self.ctd.create_training_df()

        self.assertTrue(os.path.isfile(generated_a))
        self.assertTrue(os.path.isfile(generated_b))
        result_a = pd.read_csv(generated_a)
        result_b = pd.read_csv(generated_b)
        truth_a = pd.read_csv(
            self.root + '/training_data/arachis_duranensis_env_dataframe.csv')
        truth_b = pd.read_csv(
            self.root + '/training_data/solanum_bukasovii_env_dataframe.csv')

        self.assertEqual(list(result_a.columns), list(truth_a.columns))
        # Previously the first species' columns were asserted twice, which
        # left the second species' header unchecked.
        self.assertEqual(list(result_b.columns), list(truth_b.columns))
        self.assertEqual(result_a.to_numpy().tolist(),
                         truth_a.to_numpy().tolist())
        self.assertEqual(result_b.to_numpy().tolist(),
                         truth_b.to_numpy().tolist())
Beispiel #7
0
class TrainerTestCase(unittest.TestCase):
    """Tests for ``Trainer`` run against the ``test_data`` directory."""

    def setUp(self):
        """Create the occurrence, GIS and config helpers and the ``Trainer``."""
        self.root = os.path.abspath(os.path.join(os.path.dirname(__file__))) + '/test_data'
        self.oh = Occurrences(self.root + '/root')
        self.oh.validate_occurrences()
        self.oh.species_dictionary()
        self.gh = GIS(self.root + '/root')
        self.gh.validate_gis()
        self.gh.validate_tif()
        self.gh.define_output()
        self.ch = Config(self.root + '/root', self.oh, self.gh)
        self.ch.search_config()
        self.ch.read_yaml()
        self.verbose = False
        self.t = Trainer(self.oh, self.gh, self.ch, self.verbose)

    def test__init__(self):
        """The constructor stores the helpers, empty metrics and config values."""
        self.assertEqual(self.t.oh, self.oh)
        self.assertEqual(self.t.gh, self.gh)
        self.assertEqual(self.t.ch, self.ch)
        self.assertEqual(self.t.verbose, self.verbose)
        self.assertEqual(self.t.spec, '')
        self.assertEqual(self.t.variables, [])
        self.assertEqual(self.t.test_loss, [])
        self.assertEqual(self.t.test_acc, [])
        self.assertEqual(self.t.test_AUC, [])
        self.assertEqual(self.t.test_tpr, [])
        self.assertEqual(self.t.test_uci, [])
        self.assertEqual(self.t.test_lci, [])
        self.assertEqual(self.t.best_model_auc, [0])
        self.assertEqual(self.t.occ_len, 0)
        self.assertEqual(self.t.abs_len, 0)
        self.assertEqual(self.t.random_seed, self.ch.random_seed)
        self.assertEqual(self.t.batch, self.ch.batchsize)
        self.assertEqual(self.t.epoch, self.ch.epoch)
        self.assertEqual(self.t.model_layers, self.ch.model_layers)
        self.assertEqual(self.t.model_dropout, self.ch.model_dropout)

    def test_create_eval(self):
        """The evaluation file is recreated and equals the stored reference."""
        eval_path = self.root + '/root/results/_DNN_performance/DNN_eval.txt'

        os.remove(eval_path)
        self.assertFalse(os.path.isfile(eval_path))
        self.t.create_eval()
        self.assertTrue(os.path.isfile(eval_path))

        dnn_eval = pd.read_csv(eval_path, delimiter='\t')
        dnn_eval_truth = pd.read_csv(self.root + '/trainer/create_eval.txt',
                                     delimiter='\t')
        self.assertEqual(dnn_eval.to_numpy().tolist(),
                         dnn_eval_truth.to_numpy().tolist())

    def test_create_input_data(self):
        """Each array returned for the first species equals its stored reference."""
        self.t.spec = self.oh.name[0]
        (X, X_train, X_test, y_train, y_test, test_set, shuffled_X_train,
         shuffled_X_test) = self.t.create_input_data()
        X_truth = np.load(self.root + '/trainer/X.npy')
        X_train_truth = np.load(self.root + '/trainer/X_train.npy')
        X_test_truth = np.load(self.root + '/trainer/X_test.npy')
        y_train_truth = np.load(self.root + '/trainer/y_train.npy')
        y_test_truth = np.load(self.root + '/trainer/y_test.npy')
        test_set_truth = np.load(self.root + '/trainer/test_set.npy')
        shuffled_X_train_truth = np.load(self.root + '/trainer/shuffled_X_train.npy')
        shuffled_X_test_truth = np.load(self.root + '/trainer/shuffled_X_test.npy')
        self.assertEqual(X.tolist(), X_truth.tolist())
        self.assertEqual(X_train.tolist(), X_train_truth.tolist())
        self.assertEqual(X_test.tolist(), X_test_truth.tolist())
        self.assertEqual(y_train.tolist(), y_train_truth.tolist())
        self.assertEqual(y_test.tolist(), y_test_truth.tolist())
        self.assertEqual(test_set.to_numpy().tolist(), test_set_truth.tolist())
        self.assertEqual(shuffled_X_train.tolist(), shuffled_X_train_truth.tolist())
        self.assertEqual(shuffled_X_test.tolist(), shuffled_X_test_truth.tolist())

    def test_create_model_architecture(self):
        """The built model's config and weights equal those of the stored model."""
        self.t.spec = self.oh.name[0]
        X, _, _, _, _, _, _, _ = self.t.create_input_data()
        model = self.t.create_model_architecture(X)
        model_truth = keras.models.load_model(self.root + '/trainer/model.h5')
        self.assertEqual(model.get_config(), model_truth.get_config())
        weights = [x.tolist() for x in model.get_weights()]
        weights_truth = [x.tolist() for x in model_truth.get_weights()]
        self.assertEqual(weights, weights_truth)

    def notest_train_model(self):
        """Compare the trained model's AUC and weights with the stored model.

        The ``notest_`` prefix keeps unittest's default loader from
        collecting this method.
        """
        self.t.spec = self.oh.name[0]
        X, X_train, X_test, y_train, y_test, _, _, _ = self.t.create_input_data()
        model = self.t.create_model_architecture(X)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        tf.Session(config=config)
        AUC, model = self.t.train_model(model, X_train, X_test, y_train, y_test)
        AUC_truth = 0.9930313588850174
        model_truth = keras.models.load_model(self.root + '/trainer/model_trained.h5')
        self.assertAlmostEqual(AUC, AUC_truth)
        # TODO: also compare model.get_config() with model_truth.get_config();
        # the original note says that check crashed when running the whole
        # suite but passed when this test was run on its own.
        weights = model.get_weights()
        weights_truth = model_truth.get_weights()
        # Assert the lengths instead of silently skipping the comparison
        # when they differ, which would let the test pass without checking.
        self.assertEqual(len(weights), len(weights_truth))
        for layer_weights, layer_weights_truth in zip(weights, weights_truth):
            np.testing.assert_almost_equal(layer_weights, layer_weights_truth, 6)

    def notest_update_performance_metrics(self):
        """Compare the updated evaluation file with the stored reference.

        The ``notest_`` prefix keeps unittest's default loader from
        collecting this method.
        """
        eval_path = self.root + '/root/results/_DNN_performance/DNN_eval.txt'

        self.t.spec = self.oh.name[0]
        X, X_train, X_test, y_train, y_test, _, _, _ = self.t.create_input_data()
        model = self.t.create_model_architecture(X)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        tf.Session(config=config)
        # The return value is not inspected in this test.
        self.t.train_model(model, X_train, X_test, y_train, y_test)
        os.remove(eval_path)
        self.assertFalse(os.path.isfile(eval_path))
        self.t.create_eval()
        self.t.update_performance_metrics()
        self.assertTrue(os.path.isfile(eval_path))
        dnn_eval = pd.read_csv(eval_path, delimiter='\t')
        dnn_eval_truth = pd.read_csv(self.root + '/trainer/update_performance_metrics.txt', delimiter='\t')
        # First column is compared exactly, the remaining ones to 6 decimals.
        self.assertEqual(dnn_eval.to_numpy()[0][0], dnn_eval_truth.to_numpy()[0][0])
        np.testing.assert_almost_equal(dnn_eval.to_numpy()[0][1:], dnn_eval_truth.to_numpy()[0][1:], 6)
Beispiel #8
0
class PresencePseudoAbsenceTestCase(unittest.TestCase):
    """Tests for ``PresencePseudoAbsence`` run against the ``test_data`` directory."""

    def setUp(self):
        """Create the helpers, pin the random seed to 1 and build the object under test."""
        here = os.path.abspath(os.path.join(os.path.dirname(__file__)))
        self.root = (here + '/test_data').replace('\\', '/')
        self.verbose = False

        self.oh = Occurrences(self.root + '/root')
        self.oh.validate_occurrences()
        self.oh.species_dictionary()

        self.gh = GIS(self.root + '/root')
        self.gh.validate_gis()
        self.gh.validate_tif()
        self.gh.define_output()

        # The config helper is rooted at ``test_data`` itself here, not at
        # ``test_data/root``.
        self.ch = Config(self.root, self.oh, self.gh)
        self.ch.search_config()
        self.ch.read_yaml()
        self.ch.random_seed = 1

        self.ppa = PresencePseudoAbsence(self.oh, self.gh, self.ch,
                                         self.verbose)

    def test__init__(self):
        """The constructor stores the helpers, flag and two config values."""
        self.assertEqual(self.ppa.oh, self.oh)
        self.assertEqual(self.ppa.gh, self.gh)
        self.assertEqual(self.ppa.ch, self.ch)
        self.assertEqual(self.ppa.verbose, self.verbose)
        self.assertEqual(self.ppa.random_sample_size, self.ch.pseudo_freq)
        self.assertEqual(self.ppa.random_seed, self.ch.random_seed)

    def test_draw_random_absence(self):
        """The draw for the first species equals the stored reference arrays."""
        species = self.oh.name[0]
        drawn = self.ppa.draw_random_absence(species)
        presence_data, outer_random_sample_lon_lats, sample_size = drawn

        expected_presence = np.load(
            self.root + '/presence_pseudo_absence/presence_data.npy',
            allow_pickle=True)
        expected_sample = np.load(
            self.root + '/presence_pseudo_absence/outer_random_sample.npy')

        self.assertEqual(presence_data.to_numpy().tolist(),
                         expected_presence.tolist())
        self.assertEqual(outer_random_sample_lon_lats.tolist(),
                         expected_sample.tolist())
        self.assertEqual(sample_size, self.ch.pseudo_freq)

    def test_create_presence_pseudo_absence(self):
        """Both species dataframes are recreated and equal the stored references."""
        generated = [
            self.root + '/root/spec_ppa/arachis_duranensis_ppa_dataframe.csv',
            self.root + '/root/spec_ppa/solanum_bukasovii_ppa_dataframe.csv',
        ]
        references = [
            self.root +
            '/presence_pseudo_absence/arachis_duranensis_ppa_dataframe.csv',
            self.root +
            '/presence_pseudo_absence/solanum_bukasovii_ppa_dataframe.csv',
        ]

        # Start from a state in which neither output file exists.
        for path in generated:
            os.remove(path)
        for path in generated:
            self.assertFalse(os.path.isfile(path))

        self.ppa.create_presence_pseudo_absence()

        for path in generated:
            self.assertTrue(os.path.isfile(path))

        produced_frames = [pd.read_csv(path) for path in generated]
        expected_frames = [pd.read_csv(path) for path in references]
        for produced, expected in zip(produced_frames, expected_frames):
            self.assertEqual(produced.to_numpy().tolist(),
                             expected.to_numpy().tolist())
Beispiel #9
0
class GISTesCase(unittest.TestCase):
    """Test cases for the GIS class."""

    def setUp(self):
        """Resolve the ``test_data/root`` directory with forward slashes."""
        self.root = (os.path.abspath(os.path.join(os.path.dirname(__file__))) +
                     '/test_data/root').replace('\\', '/')

    def test__init__(self):
        """A new ``GIS`` stores the root and starts with empty attributes."""
        self.gh = GIS(self.root)
        self.assertEqual(self.gh.root, self.root)
        self.assertEqual(self.gh.scaled, '')
        self.assertEqual(self.gh.non_scaled, '')
        self.assertEqual(self.gh.gis, '')
        self.assertEqual(self.gh.world_locations_to_predict, '')
        self.assertEqual(self.gh.empty_map, '')
        self.assertEqual(self.gh.variables, [])
        self.assertEqual(self.gh.names, [])
        self.assertEqual(self.gh.length, 0)
        self.assertEqual(self.gh.scaled_len, 0)
        self.assertEqual(self.gh.presence, '')
        self.assertEqual(self.gh.stack, '')
        self.assertEqual(self.gh.stack_clip, '')
        self.assertEqual(self.gh.spec_ppa, '')
        self.assertEqual(self.gh.spec_ppa_env, '')

    def test_validate_gis(self):
        """``validate_gis`` fills in the paths and raises ``IOError`` for a bad root."""
        self.gh = GIS(self.root)
        self.gh.validate_gis()
        self.assertEqual(self.gh.gis, self.root + '/gis')
        self.assertEqual(self.gh.scaled, self.gh.root + '/gis/layers/scaled')
        self.assertEqual(self.gh.non_scaled,
                         self.gh.root + '/gis/layers/non-scaled')
        self.assertEqual(self.gh.world_locations_to_predict,
                         self.root + '/gis/world_locations_to_predict.csv')
        self.assertEqual(self.gh.empty_map,
                         self.root + '/gis/layers/empty_land_map.tif')

        # Construct outside the context manager so only validate_gis() can
        # satisfy the expected IOError.
        self.gh = GIS(self.root + '/scaled')
        with self.assertRaises(IOError):
            self.gh.validate_gis()

    def test_validate_list(self):
        """``variables_list`` returns the tif/tiff paths and names found by ``os.walk``."""
        self.gh = GIS(self.root)
        self.gh.validate_gis()
        test_root = self.root + '/gis/layers/scaled'
        f, n = self.gh.variables_list(test_root)
        f_truth = []
        n_truth = []
        for directory, _, filenames in sorted(os.walk(test_root)):
            for filename in filenames:
                file_ext = filename.split('.')[-1]
                if file_ext.lower() in ('tif', 'tiff'):
                    f_truth.append(
                        directory.replace('\\', '/') + '/' + filename)
                    n_truth.append(filename.replace('.%s' % file_ext, ''))
        self.assertEqual(f, f_truth)
        self.assertEqual(n, n_truth)

    def test_validate_tif(self):
        """``validate_tif`` collects the scaled layers first, then the non-scaled ones."""
        self.gh = GIS(self.root)
        self.gh.validate_gis()
        self.gh.validate_tif()
        f1, n1 = self.gh.variables_list(self.root + '/gis/layers/scaled')
        f2, n2 = self.gh.variables_list(self.root + '/gis/layers/non-scaled')
        variables_truth = sorted(f1) + sorted(f2)
        names_truth = sorted(n1) + sorted(n2)
        self.assertEqual(self.gh.variables, variables_truth)
        self.assertEqual(self.gh.names, names_truth)
        self.assertEqual(self.gh.length, 6)
        self.assertEqual(self.gh.scaled_len, 4)

        # NOTE(review): this error check exercises validate_gis(), not
        # validate_tif() - confirm whether a validate_tif() failure case
        # was intended here.
        self.gh = GIS(self.root + '/scaled')
        with self.assertRaises(IOError):
            self.gh.validate_gis()

    def test_define_output(self):
        """``define_output`` derives the output paths from the validated paths."""
        self.gh = GIS(self.root)
        self.gh.validate_gis()
        self.gh.validate_tif()
        self.gh.define_output()
        self.assertEqual(self.gh.presence, (self.gh.non_scaled + '/presence'))
        self.assertEqual(self.gh.stack, (self.gh.gis + '/stack'))
        self.assertEqual(self.gh.stack_clip, (self.gh.gis + '/stack_clip'))
        self.assertEqual(self.gh.spec_ppa, (self.gh.root + '/spec_ppa'))
        self.assertEqual(self.gh.spec_ppa_env,
                         (self.gh.root + '/spec_ppa_env'))
Beispiel #10
0
class sdmdl:
    """Top-level object that drives data preparation, model training and prediction.

    One parameter is required: the root of the repository that holds the occurrences and the environmental layers.
    Two optional parameters override where the raster data (dat_root) and the occurrence files (occ_root) are read
    from.

    Note: directories supplied by the user need to contain the required files that are present on the GitHub
    repository.

    :param root: a string representation of the root of the cloned or copied GitHub repository.
    :param dat_root: a string representation of the data directory. The default '/data' is resolved relative to root;
    any other value is used exactly as given.
    :param occ_root: a string representation of the occurrence directory. The default '/data/occurrences' is resolved
    relative to root; any other value is used exactly as given.
    :return: Object. Used to manage all phases of model creation. Handling data preparations, model training and
    prediction.
    """
    def __init__(self, root, dat_root='/data', occ_root='/data/occurrences'):
        """Resolve the data locations and build the occurrence, GIS and config handlers."""

        self.root = root

        # the default arguments are placeholders relative to root, every other value is taken verbatim.
        if occ_root == '/data/occurrences':
            self.occ_root = self.root + occ_root
        else:
            self.occ_root = occ_root
        if dat_root == '/data':
            self.dat_root = self.root + dat_root
        else:
            self.dat_root = dat_root

        # occurrence handler
        self.oh = Occurrences(self.occ_root)
        self.oh.validate_occurrences()
        self.oh.species_dictionary()

        # gis (raster layer) handler
        self.gh = GIS(self.dat_root)
        self.gh.validate_gis()
        self.gh.validate_tif()
        self.gh.define_output()

        # config handler, built on top of the two handlers above
        self.ch = Config(self.dat_root, self.oh, self.gh)
        self.ch.search_config()
        self.ch.read_yaml()

        self.verbose = self.ch.verbose
        if self.verbose:
            return

        # used to silence tensorflow backend deprecation warnings.
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
        logging.getLogger("tensorflow").setLevel(logging.ERROR)

    def reload_config(self):
        """Placeholder, not implemented yet: meant to pick up changes to the config file automatically."""

    def prep(self):
        """Run every data pre-processing step, in order."""

        PresenceMap(self.oh, self.gh, self.verbose).create_presence_map()

        # The raster layers are validated a second time so that the presence maps created by the step above are
        # detected. Registering those maps could instead be done inside PresenceMap.create_presence_map.
        #
        # Note: this currently leads to unwanted behaviour when a new sdmdl object is created on data that is already
        # pre-processed and train is called without calling prep first. Automatic detection of the raster layers
        # would avoid this; the cause is that the generated config.yml file does not include the presence maps.
        self.gh.validate_tif()

        RasterStack(self.gh, self.verbose).create_raster_stack()
        PresencePseudoAbsence(self.oh, self.gh, self.ch, self.verbose).create_presence_pseudo_absence()
        BandStatistics(self.gh, self.verbose).calc_band_mean_and_stddev()
        TrainingData(self.oh, self.gh, self.verbose).create_training_df()
        PredictionData(self.gh, self.verbose).create_prediction_df()

    def train(self):
        """Run model training through the Trainer class."""

        Trainer(self.oh, self.gh, self.ch, self.verbose).train()

    def predict(self):
        """Run model prediction through the Predictor class."""

        Predictor(self.oh, self.gh, self.ch, self.verbose).predict_model()

    def clean(self):
        """Delete generated files and directories below the GIS handler's locations.

        Removed, when present: the presence, spec_ppa and spec_ppa_env directories together with the files directly
        inside them, the stacked raster and its stack directory, the band statistics file, the two world prediction
        files and filtered.csv. Paths that do not exist are skipped.

        Only regular files are deleted before a directory is removed, and os.rmdir requires an empty directory, so a
        directory that still holds anything else makes this method raise OSError.
        """
        def remove_file(path):
            # delete path only when it is an existing regular file
            if os.path.isfile(path):
                os.remove(path)

        def remove_dir(path):
            # delete path only when it is an existing directory
            if os.path.isdir(path):
                os.rmdir(path)

        def purge_dir(path):
            # delete the files directly inside path, then path itself
            entries = os.listdir(path) if os.path.isdir(path) else []
            for entry in entries:
                remove_file(path + '/' + entry)
            remove_dir(path)

        gh = self.gh
        purge_dir(gh.non_scaled + '/presence')
        remove_file(gh.stack + '/stacked_env_variables.tif')
        remove_dir(gh.stack)
        purge_dir(gh.spec_ppa)
        remove_file(gh.gis + '/env_bio_mean_std.txt')
        purge_dir(gh.spec_ppa_env)
        for leftover in ('/world_prediction_array.npy', '/world_prediction_row_col.csv'):
            remove_file(gh.gis + leftover)
        remove_file(gh.root + '/filtered.csv')
Beispiel #11
0
class ConfigTestCase(unittest.TestCase):
    """Test cases for Config Handler class."""
    def setUp(self):
        """Build the occurrence, GIS and config handlers on <test_data>/root.

        self.root is the test_data directory next to this file, with
        backslashes replaced by forward slashes.
        """
        here = os.path.abspath(os.path.join(os.path.dirname(__file__)))
        self.root = (here + '/test_data').replace('\\', '/')
        data_root = self.root + '/root'

        self.oh = Occurrences(data_root)
        self.oh.validate_occurrences()
        self.oh.species_dictionary()

        self.gh = GIS(data_root)
        self.gh.validate_gis()
        self.gh.validate_tif()
        self.gh.define_output()

        self.ch = Config(data_root, self.oh, self.gh)

    def test__init__(self):
        """Check the attribute values of a Config object right after construction."""
        ch = self.ch
        self.assertEqual(ch.oh, self.oh)
        self.assertEqual(ch.gh, self.gh)
        self.assertEqual(ch.root, self.root + '/root')
        self.assertEqual(ch.config, [])

        expected_yml_names = [
            'data_path', 'occurrence_path', 'result_path', 'occurrences',
            'layers', 'random_seed', 'pseudo_freq', 'batchsize', 'epoch',
            'model_layers', 'model_dropout', 'verbose'
        ]
        self.assertEqual(ch.yml_names, expected_yml_names)

        # paths and the parsed yaml start out as None
        for attribute in ('data_path', 'occ_path', 'result_path', 'yml'):
            self.assertEqual(getattr(ch, attribute), None)

        # numeric settings start out as 0
        for attribute in ('random_seed', 'pseudo_freq', 'batchsize', 'epoch'):
            self.assertEqual(getattr(ch, attribute), 0)

        self.assertEqual(ch.model_layers, [])
        self.assertEqual(ch.model_dropout, [])
        self.assertEqual(ch.verbose, None)

    def test_search_config(self):
        """Check that search_config finds config.yml and raises IOError for <root>/config."""
        self.ch.search_config()
        expected_config = self.root + '/root/config.yml'
        self.assertEqual(self.ch.config, expected_config)

        other_root = self.root + '/config'
        with self.assertRaises(IOError):
            self.ch = Config(other_root, self.oh, self.gh)
            self.ch.search_config()

    def test_create_yaml(self):
        """Write a config file to a scratch path and check every value in it.

        The config path is redirected to <root>/root/test_config.yml before
        create_yaml is called, and the written file is loaded back with
        yaml.safe_load. Values are checked by position, so the test also
        depends on the order in which the keys appear in the file.
        """
        self.ch.search_config()
        test_config = self.root + '/root/test_config.yml'
        self.ch.config = test_config

        def remove_test_config():
            if os.path.isfile(test_config):
                os.remove(test_config)

        # Registered before the file is written so the scratch file is removed
        # even when an assertion below fails; otherwise it would be left behind
        # in the fixture directory.
        self.addCleanup(remove_test_config)

        self.ch.create_yaml()
        with open(self.ch.config, 'r') as stream:
            yml = yaml.safe_load(stream)

        data_root = self.root + '/root'
        values = list(yml.values())
        self.assertEqual(values[0], data_root)
        self.assertEqual(values[1], data_root + '/occurrences')
        self.assertEqual(values[2], data_root + '/results')
        self.assertEqual(values[3], dict(zip(self.oh.name, self.oh.path)))
        self.assertEqual(values[4],
                         dict(zip(self.gh.names, self.gh.variables)))
        self.assertEqual(values[5], 42)
        self.assertEqual(values[6], 2000)
        self.assertEqual(values[7], 75)
        self.assertEqual(values[8], 150)
        self.assertEqual(values[9], [250, 200, 150, 100])
        self.assertEqual(values[10], [0.3, 0.5, 0.3, 0.5])
        self.assertEqual(values[11], True)

    def test_read_yaml(self):
        """Check the values that read_yaml loads onto the Config object and its handlers."""
        self.ch.search_config()
        self.ch.read_yaml()

        data_root = self.root + '/root'
        self.assertEqual(self.ch.data_path, data_root)
        self.assertEqual(self.ch.occ_path, data_root + '/occurrences')
        self.assertEqual(self.ch.result_path, data_root + '/results')

        # name -> path mapping of the occurrence handler held by the test
        occurrences = dict(zip(self.oh.name, self.oh.path))
        self.assertEqual(self.ch.oh.name, list(occurrences))
        self.assertEqual(self.ch.oh.path, list(occurrences.values()))

        # name -> file mapping of the raster layers held by the test
        layers = dict(zip(self.gh.names, self.gh.variables))
        self.assertEqual(self.ch.gh.names, list(layers))
        self.assertEqual(self.ch.gh.variables, list(layers.values()))

        self.assertEqual(self.ch.random_seed, 42)
        self.assertEqual(self.ch.pseudo_freq, 2000)
        self.assertEqual(self.ch.batchsize, 75)
        self.assertEqual(self.ch.epoch, 150)
        self.assertEqual(self.ch.model_layers, [250, 200, 150, 100])
        self.assertEqual(self.ch.model_dropout, [0.3, 0.5, 0.3, 0.5])
        self.assertEqual(self.ch.verbose, True)