def test_update_config_file2(self):
    """Test config file update utility, version 2.

    Runs dbutil.update_config_file2 over the template config file,
    writing the result under a new name, then diffs it against the
    benchmark copy in the expected directory.  Skips when no
    benchmark file exists.
    """
    # Resolve the standard test-resource directories for this suite.
    source_dir = dbutil.input_directory(self.MODULE, self.SUITE)
    result_dir = dbutil.output_directory(self.MODULE, self.SUITE)
    benchmark_dir = dbutil.expected_directory(self.MODULE, self.SUITE)

    # Placeholder values substituted into the template config.
    substitutions = {
        'dbname': 'TEST_DBNAME',
        'temp_dir': 'TEST_TEMP_DIR',
        'tile_root': 'TEST_TILE_ROOT',
    }

    result_path = dbutil.update_config_file2(substitutions,
                                             source_dir,
                                             result_dir,
                                             'test_datacube.conf',
                                             'test2_datacube.conf')

    benchmark_path = os.path.join(benchmark_dir, 'test2_datacube.conf')
    if not os.path.isfile(benchmark_path):
        self.skipTest("Expected config file not found.")
    else:
        try:
            # diff exits non-zero on any difference, raising
            # CalledProcessError with the diff text in err.output.
            subprocess.check_output(['diff', result_path, benchmark_path])
        except subprocess.CalledProcessError as err:
            self.fail("Config file does not match expected result:\n"
                      + err.output)
def test_update_config_file2(self):
    """Test config file update utility, version 2.

    Applies a dictionary of substitutions to a template config file
    via dbutil.update_config_file2 (writing under a new output name)
    and diffs the result against the benchmark in the expected
    directory; skips the test if the benchmark is absent.
    """
    # Standard test-resource directories for this module/suite.
    input_dir = dbutil.input_directory(self.MODULE, self.SUITE)
    output_dir = dbutil.output_directory(self.MODULE, self.SUITE)
    expected_dir = dbutil.expected_directory(self.MODULE, self.SUITE)

    # Placeholder values to substitute into the template.
    updates = {'dbname': 'TEST_DBNAME',
               'temp_dir': 'TEST_TEMP_DIR',
               'tile_root': 'TEST_TILE_ROOT'
               }

    config_file_name = 'test_datacube.conf'
    output_file_name = 'test2_datacube.conf'

    # Write the updated config into the output directory under the
    # new name; update_config_file2 returns the path it wrote.
    output_path = dbutil.update_config_file2(updates, input_dir,
                                             output_dir, config_file_name,
                                             output_file_name)

    expected_path = os.path.join(expected_dir, output_file_name)
    if not os.path.isfile(expected_path):
        # No benchmark to compare against: skip rather than fail.
        self.skipTest("Expected config file not found.")
    else:
        try:
            # diff exits non-zero on any difference, which raises
            # CalledProcessError carrying the diff text.
            subprocess.check_output(['diff', output_path, expected_path])
        except subprocess.CalledProcessError as err:
            self.fail("Config file does not match expected result:\n" +
                      err.output)
def process_args(self):
    """Parse the benchmark execution mode from the command line.

    Also resolves and stores the INPUT/OUTPUT/EXPECTED resource
    directories on the instance.  Returns a (mode, msg) pair: mode
    is 0-3 when a valid mode number was supplied, otherwise -1, in
    which case msg carries usage text listing every mode.
    """
    MODULE = 'new_ingest_benchmark'
    SUITE = 'benchmark'
    self.INPUT_DIR = dbutil.input_directory(MODULE, SUITE)
    self.OUTPUT_DIR = dbutil.output_directory(MODULE, SUITE)
    self.EXPECTED_DIR = dbutil.expected_directory(MODULE, SUITE)

    # Descriptions of the recognised modes of execution (0-3).
    mode_descriptions = {
        0: 'Initialise benchmark data in the expected directory',
        1: 'Do not do ingestion. Compare existing ingestion '
           'in\n %s\n with benchmark\n %s\n'
           % (self.OUTPUT_DIR, self.EXPECTED_DIR),
        2: 'Compare from this run with '
           'expected benchmark database exiting if '
           'they are different',
        3: 'Compare databases and also compare tiles, even if '
           'the databases are different',
    }

    # Default to the invalid sentinel unless a valid integer follows.
    mode = -1
    if len(sys.argv) >= 2:
        try:
            mode = int(sys.argv[1])
        except ValueError:
            mode = -1

    msg = ''
    if mode not in [0, 1, 2, 3]:
        usage = ['python test_landsat_tiler.py %d:\t%s\n'
                 % (mode_num, desc)
                 for mode_num, desc in mode_descriptions.items()]
        msg = 'Please specify a mode as follows:\n' + ''.join(usage)
    return mode, msg
def process_args(self):
    """Parse sys.argv for the benchmark execution mode.

    Side effect: sets self.INPUT_DIR, self.OUTPUT_DIR and
    self.EXPECTED_DIR from dbutil.  Returns a (mode, msg) tuple;
    mode is -1 and msg holds usage text when the argument is missing
    or is not one of the valid mode numbers 0-3.
    """
    MODULE = 'new_ingest_benchmark'
    SUITE = 'benchmark'
    self.INPUT_DIR = dbutil.input_directory(MODULE, SUITE)
    self.OUTPUT_DIR = dbutil.output_directory(MODULE, SUITE)
    self.EXPECTED_DIR = dbutil.expected_directory(MODULE, SUITE)
    # Define the modes of execution.
    # NOTE(review): despite the original "three modes" comment, four
    # modes (0-3) are defined and accepted below.
    mode_desc_dict = {0: 'Initialise benchmark data in the expected directory',
                      1: 'Do not do ingestion. Compare existing ingestion ' \
                         'in\n %s\n with benchmark\n %s\n' \
                         %(self.OUTPUT_DIR, self.EXPECTED_DIR),
                      2: 'Compare from this run with ' \
                         'expected benchmark database exiting if ' \
                         'they are different',
                      3: 'Compare databases and also compare tiles, even if ' \
                         'the databases are different'}
    if len(sys.argv) < 2:
        # No mode argument supplied on the command line.
        mode = -1
    else:
        try:
            mode = int(sys.argv[1])
        except ValueError:
            # Argument was not an integer.
            mode = -1
    msg = ''
    if mode not in [0, 1, 2, 3]:
        # Build a usage message listing every recognised mode.
        msg = 'Please specify a mode as follows:\n'
        for mode_num, desc in mode_desc_dict.items():
            msg = msg + 'python test_landsat_tiler.py %d:\t%s\n' %(mode_num,
                                                                   desc)
    return mode, msg
def test_expected_directory_3(self):
    """Test test expected directory finder/creator, test 3.

    Passes an explicit user name with version='user' and checks the
    directory is created under that user's resource tree.
    """
    dummy_user = dbutil.random_name('user')
    expected_path = os.path.join(dbutil.TEST_RESOURCES_ROOT, dummy_user,
                                 'expected', 'module', 'suite')
    # Bug fix: 'path' was only assigned inside the try block, so if
    # expected_directory raised, the finally clause hit a NameError
    # on os.removedirs(path), masking the original exception.
    path = None
    try:
        path = dbutil.expected_directory('module', 'suite',
                                         version='user', user=dummy_user)
        self.check_directory(path, expected_path)
    finally:
        # Clean up the created directory tree, but only if creation
        # actually succeeded.
        if path is not None:
            os.removedirs(path)
def test_expected_directory_4(self):
    """Test test expected directory finder/creator, test 4.

    Passes an explicit (random) version string and checks the
    directory is created under that version's resource tree.
    """
    dummy_version = dbutil.random_name('version')
    expected_path = os.path.join(dbutil.TEST_RESOURCES_ROOT, dummy_version,
                                 'expected', 'module', 'suite')
    # Bug fix: 'path' was only assigned inside the try block, so if
    # expected_directory raised, the finally clause hit a NameError
    # on os.removedirs(path), masking the original exception.
    path = None
    try:
        path = dbutil.expected_directory('module', 'suite',
                                         version=dummy_version)
        self.check_directory(path, expected_path)
    finally:
        # Only remove the directory tree if it was actually created.
        if path is not None:
            os.removedirs(path)
def test_expected_directory_2(self):
    """Test test expected directory finder/creator, test 2.

    Temporarily overrides the USER environment variable and checks
    that version='user' resolves the directory under that user's
    resource tree.  USER is always restored afterwards.
    """
    dummy_user = dbutil.random_name('user')
    expected_path = os.path.join(dbutil.TEST_RESOURCES_ROOT, dummy_user,
                                 'expected', 'module', 'suite')
    old_user = os.environ['USER']
    # Bug fix: 'path' was only assigned inside the try block, so if
    # expected_directory raised, the finally clause hit a NameError
    # on os.removedirs(path), masking the original exception.
    path = None
    try:
        os.environ['USER'] = dummy_user
        path = dbutil.expected_directory('module', 'suite', version='user')
        self.check_directory(path, expected_path)
    finally:
        # Restore the environment first, then clean up the directory
        # tree only if it was actually created.
        os.environ['USER'] = old_user
        if path is not None:
            os.removedirs(path)
def test_expected_directory_5(self):
    """Test test expected directory finder/creator, test 5.

    Temporarily sets the DATACUBE_VERSION environment variable and
    checks the directory is resolved under that version's resource
    tree.  The variable is restored (or removed) afterwards.
    """
    dummy_version = dbutil.random_name('version')
    old_version = os.environ.get('DATACUBE_VERSION', None)
    expected_path = os.path.join(dbutil.TEST_RESOURCES_ROOT, dummy_version,
                                 'expected', 'module', 'suite')
    # Bug fix: 'path' was only assigned inside the try block, so if
    # expected_directory raised, the finally clause hit a NameError
    # on os.removedirs(path), masking the original exception.
    path = None
    try:
        os.environ['DATACUBE_VERSION'] = dummy_version
        path = dbutil.expected_directory('module', 'suite')
        self.check_directory(path, expected_path)
    finally:
        # Restore DATACUBE_VERSION to its pre-test state (it may not
        # have existed at all), then clean up if creation succeeded.
        if old_version is None:
            del os.environ['DATACUBE_VERSION']
        else:
            os.environ['DATACUBE_VERSION'] = old_version
        if path is not None:
            os.removedirs(path)
def __init__(self, db1, db2, schema1, schema2): """ Positional Arguments: db1, db2: Connections to the databases to be compared. Keyword Arguments: schema1: The schema to be used for the first database (db1) schema2: The schema to be used for the second database (db2) """ # Set autocommit mode on the connections; retain the old settings. self.old_autocommit = (db1.autocommit, db2.autocommit) db1.autocommit = True db2.autocommit = True # Sanitise the schema names, just in case. self.schema1 = dbutil.safe_name(schema1) self.schema2 = dbutil.safe_name(schema2) # Wrap the connections to gain access to database structure queries. self.db1 = ComparisonWrapper(db1, self.schema1) self.db2 = ComparisonWrapper(db2, self.schema2) # Get the database names... self.db1_name = self.db1.database_name() self.db2_name = self.db2.database_name() # and qualify with the schema names if they are not 'public' if self.schema1 != 'public': self.db1_name = self.schema1 + '.' + self.db1_name if self.schema2 != 'public': self.db2_name = self.schema2 + '.' + self.db2_name # Set input, expected and output directores # Not used yet module = "tilecompare" suite = "TileCompare" self.input_dir = dbutil.input_directory(module, suite) self.output_dir = dbutil.output_directory(module, suite) self.expected_dir = dbutil.expected_directory(module, suite)
def test_update_config_file(self):
    """Test config file update utility.

    Runs dbutil.update_config_file over the template config file,
    substituting a dummy database name, and diffs the result against
    the benchmark copy in the expected directory.  Skips when no
    benchmark file exists.
    """
    # Resolve the standard test-resource directories for this suite.
    template_dir = dbutil.input_directory(self.MODULE, self.SUITE)
    result_dir = dbutil.output_directory(self.MODULE, self.SUITE)
    benchmark_dir = dbutil.expected_directory(self.MODULE, self.SUITE)

    conf_name = 'test_datacube.conf'

    # Rewrite the template with the dummy database name substituted.
    result_path = dbutil.update_config_file('TEST_TEST_TEST',
                                            template_dir,
                                            result_dir,
                                            conf_name)

    benchmark_path = os.path.join(benchmark_dir, conf_name)
    if not os.path.isfile(benchmark_path):
        self.skipTest("Expected config file not found.")
    else:
        try:
            # diff exits non-zero on any difference, raising
            # CalledProcessError with the diff text in err.output.
            subprocess.check_output(['diff', result_path, benchmark_path])
        except subprocess.CalledProcessError as err:
            self.fail("Config file does not match expected result:\n"
                      + err.output)
def check_output(self, file_name, output_str):
    """Check the output against an expected output file.

    The output string is always written to the temporary output
    directory first, so a manually-verified run can later be promoted
    to serve as the expected output.  The test is skipped when no
    expected file is present.
    """
    # Record the actual output for later inspection or promotion.
    out_path = os.path.join(dbutil.output_directory(MODULE, self.SUITE),
                            file_name)
    with open(out_path, 'w') as out_fp:
        out_fp.write(output_str)

    exp_path = os.path.join(dbutil.expected_directory(MODULE, self.SUITE),
                            file_name)
    if not os.path.isfile(exp_path):
        self.skipTest(("expected output file '%s' not found for " +
                       "module '%s', suite '%s'.") % (file_name,
                                                      MODULE,
                                                      self.SUITE))
    else:
        with open(exp_path) as exp_fp:
            expected_str = exp_fp.read()
        self.assertEqual(output_str, expected_str)
class TestTileRecord(unittest.TestCase):
    """Unit tests for the TileRecord class."""
    # pylint: disable=too-many-instance-attributes
    ############################### User area #################################
    MODULE = 'tile_record'
    SUITE = 'TileRecord3'
    # Set to true if we want to populate expected directory with results,
    # without doing comparision. Set to False if we want to put (often
    # a subset of) results in output directory and compare against the
    # previously populated expected directory.
    POPULATE_EXPECTED = True
    ############################################
    INPUT_DIR = dbutil.input_directory(MODULE, SUITE)
    OUTPUT_DIR = dbutil.output_directory(MODULE, SUITE)
    EXPECTED_DIR = dbutil.expected_directory(MODULE, SUITE)
    # Results (temp files, tiles, logs) go to 'expected' when
    # populating the benchmark, otherwise to 'output' for comparison.
    if POPULATE_EXPECTED:
        destination_dir = 'expected'
    else:
        destination_dir = 'output'
    TEMP_DIR = dbutil.temp_directory(MODULE, SUITE, destination_dir)
    TILE_ROOT_DIR = dbutil.tile_root_directory(MODULE, SUITE,
                                               destination_dir)

    def setUp(self):
        """Create a per-test logfile, an empty test database, and an
        ingester/collection pair configured against that database."""
        #
        # Parse out the name of the test case and use it to name a logfile
        #
        match = re.search(r'\.([^\.]+)$', self.id())
        if match:
            name = match.group(1)
        else:
            name = 'TestIngester'
        logfile_name = "%s.log" % name
        self.logfile_path = os.path.join(self.OUTPUT_DIR, logfile_name)
        self.expected_path = os.path.join(self.EXPECTED_DIR, logfile_name)
        # When populating the benchmark, log into the expected
        # directory instead of the output directory.
        if self.POPULATE_EXPECTED:
            self.logfile_path = os.path.join(self.EXPECTED_DIR,
                                             logfile_name)
        #
        # Set up a handler to log to the logfile, and attach it to the
        # root logger.
        #
        #logging.basicConfig()
        self.handler = logging.FileHandler(self.logfile_path, mode='w')
        self.handler.setLevel(logging.INFO)
        self.handler.setFormatter(logging.Formatter('%(message)s'))
        LOGGER.addHandler(self.handler)
        # Add a streamhandler to write output to console
        self.stream_handler = logging.StreamHandler(stream=sys.stdout)
        self.stream_handler.setLevel(logging.INFO)
        self.stream_handler.setFormatter(logging.Formatter('%(message)s'))
        LOGGER.addHandler(self.stream_handler)
        # Create an empty database with a randomised name so parallel
        # test runs cannot collide.
        self.test_conn = None
        self.test_dbname = dbutil.random_name("test_tile_record")
        LOGGER.info('Creating %s', self.test_dbname)
        dbutil.TESTSERVER.create(self.test_dbname, self.INPUT_DIR,
                                 "hypercube_empty.sql")
        # Set the datacube configuration file to point to the empty database
        configuration_dict = {'dbname': self.test_dbname,
                              'temp_dir': self.TEMP_DIR,
                              'tile_root': self.TILE_ROOT_DIR}
        config_file_path = dbutil.update_config_file2(configuration_dict,
                                                      self.INPUT_DIR,
                                                      self.OUTPUT_DIR,
                                                      "test_datacube.conf")
        # Set an instance of the datacube and pass it to an ingester instance
        test_args = TestArgs()
        test_args.config_file = config_file_path
        test_args.debug = False
        test_datacube = IngesterDataCube(test_args)
        self.ingester = TestIngester(datacube=test_datacube)
        self.collection = self.ingester.collection

    def tearDown(self):
        """Save or drop the test database and detach log handlers."""
        #
        # Flush the handler and remove it from the root logger.
        #
        self.handler.flush()
        self.stream_handler.flush()
        if self.test_dbname:
            if self.POPULATE_EXPECTED:
                # Save the resulting database as the new benchmark.
                dbutil.TESTSERVER.save(self.test_dbname, self.EXPECTED_DIR,
                                       'hypercube_tile_record.sql')
            else:
                #TODO: make dbase comaprision
                # NOTE(review): placeholder statement, has no effect.
                kkk = -1
            LOGGER.info('About to drop %s', self.test_dbname)
            dbutil.TESTSERVER.drop(self.test_dbname)
        LOGGER.removeHandler(self.handler)
        LOGGER.removeHandler(self.stream_handler)

    # Disabled test ('xxx' prefix keeps unittest from collecting it).
    def xxxtest_insert_tile_record(self):
        """Test the Landsat tiling process method by comparing output to
        a file on disk."""
        # pylint: disable=too-many-locals
        # Test a single dataset for tile_record creation
        processing_level = 'PQA'
        dataset_path = TestIngest.DATASETS_TO_INGEST[processing_level][0]
        LOGGER.info('Testing Dataset %s', dataset_path)
        dset = LandsatDataset(dataset_path)
        # Create a DatasetRecord instance so that we can access its
        # list_tile_types() method. In doing this we need to create a
        # collection object and entries on the acquisition and dataset
        # tables of the database.
        self.collection.begin_transaction()
        acquisition = \
            self.collection.create_acquisition_record(dset)
        dset_record = acquisition.create_dataset_record(dset)
        # Get tile types
        dummy_tile_type_list = dset_record.list_tile_types()
        # Assume dataset has tile_type = 1 only:
        tile_type_id = 1
        dataset_bands_dict = dset_record.get_tile_bands(tile_type_id)
        ls_bandstack = dset.stack_bands(dataset_bands_dict)
        # NOTE(review): temp_dir is computed but never used here; the
        # vrt is built in the collection's temp tile directory below.
        temp_dir = os.path.join(self.ingester.datacube.tile_root,
                                'ingest_temp')
        # Form scene vrt
        ls_bandstack.buildvrt(self.collection.get_temp_tile_directory())
        # Reproject scene data onto selected tile coverage
        tile_footprint_list = dset_record.get_coverage(tile_type_id)
        LOGGER.info('coverage=%s', str(tile_footprint_list))
        for tile_footprint in tile_footprint_list:
            tile_contents = \
                self.collection.create_tile_contents(tile_type_id,
                                                     tile_footprint,
                                                     ls_bandstack)
            LOGGER.info('reprojecting for %s tile %s',
                        processing_level, str(tile_footprint))
            # Need to call reproject to set tile_contents.tile_extents
            tile_contents.reproject()
            if tile_contents.has_data():
                dummy_tile_record = \
                    dset_record.create_tile_record(tile_contents)
        self.collection.commit_transaction()
        #TODO compare database with expected

    def test_aaa(self):
        # Intentional no-op placeholder test.
        pass

    def test_bbb(self):
        # Intentional no-op placeholder test.
        pass

    def test_make_mosaics(self):
        """Make mosaic tiles from two adjoining scenes."""
        # pylint: disable=too-many-locals
        # Gather the standard ingest datasets plus the dedicated
        # mosaic-source scenes, then shuffle so ingest order cannot be
        # relied upon.
        dataset_list = \
            [TestIngest.DATASETS_TO_INGEST[level][i] for i in range(6)
             for level in ['PQA', 'NBAR', 'ORTHO']]
        dataset_list.extend(TestIngest.MOSAIC_SOURCE_NBAR)
        dataset_list.extend(TestIngest.MOSAIC_SOURCE_PQA)
        dataset_list.extend(TestIngest.MOSAIC_SOURCE_ORTHO)
        random.shuffle(dataset_list)
        LOGGER.info("Ingesting following datasets:")
        for dset in dataset_list:
            LOGGER.info('%d) %s', dataset_list.index(dset), dset)
        for dataset_path in dataset_list:
            LOGGER.info('Ingesting Dataset %d:\n%s',
                        dataset_list.index(dataset_path), dataset_path)
            dset = LandsatDataset(dataset_path)
            # Each dataset is ingested in its own transaction.
            self.collection.begin_transaction()
            acquisition = \
                self.collection.create_acquisition_record(dset)
            dset_record = acquisition.create_dataset_record(dset)
            # Get tile types
            dummy_tile_type_list = dset_record.list_tile_types()
            # Assume dataset has tile_type = 1 only:
            tile_type_id = 1
            dataset_bands_dict = dset_record.get_tile_bands(tile_type_id)
            ls_bandstack = dset.stack_bands(dataset_bands_dict)
            temp_dir = os.path.join(self.ingester.datacube.tile_root,
                                    'ingest_temp')
            # Form scene vrt
            ls_bandstack.buildvrt(temp_dir)
            # Reproject scene data onto selected tile coverage
            tile_footprint_list = dset_record.get_coverage(tile_type_id)
            LOGGER.info('coverage=%s', str(tile_footprint_list))
            for tile_ftprint in tile_footprint_list:
                # Only do that footprint for which we have benchmark mosaics
                if tile_ftprint not in [(141, -38)]:
                    continue
                tile_contents = \
                    self.collection.create_tile_contents(tile_type_id,
                                                         tile_ftprint,
                                                         ls_bandstack)
                LOGGER.info('Calling reproject for %s tile %s...',
                            dset_record.mdd['processing_level'],
                            tile_ftprint)
                tile_contents.reproject()
                LOGGER.info('...finished')
                if tile_contents.has_data():
                    LOGGER.info('tile %s has data',
                                tile_contents.temp_tile_output_path)
                    tile_record = \
                        dset_record.create_tile_record(tile_contents)
                    mosaic_required = tile_record.make_mosaics()
                    if not mosaic_required:
                        continue
                    # Test mosaic tiles against benchmark
                    # At this stage, transaction for this dataset not yet
                    # commited and so the tiles from this dataset, including
                    # any mosaics are still in the temporary location.
                    if self.POPULATE_EXPECTED:
                        # Populating the benchmark: nothing to compare.
                        continue
                    mosaic_benchmark = \
                        TestTileContents.swap_dir_in_path(
                            tile_contents.mosaic_final_pathname,
                            'output', 'expected')
                    mosaic_new = tile_contents.mosaic_temp_pathname
                    LOGGER.info("Comparing test output with benchmark:\n"
                                "benchmark: %s\ntest output: %s",
                                mosaic_benchmark, mosaic_new)
                    if dset_record.mdd['processing_level'] == 'PQA':
                        LOGGER.info(
                            "For PQA mosaic, calling load_and_check...")
                        ([data1, data2], dummy_nlayers) = \
                            TestLandsatTiler.load_and_check(
                                mosaic_benchmark,
                                mosaic_new,
                                tile_contents.band_stack.band_dict,
                                tile_contents.band_stack.band_dict)
                        LOGGER.info('Checking arrays ...')
                        # NOTE(review): '~' here appears to rely on
                        # numpy bool semantics (~True_ is False_);
                        # with a builtin bool it would misbehave
                        # (~True == -2, which is truthy) — confirm
                        # load_and_check returns numpy arrays.
                        if ~(data1 == data2).all():
                            self.fail("Difference in PQA mosaic "
                                      "from expected result: %s and %s"
                                      % (mosaic_benchmark, mosaic_new))
                        # Check that differences are due to differing
                        # treatment of contiguity bit.
                    else:
                        # Non-PQA mosaics are vrt files: diff them,
                        # ignoring lines containing the filename.
                        diff_cmd = ["diff",
                                    "-I",
                                    "[Ff]ilename",
                                    "%s" % mosaic_benchmark,
                                    "%s" % mosaic_new
                                    ]
                        result = execute(diff_cmd, shell=False)
                        assert result['stdout'] == '', \
                            "Differences between vrt files"
                        assert result['stderr'] == '', \
                            "Error in system diff command"
                else:
                    LOGGER.info('... tile has no data')
                    tile_contents.remove()
            self.collection.commit_transaction()
class TestDatasetFiltering(unittest.TestCase):
    """Unit and performance tests for dataset filtering."""
    MODULE = 'landsat_ingester'
    SUITE = 'TestDatasetFiltering'
    INPUT_DIR = dbutil.input_directory(MODULE, SUITE)
    OUTPUT_DIR = dbutil.output_directory(MODULE, SUITE)
    EXPECTED_DIR = dbutil.expected_directory(MODULE, SUITE)
    # Source scenes used for the filter runs.
    # NOTE(review): hard-coded NCI path — these tests only run where
    # /g/data1 is mounted.
    SOURCE_DIR = '/g/data1/rs0/scenes/ARG25_V0.0/2014-03'

    def setUp(self):
        """Set up ingester."""
        self.stopwatch = Stopwatch()
        # Date and path/row window applied to the template config so
        # both filter styles operate on the same selection criteria.
        updates = {'start_date': '01/03/2014',
                   'end_date': '15/03/2014',
                   'min_path': '090',
                   'max_path': '093',
                   'min_row': '087',
                   'max_row': '090'
                   }
        config_file = dbutil.update_config_file2(updates,
                                                 self.INPUT_DIR,
                                                 self.OUTPUT_DIR,
                                                 'test_datacube.conf')
        # The ingester reads its arguments from sys.argv, so fake a
        # command line pointing at the generated config and source dir.
        sys.argv = [sys.argv[0],
                    "--config=%s" % config_file,
                    "--source=%s" % self.SOURCE_DIR
                    ]
        self.ingester = LandsatIngester()

    @staticmethod
    def dump_dataset_names(output_path, dataset_list):
        """Dump the names of the datasets to a file.

        This writes a list of basenames from the paths in dataset_list
        to a file at output_path."""
        out = open(output_path, 'w')
        for dataset_path in dataset_list:
            out.write(os.path.basename(dataset_path) + '\n')
        out.close()

    def check_datasets_list(self, output_path, expected_path):
        """If an expected datasets file exists, check to see if it
        matches; skip the test otherwise."""
        if not os.path.isfile(expected_path):
            self.skipTest("Expected dataset list file not found.")
        else:
            try:
                # diff exits non-zero on any difference, raising
                # CalledProcessError with the diff text attached.
                subprocess.check_output(['diff',
                                         output_path,
                                         expected_path])
            except subprocess.CalledProcessError as err:
                self.fail("Filtered datasets do not match " +
                          "those expected:\n" + err.output)

    def test_fast_filter(self):
        """Test the results of a fast (filename based) filter."""
        print ""
        print "Finding datasets ..."
        self.stopwatch.start()
        dataset_list = self.ingester.find_datasets(self.SOURCE_DIR)
        self.stopwatch.stop()
        (elapsed_time, cpu_time) = self.stopwatch.read()
        print ""
        print "%s datasets found." % len(dataset_list)
        print "elapsed time: %s" % elapsed_time
        print "cpu time: %s" % cpu_time
        print ""
        print "Doing fast filter ..."
        self.stopwatch.reset()
        self.stopwatch.start()
        filtered_list = self.ingester.fast_filter_datasets(dataset_list)
        self.stopwatch.stop()
        (elapsed_time, cpu_time) = self.stopwatch.read()
        print ""
        print "%s out of %s datasets remain." % \
            (len(filtered_list), len(dataset_list))
        print "elapsed time: %s" % elapsed_time
        print "cpu time: %s" % cpu_time
        print ""
        # Record the filtered names, then compare with the benchmark
        # (shared between the fast and metadata filter tests, which
        # are expected to agree).
        output_path = os.path.join(self.OUTPUT_DIR,
                                   'fast_filter_datasets.txt')
        self.dump_dataset_names(output_path, filtered_list)
        expected_path = os.path.join(self.EXPECTED_DIR,
                                     'filter_datasets.txt')
        self.check_datasets_list(output_path, expected_path)

    def test_metadata_filter(self):
        """Test the results of a metadata based filter."""
        print ""
        print "Finding datasets ..."
        self.stopwatch.start()
        dataset_list = self.ingester.find_datasets(self.SOURCE_DIR)
        self.stopwatch.stop()
        (elapsed_time, cpu_time) = self.stopwatch.read()
        print ""
        print "%s datasets found." % len(dataset_list)
        print "elapsed time: %s" % elapsed_time
        print "cpu time: %s" % cpu_time
        print ""
        print "Doing metadata filter ..."
        self.stopwatch.reset()
        self.stopwatch.start()
        filtered_list = []
        for dataset_path in dataset_list:
            dataset = self.ingester.open_dataset(dataset_path)
            # filter_on_metadata raises DatasetError to reject a
            # dataset; datasets that pass are kept.
            try:
                self.ingester.filter_on_metadata(dataset)
            except DatasetError:
                pass
            else:
                filtered_list.append(dataset_path)
        self.stopwatch.stop()
        (elapsed_time, cpu_time) = self.stopwatch.read()
        print ""
        print "%s out of %s datasets remain." % \
            (len(filtered_list), len(dataset_list))
        print "elapsed time: %s" % elapsed_time
        print "cpu time: %s" % cpu_time
        print ""
        # Compare against the same benchmark as the fast filter test.
        output_path = os.path.join(self.OUTPUT_DIR,
                                   'metadata_filter_datasets.txt')
        self.dump_dataset_names(output_path, filtered_list)
        expected_path = os.path.join(self.EXPECTED_DIR,
                                     'filter_datasets.txt')
        self.check_datasets_list(output_path, expected_path)
class TestTileCompare(unittest.TestCase):
    """Unit tests for tilecompare functions."""
    # pylint:disable=too-many-instance-attributes
    MODULE = 'tilecompare'
    SUITE = 'TileCompare'
    INPUT_DIR = dbutil.input_directory(MODULE, SUITE)
    OUTPUT_DIR = dbutil.output_directory(MODULE, SUITE)
    EXPECTED_DIR = dbutil.expected_directory(MODULE, SUITE)

    def setUp(self):
        """Create per-test log handlers and null out the database
        connection attributes used by tearDown."""
        # Parse out the name of the test case and use it to name a
        # logfile.
        match = re.search(r'\.([^\.]+)$', self.id())
        if match:
            name = match.group(1)
        else:
            name = 'TestIngester'
        logfile_name = "%s.log" % name
        self.logfile_path = os.path.join(self.OUTPUT_DIR, logfile_name)
        self.expected_path = os.path.join(self.EXPECTED_DIR, logfile_name)
        #
        # Set up a handler to log to the logfile, and attach it to the
        # root logger.
        #
        self.handler = logging.FileHandler(self.logfile_path, mode='w')
        self.handler.setLevel(logging.INFO)
        self.handler.setFormatter(logging.Formatter('%(message)s'))
        LOGGER.addHandler(self.handler)
        # Add a streamhandler to write output to console
        self.stream_handler = logging.StreamHandler(stream=sys.stdout)
        self.stream_handler.setLevel(logging.INFO)
        self.stream_handler.setFormatter(logging.Formatter('%(message)s'))
        LOGGER.addHandler(self.stream_handler)
        self.dbname1 = None  # production database
        self.dbname2 = None  # result of test_ingest
        self.conn1 = None
        self.conn2 = None

    def tearDown(self):
        """Drop the per-test database (if created) and detach the log
        handlers."""
        #
        # Flush the handler and remove it from the root logger.
        #
        self.handler.flush()
        self.stream_handler.flush()
        if self.dbname2:
            LOGGER.info('About to drop %s', self.dbname2)
            self.conn2.close()
            dbutil.TESTSERVER.drop(self.dbname2)
        LOGGER.removeHandler(self.handler)
        LOGGER.removeHandler(self.stream_handler)

    # Disabled test ('xxx' prefix keeps unittest from collecting it).
    def xxxtest_create_database(self):
        "Test random_name random database name generator."
        self.dbname1 = 'hypercube_v0'
        self.dbname2 = dbutil.random_name('test_create_database')
        LOGGER.info('Creating database %s', self.dbname2)
        dbutil.TESTSERVER.create(self.dbname2, self.INPUT_DIR,
                                 'hypercube_test_ingest.sql')
        self.conn1 = dbutil.TESTSERVER.connect(self.dbname1)
        self.conn2 = dbutil.TESTSERVER.connect(self.dbname2)
        LOGGER.info('About to create database from file')
        # NOTE(review): self.dbname (no suffix) is never assigned
        # anywhere in this class; this line would raise
        # AttributeError if the test were enabled.
        dbutil.TESTSERVER.create(self.dbname, self.INPUT_DIR,
                                 'hypercube_test_ingest.sql')
        LOGGER.info('.done')

    # Disabled test ('xxx' prefix keeps unittest from collecting it).
    def xxxtest_create_tile_acqusition_info(self):
        "Test creation of tile_acquisition_info table."
        # NOTE(review): stray empty string literal below; no effect.
        ""
        self.dbname1 = 'hypercube_test'
        self.dbname2 = dbutil.random_name('test_tilecompare')
        LOGGER.info('Creating database %s', self.dbname2)
        dbutil.TESTSERVER.create(self.dbname2, self.INPUT_DIR,
                                 'hypercube_test_ingest.sql')
        self.conn1 = dbutil.TESTSERVER.connect(self.dbname1,
                                               autocommit=False)
        self.conn2 = dbutil.TESTSERVER.connect(self.dbname2,
                                               autocommit=False)
        LOGGER.info('About to create comparision pair')
        pair = tilecompare.TileComparisonPair(self.conn1, self.conn2,
                                              'public', 'public')
        LOGGER.info('About to create table from fresh ingest')
        fresh_ingest_info_table = 'fresh_ingest_info'
        comparison_table = 'ingest_comparison'
        # Uses tilecompare's private helpers directly to exercise each
        # stage of the comparison pipeline in isolation.
        tilecompare._copy_ingest_tile_acquisition_info(
            pair, fresh_ingest_info_table)
        LOGGER.info('About to create comparison table')
        tilecompare._create_comparison_table(pair,
                                             fresh_ingest_info_table,
                                             comparison_table)
        LOGGER.info('About to compare the tile contents')
        tilecompare._compare_tile_contents(pair, comparison_table)

    def test_compare_tile_stores(self):
        "Test creation of tile_acquisition_info table."
        # NOTE(review): stray empty string literal below; no effect.
        ""
        self.dbname1 = 'hypercube_test2_v0'
        self.dbname2 = dbutil.random_name('test_tilecompare')
        LOGGER.info('Creating database %s', self.dbname2)
        dbutil.TESTSERVER.create(self.dbname2, self.INPUT_DIR,
                                 'hypercube_test_ingest.sql')
        #Temp
        #print 'Loading production database %s' %self.dbname1
        #dbutil.TESTSERVER.create(self.dbname1,
        #                         '/g/data/v10/test_resources/databases',
        #                         'hypercube_v0.sql')
        #self.dbname1 = 'hypercube_test2_v0'
        #print 'Loading production database %s' %self.dbname1
        #dbutil.TESTSERVER.create(self.dbname1,
        #                         '/g/data/v10/test_resources/databases',
        #                         'hypercube_v0.sql')
        #return
        #Temp
        self.conn1 = dbutil.TESTSERVER.connect(self.dbname1,
                                               autocommit=False)
        self.conn2 = dbutil.TESTSERVER.connect(self.dbname2,
                                               autocommit=False)
        LOGGER.info('About to call compare_tile_stores')
        fout = open(
            os.path.join(self.OUTPUT_DIR, 'tile_comparison_output.txt'),
            'w')
        # NOTE(review): the file handle opened above is immediately
        # shadowed by sys.stdout and never closed — the report goes to
        # the console and the file is left empty/leaked.
        fout = sys.stdout  #temp
        difference_pairs = tilecompare.compare_tile_stores(self.conn1,
                                                           self.conn2,
                                                           output=fout)
        LOGGER.info('Finished calling compare_tile_stores')
        if difference_pairs != []:
            report_string = "Fresh ingest tile content differs from the " \
                "benchmark:\n"
            for pair in difference_pairs:
                report_string = report_string + "Benchmark tile:\n%s\nFresh" \
                    "Ingest tile:\n%s\n" %(pair[0], pair[1])
            self.fail(report_string)
        else:
            print 'Everything passed'
class TestLandsatDataset(unittest.TestCase): """Unit tests for the LandsatDataset class.""" MODULE = 'landsat_dataset' SUITE = 'TestLandsatDataset' INPUT_DIR = dbutil.input_directory(MODULE, SUITE) OUTPUT_DIR = dbutil.output_directory(MODULE, SUITE) EXPECTED_DIR = dbutil.expected_directory(MODULE, SUITE) ORTHO_DIR = 'dataset_testing/L1/2012-05' ORTHO_SCENE = 'LS7_ETM_OTH_P51_GALPGS01-002_092_089_20120507' NBAR_DIR = 'dataset_testing/NBAR/2012-05' NBAR_SCENE = 'LS7_ETM_NBAR_P54_GANBAR01-002_092_089_20120507' PQ_DIR = 'dataset_testing/PQ/2012-05' PQ_SCENE = 'LS7_ETM_PQ_P55_GAPQ01-002_092_089_20120507' FC_DIR = 'dataset_testing/FC/2012-05' FC_SCENE = 'LS7_ETM_FC_P54_GAFC01-002_092_089_20120507' ORTHO8_DIR = 'dataset_testing/L1/2014-03' ORTHO8_SCENE = 'LS8_OLITIRS_OTH_P51_GALPGS01-002_089_082_20140313' NBAR8_DIR = 'dataset_testing/NBAR/2014-03' NBAR8_SCENE = 'LS8_OLI_TIRS_NBAR_P54_GANBAR01-002_089_082_20140313' PQ8_DIR = 'dataset_testing/PQ/2014-03' PQ8_SCENE = 'LS8_OLI_TIRS_PQ_P55_GAPQ01-002_089_082_20140313' FC8_DIR = 'dataset_testing/FC/2014-03' FC8_SCENE = 'LS8_OLI_TIRS_FC_P54_GAFC01-002_089_082_20140313' METADATA_KEYS = [ 'dataset_path', 'satellite_tag', 'sensor_name', 'processing_level', 'x_ref', 'y_ref', 'start_datetime', 'end_datetime', 'datetime_processed', 'dataset_size', 'll_lon', 'll_lat', 'lr_lon', 'lr_lat', 'ul_lon', 'ul_lat', 'ur_lon', 'ur_lat', 'projection', 'll_x', 'll_y', 'lr_x', 'lr_y', 'ul_x', 'ul_y', 'ur_x', 'ur_y', 'x_pixels', 'y_pixels', 'gcp_count', 'mtl_text', 'cloud_cover', 'xml_text', 'geo_transform', 'pq_tests_run' ] LARGE_METADATA_KEYS = ['mtl_text', 'xml_text'] SMALL_METADATA_KEYS = [ k for k in METADATA_KEYS if k not in LARGE_METADATA_KEYS ] CROSSCHECK_KEYS_ONE = [ 'satellite_tag', 'sensor_name', 'x_ref', 'y_ref', 'll_lon', 'll_lat', 'lr_lon', 'lr_lat', 'ul_lon', 'ul_lat', 'ur_lon', 'ur_lat', 'projection', 'll_x', 'll_y', 'lr_x', 'lr_y', 'ul_x', 'ul_y', 'ur_x', 'ur_y', 'x_pixels', 'y_pixels', 'cloud_cover', 'geo_transform' ] 
CROSSCHECK_KEYS_TWO = CROSSCHECK_KEYS_ONE + [ 'start_datetime', 'end_datetime' ] @classmethod def setUpClass(cls): """Set up logging for EOtools.DatasetDrivers._scene_dataset.SceneDataset debug output.""" cls.SD_LOGGER = logging.getLogger( 'EOtools.DatasetDrivers._scene_dataset') cls.SD_HANDLER = logging.FileHandler(os.path.join( cls.OUTPUT_DIR, 'scene_dataset.log'), mode='w') cls.SD_LOGGER.addHandler(cls.SD_HANDLER) cls.SD_OLD_LEVEL = cls.SD_LOGGER.level cls.SD_LOGGER.setLevel(logging.DEBUG) @classmethod def tearDownClass(cls): """Clean up _scene_dataset logging.""" cls.SD_LOGGER.setLevel(cls.SD_OLD_LEVEL) cls.SD_LOGGER.removeHandler(cls.SD_HANDLER) cls.SD_HANDLER.close() def setUp(self): self.SD_LOGGER.debug("") self.SD_LOGGER.debug( "---%s" + "-" * (72 - (len(self._testMethodName) + 3)), self._testMethodName) self.SD_LOGGER.debug("") def tearDown(self): self.SD_LOGGER.debug("") self.SD_LOGGER.debug("-" * 72) self.SD_LOGGER.debug("") self.SD_HANDLER.flush() def test_build_metadata_dict(self): """Test for the build_metadata_dict method. This method is actually defined in AbstractDataset, but an AbstractDataset cannot be instantiated, so it is tested here. 
""" ortho_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.ORTHO_DIR, self.ORTHO_SCENE)) mdd = ortho_ds.metadata_dict self.assertEqual(set(self.METADATA_KEYS), set(mdd.keys())) for k in mdd.keys(): mdd_value = mdd[k] accessor_name = 'get_' + k accessor_value = getattr(ortho_ds, accessor_name)() self.assertEqual(mdd_value, accessor_value) def test_ortho_scene(self): """Test for an ORTHO (level 1) scene.""" ortho_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.ORTHO_DIR, self.ORTHO_SCENE)) mdd = ortho_ds.metadata_dict self.dump_metadata('ortho_metadata.txt', mdd, self.SMALL_METADATA_KEYS) self.dump_string('ortho_xml.xml', mdd['xml_text']) self.dump_string('ortho_mtl.txt', mdd['mtl_text']) self.check_file('ortho_metadata.txt') self.check_file('ortho_xml.xml') self.check_file('ortho_mtl.txt') def test_ortho8_scene(self): """Test for a Landsat 8 ORTHO (level 1) scene.""" ortho_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.ORTHO8_DIR, self.ORTHO8_SCENE)) mdd = ortho_ds.metadata_dict self.dump_metadata('ortho8_metadata.txt', mdd, self.SMALL_METADATA_KEYS) self.dump_string('ortho8_xml.xml', mdd['xml_text']) self.dump_string('ortho8_mtl.txt', mdd['mtl_text']) self.check_file('ortho8_metadata.txt') self.check_file('ortho8_xml.xml') self.check_file('ortho8_mtl.txt') def test_nbar_scene(self): """Test for an NBAR scene.""" nbar_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.NBAR_DIR, self.NBAR_SCENE)) mdd = nbar_ds.metadata_dict self.dump_metadata('nbar_metadata.txt', mdd, self.SMALL_METADATA_KEYS) self.dump_string('nbar_xml.xml', mdd['xml_text']) self.assertIsNone(mdd['mtl_text']) self.check_file('nbar_metadata.txt') self.check_file('nbar_xml.xml') def test_nbar8_scene(self): """Test for a Landsat 8 NBAR scene.""" nbar_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.NBAR8_DIR, self.NBAR8_SCENE)) mdd = nbar_ds.metadata_dict self.dump_metadata('nbar8_metadata.txt', mdd, self.SMALL_METADATA_KEYS) self.dump_string('nbar8_xml.xml', 
mdd['xml_text']) self.assertIsNone(mdd['mtl_text']) self.check_file('nbar8_metadata.txt') self.check_file('nbar8_xml.xml') def test_pq_scene(self): """Test for a Pixel Quality scene.""" pq_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.PQ_DIR, self.PQ_SCENE)) mdd = pq_ds.metadata_dict self.dump_metadata('pq_metadata.txt', mdd, self.SMALL_METADATA_KEYS) self.dump_string('pq_xml.xml', mdd['xml_text']) self.assertIsNone(mdd['mtl_text']) self.check_file('pq_metadata.txt') self.check_file('pq_xml.xml') def test_pq8_scene(self): """Test for a Landsat 8 Pixel Quality scene.""" pq_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.PQ8_DIR, self.PQ8_SCENE)) mdd = pq_ds.metadata_dict self.dump_metadata('pq8_metadata.txt', mdd, self.SMALL_METADATA_KEYS) self.dump_string('pq8_xml.xml', mdd['xml_text']) self.assertIsNone(mdd['mtl_text']) self.check_file('pq8_metadata.txt') self.check_file('pq8_xml.xml') def test_fc_scene(self): """Test for a Fractional Cover scene.""" fc_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.FC_DIR, self.FC_SCENE)) mdd = fc_ds.metadata_dict self.dump_metadata('fc_metadata.txt', mdd, self.SMALL_METADATA_KEYS) self.dump_string('fc_xml.xml', mdd['xml_text']) self.assertIsNone(mdd['mtl_text']) self.check_file('fc_metadata.txt') self.check_file('fc_xml.xml') def test_fc8_scene(self): """Test for a Landsat 8 Fractional Cover scene.""" fc_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.FC8_DIR, self.FC8_SCENE)) mdd = fc_ds.metadata_dict self.dump_metadata('fc8_metadata.txt', mdd, self.SMALL_METADATA_KEYS) self.dump_string('fc8_xml.xml', mdd['xml_text']) self.assertIsNone(mdd['mtl_text']) self.check_file('fc8_metadata.txt') self.check_file('fc8_xml.xml') def test_crosscheck_ortho_nbar(self): """Cross-check metadata between ortho and nbar datasets.""" ortho_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.ORTHO_DIR, self.ORTHO_SCENE)) nbar_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.NBAR_DIR, self.NBAR_SCENE)) 
self.cross_check(ortho_ds, nbar_ds, self.CROSSCHECK_KEYS_TWO) def test_crosscheck_ortho8_nbar8(self): """Cross-check metadata between Landsat 8 ortho and nbar datasets.""" ortho_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.ORTHO8_DIR, self.ORTHO8_SCENE)) nbar_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.NBAR8_DIR, self.NBAR8_SCENE)) self.cross_check(ortho_ds, nbar_ds, self.CROSSCHECK_KEYS_TWO) def test_crosscheck_ortho_pq(self): """Cross-check metadata between ortho and pq datasets.""" ortho_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.ORTHO_DIR, self.ORTHO_SCENE)) pq_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.PQ_DIR, self.PQ_SCENE)) self.cross_check(ortho_ds, pq_ds, self.CROSSCHECK_KEYS_TWO) def test_crosscheck_ortho8_pq8(self): """Cross-check metadata between Landsat 8 ortho and pq datasets.""" ortho_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.ORTHO8_DIR, self.ORTHO8_SCENE)) pq_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.PQ8_DIR, self.PQ8_SCENE)) self.cross_check(ortho_ds, pq_ds, self.CROSSCHECK_KEYS_TWO) def test_crosscheck_ortho_fc(self): """Cross-check metadata between ortho and fc datasets.""" ortho_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.ORTHO_DIR, self.ORTHO_SCENE)) fc_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.FC_DIR, self.FC_SCENE)) self.cross_check(ortho_ds, fc_ds, self.CROSSCHECK_KEYS_TWO) def test_crosscheck_ortho8_fc8(self): """Cross-check metadata between Landsat 8 ortho and fc datasets.""" ortho_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.ORTHO8_DIR, self.ORTHO8_SCENE)) fc_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.FC8_DIR, self.FC8_SCENE)) self.cross_check(ortho_ds, fc_ds, self.CROSSCHECK_KEYS_TWO) def test_crosscheck_pq_fc(self): """Cross-check metadata between pc and fc datasets.""" pq_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.PQ_DIR, self.PQ_SCENE)) fc_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.FC_DIR, 
self.FC_SCENE)) self.cross_check(pq_ds, fc_ds, self.CROSSCHECK_KEYS_TWO) def test_crosscheck_pq8_fc8(self): """Cross-check metadata between Landsat 8 pc and fc datasets.""" pq_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.PQ8_DIR, self.PQ8_SCENE)) fc_ds = LandsatDataset( os.path.join(self.INPUT_DIR, self.FC8_DIR, self.FC8_SCENE)) self.cross_check(pq_ds, fc_ds, self.CROSSCHECK_KEYS_TWO) def dump_metadata(self, file_name, mdd, md_keys): """Dump a metadata dictionary to a file. file_name - The name of the file. This will be created in the output directory. mdd - The dictionary containing the metadata. md_keys - A list of keys to use for the dump. These must be in mdd. """ out_file_path = os.path.join(self.OUTPUT_DIR, file_name) out_file = open(out_file_path, 'w') for k in md_keys: val = mdd[k] if k == 'pq_tests_run' and val is not None: val = '{:016b}'.format(val) print >> out_file, "%s: %s" % (k, val) out_file.close() def dump_string(self, file_name, string): """Dump a string to a file. file_name - The name of the file. This will be created in the output directory. string - The string to be dumped. """ out_file_path = os.path.join(self.OUTPUT_DIR, file_name) out_file = open(out_file_path, 'w') out_file.write(string) out_file.close() def check_file(self, file_name): """Checks to see if a file is as expected. This checks the file in the output directory against the file of the same name in the expected directory. It uses the diff program to generate useful output in case of a difference. It skips the test if the expected file does not exist. """ output_path = os.path.join(self.OUTPUT_DIR, file_name) expected_path = os.path.join(self.EXPECTED_DIR, file_name) if not os.path.isfile(expected_path): self.skipTest("Expected file '%s' not found." 
% file_name) else: try: subprocess.check_output(['diff', output_path, expected_path]) except subprocess.CalledProcessError as err: self.fail("File '%s' not as expected:\n" % file_name + err.output) def cross_check(self, ds1, ds2, md_keys): """Checks that the metadata from two datasets matches. ds1 and ds2 are two datasets, md_keys is a list of keys to be checked. The routine checks that the metadata matches for each key in md_keys.""" mdd1 = ds1.metadata_dict mdd2 = ds2.metadata_dict for k in md_keys: self.assertEqual(mdd1[k], mdd2[k]) def check_fuzzy_datetime_match(self, ds1, ds2): """Checks for an approximate match between start and end datetimes.""" start1 = ds1.metadata_dict['start_datetime'] end1 = ds1.metadata_dict['end_datetime'] start2 = ds2.metadata_dict['start_datetime'] end2 = ds2.metadata_dict['end_datetime'] overlap = self.calculate_overlap(start1, end1, start2, end2) self.assertGreaterEqual(overlap, 0.9) @staticmethod def calculate_overlap(start1, end1, start2, end2): """Calculate the fractional overlap between time intervals.""" interval_length = max((end1 - start1), (end2 - start2)) interval_seconds = interval_length.total_seconds() overlap_start = max(start1, start2) overlap_end = min(end1, end2) if overlap_end > overlap_start: overlap_length = overlap_end - overlap_start overlap_seconds = overlap_length.total_seconds() else: overlap_seconds = 0.0 return overlap_seconds / interval_seconds
class TestTileContents(unittest.TestCase):
    """Unit tests for the TileContents class"""
    #pylint: disable=too-many-instance-attributes
    ############################### User area #################################
    MODULE = 'tile_contents'
    SUITE = 'TileContents'
    # Set to true if we want to populate expected directory with results,
    # without doing comparision. Set to False if we want to put (often
    # a subset of) results in output directory and compare against the
    # previously populated expected directory.
    POPULATE_EXPECTED = False
    ###########################################################################
    INPUT_DIR = dbutil.input_directory(MODULE, SUITE)
    OUTPUT_DIR = dbutil.output_directory(MODULE, SUITE)
    EXPECTED_DIR = dbutil.expected_directory(MODULE, SUITE)
    # Results go to 'expected' when populating benchmarks, 'output' when
    # comparing against them.
    if POPULATE_EXPECTED:
        destination_dir = 'expected'
    else:
        destination_dir = 'output'
    TEMP_DIR = dbutil.temp_directory(MODULE, SUITE, destination_dir)
    TILE_ROOT_DIR = dbutil.tile_root_directory(MODULE, SUITE,
                                               destination_dir)

    def setUp(self):
        """Create a per-test logfile, an empty test database, and an
        ingester/collection pair configured against that database."""
        #
        # Parse out the name of the test case and use it to name a logfile
        #
        match = re.search(r'\.([^\.]+)$', self.id())
        if match:
            name = match.group(1)
        else:
            name = 'TestIngester'
        logfile_name = "%s.log" % name
        self.logfile_path = os.path.join(self.OUTPUT_DIR, logfile_name)
        self.expected_path = os.path.join(self.EXPECTED_DIR, logfile_name)
        #
        # Set up a handler to log to the logfile, and attach it to the
        # root logger.
        #
        self.handler = logging.FileHandler(self.logfile_path, mode='w')
        self.handler.setLevel(logging.INFO)
        self.handler.setFormatter(logging.Formatter('%(message)s'))
        LOGGER.addHandler(self.handler)
        # Add a streamhandler to write output to console
        # self.stream_handler = logging.StreamHandler(stream=sys.stdout)
        # self.stream_handler.setLevel(logging.INFO)
        # self.stream_handler.setFormatter(logging.Formatter('%(message)s'))
        # LOGGER.addHandler(self.stream_handler)

        # Create an empty database with a randomised name to avoid
        # collisions with other users/runs.
        self.test_conn = None
        self.test_dbname = dbutil.random_name("test_tile_contents")
        LOGGER.info('Creating %s', self.test_dbname)
        dbutil.TESTSERVER.create(self.test_dbname,
                                 self.INPUT_DIR, "hypercube_empty.sql")

        # Set the datacube configuration file to point to the empty database
        configuration_dict = {'dbname': self.test_dbname,
                              'temp_dir': self.TEMP_DIR,
                              'tile_root': self.TILE_ROOT_DIR}
        config_file_path = dbutil.update_config_file2(configuration_dict,
                                                      self.INPUT_DIR,
                                                      self.OUTPUT_DIR,
                                                      "test_datacube.conf")

        # Set an instance of the datacube and pass it to an ingester instance
        test_args = TestArgs()
        test_args.config_file = config_file_path
        test_args.debug = False
        test_datacube = IngesterDataCube(test_args)
        self.ingester = TestIngester(datacube=test_datacube)
        self.collection = self.ingester.collection

    def tearDown(self):
        """Detach logging, optionally save the database as the new
        benchmark, then drop the test database."""
        #
        # Flush the handler and remove it from the root logger.
        #
        self.handler.flush()
        # self.stream_handler.flush()
        LOGGER.removeHandler(self.handler)
        # LOGGER.removeHandler(self.stream_handler)
        if self.test_dbname:
            if self.POPULATE_EXPECTED:
                dbutil.TESTSERVER.save(self.test_dbname, self.EXPECTED_DIR,
                                       'hypercube_tile_contents.sql')
            else:
                #TODO: make dbase comaprision
                # NOTE(review): placeholder only — no database comparison
                # is performed yet; kkk is a dead assignment.
                kkk = -1
            LOGGER.info('About to drop %s', self.test_dbname)
            dbutil.TESTSERVER.drop(self.test_dbname)

    @staticmethod
    def swap_dir_in_path(fullpath, dir1, dir2):
        """Given a pathname fullpath, replace right-most occurrence of dir1
        with dir2 and return the result."""
        # Peel path components off the right; replace the first (i.e.
        # right-most) component equal to dir1, keep everything else.
        dirname = fullpath
        leaf = None
        newpath_list = []
        while leaf != '':
            dirname, leaf = os.path.split(dirname)
            if leaf == dir1:
                newpath_list.append(dir2)
                break
            newpath_list.append(leaf)
        newpath = dirname
        newpath_list.reverse()
        for subdirectory in newpath_list:
            newpath = os.path.join(newpath, subdirectory)
        return newpath

    @staticmethod
    def get_benchmark_footprints(dset_dict, benchmark_dir):
        """Given a dataset_dict, parse the list of files in benchmark
        directory and generate a list of tile footprints."""
        # Get information from dataset that will be used to match
        # tile pathnames in the benchmark directory
        sat = dset_dict['satellite_tag'].upper()
        sensor = dset_dict['sensor_name'].upper()
        level = dset_dict['processing_level']
        # Date part (before 'T') of the ISO start datetime.
        ymd_str = re.match(r'(.*)T',
                           dset_dict['start_datetime'].isoformat()).group(1)
        #return
        # From the list of benchmark files get the list of footprints
        # for this dataset.
        pattern = re.compile(r'%s_%s_%s_(?P<xindex>[-]*\d+)_'
                             r'(?P<yindex>[-]*\d+)_'
                             r'.*%s.*ti[f]*$' % (sat, sensor, level, ymd_str))
        matchobject_list = [re.match(pattern, filename).groupdict()
                            for filename in os.listdir(benchmark_dir)
                            if re.match(pattern, filename)]
        footprints = [(int(m['xindex']), int(m['yindex']))
                      for m in matchobject_list]
        return footprints

    @staticmethod
    def get_benchmark_tile(dset_dict, benchmark_dir, footprint):
        """Given the dataset metadata dictionary and the benchmark
        directory, get the tile which matches the current footprint."""
        xindex, yindex = footprint
        sat = dset_dict['satellite_tag'].upper()
        sensor = dset_dict['sensor_name'].upper()
        # Benchmark filenames use 'ETM', not the 'ETM+' sensor name.
        if sensor == 'ETM+':
            sensor = 'ETM'
        level = dset_dict['processing_level']
        ymd_str = re.match(r'(.*)T',
                           dset_dict['start_datetime'].isoformat()).group(1)
        # Match .tif or .vrt
        file_pattern = r'%s_%s_%s_%03d_%04d_%s.*\.(tif{1,2}|vrt)$' \
            % (sat, sensor, level, xindex, yindex, ymd_str)
        filelist = [filename for filename in os.listdir(benchmark_dir)
                    if re.match(file_pattern, filename)]
        assert len(filelist) <= 1, "Unexpected multiple benchmark tiles"
        if len(filelist) == 1:
            return os.path.join(benchmark_dir, filelist[0])
        else:
            return None

    def test_reproject(self):
        """Test the Landsat tiling process method by comparing output to
        a file on disk."""
        # pylint: disable=too-many-locals
        #For each processing_level, and dataset keep record of those
        #tile footprints in the benchmark set.
        for iacquisition in range(len(TestIngest.DATASETS_TO_INGEST['PQA'])):
            for processing_level in ['PQA', 'NBAR', 'ORTHO']:
                #Skip all but PQA and ORTHO for first dataset.
                #TODO program this in as a paramter of the suite
                #if iacquisition > 0:
                #    continue
                #if processing_level in ['NBAR']:
                #    continue
                dataset_path = \
                    TestIngest.DATASETS_TO_INGEST[processing_level]\
                    [iacquisition]
                LOGGER.info('Testing Dataset %s', dataset_path)
                dset = LandsatDataset(dataset_path)
                #return
                # Create a DatasetRecord instance so that we can access its
                # list_tile_types() method. In doing this we need to create a
                # collection object and entries on the acquisition and dataset
                # tables of the database.
                self.collection.begin_transaction()
                acquisition = \
                    self.collection.create_acquisition_record(dset)
                dset_record = acquisition.create_dataset_record(dset)
                # Get tile types
                dummy_tile_type_list = dset_record.list_tile_types()
                # Assume dataset has tile_type = 1 only:
                tile_type_id = 1
                dataset_bands_dict = dset_record.get_tile_bands(tile_type_id)
                ls_bandstack = dset.stack_bands(dataset_bands_dict)
                # Form scene vrt
                ls_bandstack.buildvrt(
                    self.collection.get_temp_tile_directory())
                # Reproject scene data onto selected tile coverage
                tile_footprint_list = dset_record.get_coverage(tile_type_id)
                LOGGER.info('coverage=%s', str(tile_footprint_list))
                for tile_footprint in tile_footprint_list:
                    #Skip all but PQA and ORTHO for first dataset.
                    #TODO program this in as a paramter of the suite
                    #if tile_footprint not in [(117, -35), (115, -34)]:
                    #    continue
                    tile_contents = \
                        self.collection.create_tile_contents(tile_type_id,
                                                             tile_footprint,
                                                             ls_bandstack)
                    LOGGER.info('reprojecting for %s tile %s...',
                                processing_level, str(tile_footprint))
                    tile_contents.reproject()
                    LOGGER.info('...done')
                    # When populating benchmarks there is nothing to
                    # compare against.
                    if self.POPULATE_EXPECTED:
                        continue
                    #Do comparision with expected results
                    tile_benchmark = self.swap_dir_in_path(
                        tile_contents.tile_output_path, 'output', 'expected')
                    if tile_contents.has_data():
                        LOGGER.info('Tile %s has data', str(tile_footprint))
                        LOGGER.info("Comparing test output with benchmark:\n"
                                    "benchmark: %s\ntest output: %s",
                                    tile_benchmark,
                                    tile_contents.temp_tile_output_path)
                        # Do comparision with expected directory
                        LOGGER.info('Calling load and check ...')
                        ([data1, data2], dummy_nlayers) = \
                            TestLandsatTiler.load_and_check(
                                tile_benchmark,
                                tile_contents.temp_tile_output_path,
                                tile_contents.band_stack.band_dict,
                                tile_contents.band_stack.band_dict)
                        LOGGER.info('Checking arrays ...')
                        if not (data1 == data2).all():
                            self.fail("Reprojected tile differs "
                                      "from %s" % tile_benchmark)
                        LOGGER.info('...OK')
                    else:
                        LOGGER.info('No data in %s', str(tile_footprint))
                    LOGGER.info('-' * 80)
                self.collection.commit_transaction()
class TestTileContents(unittest.TestCase):
    """Unit tests for the TileContents class (benchmark-comparison variant).

    NOTE(review): this class deliberately-or-not re-uses the name
    TestTileContents already defined earlier in this module, so at module
    scope it SHADOWS the earlier class and unittest discovery will only
    run this one. Renaming it (e.g. TestTileContents2, matching
    SUITE = 'TileContents2') should be considered.
    """
    #pylint: disable=too-many-instance-attributes
    MODULE = 'tile_contents'
    SUITE = 'TileContents2'
    INPUT_DIR = dbutil.input_directory(MODULE, SUITE)
    OUTPUT_DIR = dbutil.output_directory(MODULE, SUITE)
    EXPECTED_DIR = dbutil.expected_directory(MODULE, SUITE)
    # Location of pre-computed benchmark tiles (not read directly here;
    # comparisons go through TestIngest.BENCHMARK_DIR).
    EXAMPLE_TILE = '/g/data/v10/test_resources/benchmark_results/gdalwarp/...'

    def setUp(self):
        """Create a per-test logfile, an empty test database, and an
        ingester/collection pair configured against that database."""
        #
        # Parse out the name of the test case and use it to name a logfile
        #
        match = re.search(r'\.([^\.]+)$', self.id())
        if match:
            name = match.group(1)
        else:
            name = 'TestIngester'
        logfile_name = "%s.log" % name
        self.logfile_path = os.path.join(self.OUTPUT_DIR, logfile_name)
        self.expected_path = os.path.join(self.EXPECTED_DIR, logfile_name)
        #
        # Set up a handler to log to the logfile, and attach it to the
        # root logger.
        #
        self.handler = logging.FileHandler(self.logfile_path, mode='w')
        self.handler.setLevel(logging.INFO)
        self.handler.setFormatter(logging.Formatter('%(message)s'))
        LOGGER.addHandler(self.handler)

        # Create an empty database with a randomised name to avoid
        # collisions with other users/runs.
        self.test_conn = None
        self.test_dbname = dbutil.random_name("test_tile_contents")
        LOGGER.info('Creating %s', self.test_dbname)
        dbutil.TESTSERVER.create(self.test_dbname,
                                 self.INPUT_DIR, "hypercube_empty.sql")

        # Set the datacube configuration file to point to the empty database
        configuration_dict = {'dbname': self.test_dbname,
                              'tile_root': os.path.join(self.OUTPUT_DIR,
                                                        'tiles')}
        config_file_path = dbutil.update_config_file2(configuration_dict,
                                                      self.INPUT_DIR,
                                                      self.OUTPUT_DIR,
                                                      "test_datacube.conf")

        # Set an instance of the datacube and pass it to an ingester instance
        test_args = TestArgs()
        test_args.config_file = config_file_path
        test_args.debug = False
        test_datacube = IngesterDataCube(test_args)
        self.ingester = TestIngester(datacube=test_datacube)
        self.collection = self.ingester.collection

    def tearDown(self):
        """Detach logging and drop the test database."""
        #
        # Flush the handler and remove it from the root logger.
        #
        self.handler.flush()
        # BUG FIX: the original called streamhandler.flush() here, but no
        # 'streamhandler' name exists anywhere in this class (no stream
        # handler is created in setUp), so every test raised NameError
        # during teardown. The bogus call has been removed.
        root_logger = logging.getLogger()
        root_logger.removeHandler(self.handler)
        if self.test_dbname:
            LOGGER.info('About to drop %s', self.test_dbname)
            dbutil.TESTSERVER.drop(self.test_dbname)

    @staticmethod
    def get_benchmark_footprints(dset_dict, benchmark_dir):
        """Given a dataset_dict, parse the list of files in benchmark
        directory and generate a list of tile footprints."""
        # Get information from dataset that will be used to match
        # tile pathnames in the benchmark directory
        sat = dset_dict['satellite_tag'].upper()
        sensor = dset_dict['sensor_name'].upper()
        level = dset_dict['processing_level']
        # Date part (before 'T') of the ISO start datetime.
        ymd_str = re.match(r'(.*)T',
                           dset_dict['start_datetime'].isoformat()).group(1)
        #return
        # From the list of benchmark files get the list of footprints
        # for this dataset.
        pattern = re.compile(r'%s_%s_%s_(?P<xindex>[-]*\d+)_'
                             r'(?P<yindex>[-]*\d+)_'
                             r'.*%s.*ti[f]*$' % (sat, sensor, level, ymd_str))
        matchobject_list = [re.match(pattern, filename).groupdict()
                            for filename in os.listdir(benchmark_dir)
                            if re.match(pattern, filename)]
        footprints = [(int(m['xindex']), int(m['yindex']))
                      for m in matchobject_list]
        return footprints

    @staticmethod
    def get_benchmark_tile(dset_dict, benchmark_dir, footprint):
        """Given the dataset metadata dictionary and the benchmark
        directory, get the tile which matches the current footprint."""
        xindex, yindex = footprint
        sat = dset_dict['satellite_tag'].upper()
        sensor = dset_dict['sensor_name'].upper()
        # Benchmark filenames use 'ETM', not the 'ETM+' sensor name.
        if sensor == 'ETM+':
            sensor = 'ETM'
        level = dset_dict['processing_level']
        ymd_str = re.match(r'(.*)T',
                           dset_dict['start_datetime'].isoformat()).group(1)
        # Match .tif or .vrt
        file_pattern = r'%s_%s_%s_%03d_%04d_%s.*\.(tif{1,2}|vrt)$' \
            % (sat, sensor, level, xindex, yindex, ymd_str)
        filelist = [filename for filename in os.listdir(benchmark_dir)
                    if re.match(file_pattern, filename)]
        assert len(filelist) <= 1, "Unexpected multiple benchmark tiles"
        if len(filelist) == 1:
            return os.path.join(benchmark_dir, filelist[0])
        else:
            return None

    def test_reproject(self):
        """Test the Landsat tiling process method by comparing output to
        a file on disk."""
        # pylint: disable=too-many-locals
        #For each processing_level, and dataset keep record of those
        #tile footprints in the benchmark set.
        bench_footprints = {}
        for iacquisition in range(len(TestIngest.DATASETS_TO_INGEST['PQA'])):
            for processing_level in ['PQA', 'NBAR', 'ORTHO']:
                #Skip all but PQA and ORTHO for first dataset.
                #TODO program this in as a paramter of the suite
                if iacquisition > 0:
                    continue
                if processing_level in ['NBAR']:
                    continue
                dataset_path = \
                    TestIngest.DATASETS_TO_INGEST[processing_level]\
                    [iacquisition]
                LOGGER.info('Testing Dataset %s', dataset_path)
                dset = LandsatDataset(dataset_path)
                #return
                # Create a DatasetRecord instance so that we can access its
                # list_tile_types() method. In doing this we need to create a
                # collection object and entries on the acquisition and dataset
                # tables of the database.
                self.collection.begin_transaction()
                acquisition = \
                    self.collection.create_acquisition_record(dset)
                dset_record = acquisition.create_dataset_record(dset)
                self.collection.commit_transaction()
                # List the benchmark footprints associated with this datset
                ftprints = \
                    self.get_benchmark_footprints(dset_record.mdd,
                                                  TestIngest.BENCHMARK_DIR)
                bench_footprints.setdefault(processing_level, {})
                bench_footprints[processing_level].setdefault(iacquisition,
                                                              {})
                bench_footprints[processing_level][iacquisition] = ftprints
                LOGGER.info('bench_footprints=%s', str(ftprints))
                # Get tile types
                dummy_tile_type_list = dset_record.list_tile_types()
                # Assume dataset has tile_type = 1 only:
                tile_type_id = 1
                dataset_bands_dict = dset_record.get_tile_bands(tile_type_id)
                ls_bandstack = dset.stack_bands(dataset_bands_dict)
                temp_dir = os.path.join(self.ingester.datacube.tile_root,
                                        'ingest_temp')
                # Form scene vrt
                ls_bandstack.buildvrt(temp_dir)
                # Reproject scene data onto selected tile coverage
                tile_footprint_list = dset_record.get_coverage(tile_type_id)
                LOGGER.info('coverage=%s', str(tile_footprint_list))
                for tile_footprint in tile_footprint_list:
                    #Skip all but PQA and ORTHO for first dataset.
                    #TODO program this in as a paramter of the suite
                    if tile_footprint not in [(117, -35), (115, -34)]:
                        continue
                    tile_contents = \
                        self.collection.create_tile_contents(tile_type_id,
                                                             tile_footprint,
                                                             ls_bandstack)
                    LOGGER.info('reprojecting for %s tile %s',
                                processing_level, str(tile_footprint))
                    tile_contents.reproject()
                    # Because date-time of PQA datasets is coming directly
                    # from the PQA dataset, rather NBAR, match on ymd string
                    # of datetime, rather than the micorseconds version in
                    # the NBAR data.
                    tile_benchmark = \
                        self.get_benchmark_tile(dset_record.mdd,
                                                TestIngest.BENCHMARK_DIR,
                                                tile_footprint)
                    LOGGER.info('tile_benchmark is %s', tile_benchmark)
                    if tile_contents.has_data():
                        LOGGER.info('Tile %s has data',
                                    str(tile_footprint))
                        # The tile might have data but, if PQA does not, then
                        # the benchmark tile will not exist
                        if tile_footprint not in bench_footprints \
                                [processing_level][iacquisition]:
                            assert tile_footprint not in \
                                bench_footprints['PQA'][iacquisition], \
                                "Old ingester found PQA tile and should " \
                                "have found cooresponding tile for %s" \
                                % processing_level
                            LOGGER.info('%s tile %s has data in new ingester',
                                        processing_level,
                                        str(tile_footprint))
                            continue
                        # Tile exists in old ingester and new ingester
                        LOGGER.info('Calling load and check ...')
                        ([data1, data2], dummy_nlayers) = \
                            TestLandsatTiler.load_and_check(
                                tile_benchmark,
                                tile_contents.temp_tile_output_path,
                                tile_contents.band_stack.band_dict,
                                tile_contents.band_stack.band_dict)
                        LOGGER.info('Checking arrays ...')
                        assert (data1 == data2).all(), \
                            "Reprojected tile differs " \
                            "from %s" % tile_benchmark
                        LOGGER.info('...OK')
                    else:
                        LOGGER.info('No data in %s', str(tile_footprint))
                        assert tile_footprint not in \
                            bench_footprints[processing_level][iacquisition],\
                            "%s tile %s does not have data " \
                            % (processing_level, str(tile_footprint))
                    LOGGER.info('-' * 80)
class TestDBUpdater(unittest.TestCase):
    """Unit tests for the dbupdater.py script"""
    MODULE = 'dbupdater'
    SUITE = 'TestDBUpdater'
    INPUT_DIR = dbutil.input_directory(MODULE, SUITE)
    OUTPUT_DIR = dbutil.output_directory(MODULE, SUITE)
    EXPECTED_DIR = dbutil.expected_directory(MODULE, SUITE)

    def setUp(self):
        """Initialise per-test resources to None so tearDown can clean
        up only what was actually created."""
        self.test_dbname = None
        self.expected_dbname = None
        self.test_conn = None
        self.expected_conn = None
        self.logfile = None

    def test_onescene(self):
        """Test database update for a single scene."""
        # This test is intended as an example, and so is extensively
        # commented.
        # Open a log file
        logfile_path = os.path.join(self.OUTPUT_DIR, "test_onescene.log")
        self.logfile = open(logfile_path, "w")

        #
        # Create the initial database
        #

        # Randomise the name to avoid collisons with other users.
        self.test_dbname = dbutil.random_name("test_onescene")

        # Create the database.
        dbutil.TESTSERVER.create(self.test_dbname,
                                 self.INPUT_DIR, "hypercube_empty.sql")

        #
        # Run dbupdater on the test database and save the result
        #

        # Create an updated datacube_conf file with the new dbname
        config_file_path = dbutil.update_config_file(self.test_dbname,
                                                     self.INPUT_DIR,
                                                     self.OUTPUT_DIR,
                                                     "test_datacube.conf")

        # Run dbupdater
        ingest_dir = os.path.join(self.INPUT_DIR, 'onescene')
        dbupdater_cmd = ["python",
                         "dbupdater.py",
                         "--debug",
                         "--config=%s" % config_file_path,
                         "--source=%s" % ingest_dir,
                         "--removedblist",
                         "--followsymlinks"]
        # stderr folded into the logfile along with stdout.
        subprocess.check_call(dbupdater_cmd, stdout=self.logfile,
                              stderr=subprocess.STDOUT)

        # Save the updated database
        dbutil.TESTSERVER.save(self.test_dbname, self.OUTPUT_DIR,
                               "onescene.sql")

        #
        # If an expected result exists then load it and compare
        #

        # Check for expected result
        if os.path.isfile(os.path.join(self.EXPECTED_DIR, "onescene.sql")):
            # Create a randomised name...
            self.expected_dbname = dbutil.random_name("expected_onescene")

            # load the database...
            dbutil.TESTSERVER.create(self.expected_dbname,
                                     self.EXPECTED_DIR, "onescene.sql")

            # create database connections...
            self.test_conn = dbutil.TESTSERVER.connect(self.test_dbname)
            self.expected_conn = dbutil.TESTSERVER.connect(
                self.expected_dbname)

            # and compare.
            self.assertTrue(dbcompare.compare_databases(self.test_conn,
                                                        self.expected_conn,
                                                        output=self.logfile,
                                                        verbosity=3),
                            "Databases do not match.")
        else:
            self.skipTest("Expected database save file not found.")

    def tearDown(self):
        """Close connections, drop temporary databases, close the log."""
        # Remove any tempoary databases that have been created.
        if self.test_conn:
            self.test_conn.close()
        if self.expected_conn:
            self.expected_conn.close()
        if self.test_dbname:
            dbutil.TESTSERVER.drop(self.test_dbname)
        if self.expected_dbname:
            dbutil.TESTSERVER.drop(self.expected_dbname)

        # Close the logfile
        if self.logfile:
            self.logfile.close()
class TestLandsatBandstack(unittest.TestCase):
    """Unit tests for the LandsatBandstack class"""
    MODULE = 'landsat_bandstack'
    SUITE = 'LandsatBandstack'
    INPUT_DIR = dbutil.input_directory(MODULE, SUITE)
    OUTPUT_DIR = dbutil.output_directory(MODULE, SUITE)
    EXPECTED_DIR = dbutil.expected_directory(MODULE, SUITE)
    TEMP_DIR = dbutil.temp_directory(MODULE, SUITE, 'output')
    TILE_ROOT_DIR = dbutil.tile_root_directory(MODULE, SUITE, 'output')
    # Pre-generated scene VRT files used as the comparison benchmark.
    BENCHMARK_DIR = os.path.join(EXPECTED_DIR, 'scene_vrt_files')

    def setUp(self):
        """Create a per-test logfile, an empty test database, and an
        ingester/collection pair configured against that database."""
        #
        # Parse out the name of the test case and use it to name a logfile
        #
        match = re.search(r'\.([^\.]+)$', self.id())
        if match:
            name = match.group(1)
        else:
            name = 'TestIngester'
        logfile_name = "%s.log" % name
        self.logfile_path = os.path.join(self.OUTPUT_DIR, logfile_name)
        self.expected_path = os.path.join(self.EXPECTED_DIR, logfile_name)
        #
        # Set up a handler to log to the logfile, and attach it to the
        # root logger.
        #
        self.handler = logging.FileHandler(self.logfile_path, mode='w')
        self.handler.setLevel(logging.INFO)
        self.handler.setFormatter(logging.Formatter('%(message)s'))
        logging.getLogger().addHandler(self.handler)

        # Create an empty database with a randomised name to avoid
        # collisions with other users/runs.
        self.test_conn = None
        print 'Create an empty database'
        self.test_dbname = dbutil.random_name("test_landsat_bandstack")
        print 'Creating %s' % self.test_dbname
        dbutil.TESTSERVER.create(self.test_dbname,
                                 self.INPUT_DIR, "hypercube_empty.sql")

        # Set the datacube configuration file to point to the empty database
        configuration_dict = {'dbname': self.test_dbname,
                              'temp_dir': self.TEMP_DIR,
                              'tile_root': self.TILE_ROOT_DIR}
        config_file_path = dbutil.update_config_file2(configuration_dict,
                                                      self.INPUT_DIR,
                                                      self.OUTPUT_DIR,
                                                      "test_datacube.conf")

        # Set an instance of the datacube and pass it to an ingester instance
        test_args = TestArgs()
        test_args.config_file = config_file_path
        test_args.debug = False
        test_datacube = IngesterDataCube(test_args)
        self.ingester = TestIngester(datacube=test_datacube)
        self.collection = self.ingester.collection

    def test_buildvrt_01(self):
        """Test of LandsatBandstack.buildvrt() method, test one."""
        self.check_buildvrt(0)

    def test_buildvrt_02(self):
        """Test of LandsatBandstack.buildvrt() method, test two."""
        self.check_buildvrt(1)

    def test_buildvrt_03(self):
        """Test of LandsatBandstack.buildvrt() method, test three."""
        self.check_buildvrt(2)

    def test_buildvrt_04(self):
        """Test of LandsatBandstack.buildvrt() method, test four."""
        self.check_buildvrt(3)

    def test_buildvrt_05(self):
        """Test of LandsatBandstack.buildvrt() method, test five."""
        self.check_buildvrt(4)

    def test_buildvrt_06(self):
        """Test of LandsatBandstack.buildvrt() method, test six."""
        self.check_buildvrt(5)

    def check_buildvrt(self, idataset):
        """Test the LandsatBandstack.buildvrt() method by comparing output
        to a file on disk

        idataset - index into the module-level DATASETS_TO_INGEST list.
        """
        assert idataset in range(len(DATASETS_TO_INGEST))
        print 'Testing Dataset %s' % DATASETS_TO_INGEST[idataset]
        dset = LandsatDataset(DATASETS_TO_INGEST[idataset])
        # Create a DatasetRecord instance so that we can access its
        # list_tile_types() method. In doing this we need to create a
        # collection object and entries on the acquisition and dataset
        # tables of the database.
        self.collection.begin_transaction()
        acquisition = \
            self.collection.create_acquisition_record(dset)
        dset_record = acquisition.create_dataset_record(dset)
        self.collection.commit_transaction()
        tile_type_list = dset_record.list_tile_types()
        #Assume dataset has tile_type = 1 only:
        tile_type_id = 1
        dataset_bands_dict = dset_record.get_tile_bands(tile_type_id)
        ls_bandstack = dset.stack_bands(dataset_bands_dict)
        temp_dir = self.collection.get_temp_tile_directory()
        ls_bandstack.buildvrt(temp_dir)
        # Get benchmark vrt for comparision
        vrt_benchmark = os.path.join(self.BENCHMARK_DIR,
                                     os.path.basename(ls_bandstack.vrt_name))
        # diff -I ignores lines matching the pattern, so absolute
        # filename lines inside the VRTs do not cause spurious failures.
        diff_cmd = ["diff",
                    "-I",
                    "[Ff]ilename",
                    "%s" % vrt_benchmark,
                    "%s" % ls_bandstack.vrt_name]
        result = execute(diff_cmd, shell=False)
        if result['stdout'] != '':
            self.fail("Differences between vrt files:\n" + result['stdout'])
        if result['stderr'] != '':
            self.fail("Error in system diff command:\n" + result['stderr'])

    def tearDown(self):
        """Detach logging and drop the test database."""
        #
        # Flush the handler and remove it from the root logger.
        #
        self.handler.flush()
        root_logger = logging.getLogger()
        root_logger.removeHandler(self.handler)
        if self.test_dbname:
            print 'About to drop %s' % self.test_dbname
            dbutil.TESTSERVER.drop(self.test_dbname)