def test_compare_different(self):
    """Compare two databases with differences."""
    fname = "test_compare_different_v3.txt"
    # Capture the comparison report in memory so it can be both
    # written to disk and checked against the benchmark file.
    report = StringIO.StringIO()
    databases_match = dbcompare.compare_databases(self.conn[0],
                                                  self.conn[2],
                                                  verbosity=3,
                                                  output=report)
    # Persist the report for later inspection.
    with open(os.path.join(self.OUTPUT_DIR, fname), "w") as out_file:
        out_file.write(report.getvalue())
    self.assertFalse(databases_match,
                     "Databases with differences are comparing as equal.")
    # Verify the report text against the benchmark, if one exists.
    benchmark_path = os.path.join(self.EXPECTED_DIR, fname)
    if not os.path.isfile(benchmark_path):
        self.skipTest("expected output file not found.")
    with open(benchmark_path) as benchmark_file:
        self.assertEqual(report.getvalue(), benchmark_file.read())
def test_landsat_tiler(self):
    """Test the cataloging and tiling of Landsat scenes and compare
    resulting database and tile contents with an ingestion benchmark.

    Behaviour depends on self.mode:
        0: run the ingestion/tiling pipeline only (no comparison);
        1: skip ingestion and rebuild the test database from a dump
           previously saved in OUTPUT_DIR, then compare;
        2: run the pipeline and compare, failing if databases differ;
        3: run the pipeline and compare without failing on database
           differences (pixel statistics are still reported).
    Any other mode value causes the test to be skipped.
    """
    # This test is intended as an example, and so is extensively
    # commented.

    # Only modes 0-3 are understood; anything else skips the test.
    if self.mode not in [0, 1, 2, 3]:
        self.skipTest('Skipping test_landsat_tiler since flag is not in [0, 1, 2, 3]')

    # Open a log file to capture subprocess and comparison output.
    logfile_path = os.path.join(self.OUTPUT_DIR, "test_landsat_tiler.log")
    self.logfile = open(logfile_path, "w")

    #
    # Create the initial database
    #

    # Randomise the name to avoid collisions with other users.
    self.test_dbname = dbutil.random_name("test_tiler")

    # Create the database (skipped in mode 1, which reuses a
    # previously saved dump instead).
    print 'About to create dbase from %s' \
        %(os.path.join(self.INPUT_DIR, "hypercube_empty.sql"))
    if self.mode != 1:
        dbutil.TESTSERVER.create(self.test_dbname,
                                 self.INPUT_DIR, "hypercube_empty.sql")

    #
    # Run dbupdater on the test database and save the result
    #

    # Create updated datacube_conf file with the new dbname and tile_root
    tile_root = os.path.join(self.OUTPUT_DIR, "tiles")
    configuration_dict = {'dbname': self.test_dbname,
                          'tile_root': tile_root}
    config_file_path = dbutil.update_config_file2(configuration_dict,
                                                  self.INPUT_DIR,
                                                  self.OUTPUT_DIR,
                                                  "test_datacube.conf")

    # Run dbupdater to catalogue the test scenes.
    ingest_dir = os.path.join(self.INPUT_DIR, 'tiler_testing')
    dbupdater_cmd = ["python", "dbupdater.py", "--debug",
                     "--config=%s" % config_file_path,
                     "--source=%s" % ingest_dir,
                     "--removedblist", "--followsymlinks"]
    if self.mode != 1:
        subprocess.check_call(dbupdater_cmd, stdout=self.logfile,
                              stderr=subprocess.STDOUT)

    # Run landsat_tiler to generate the tile files.
    landsat_tiler_cmd = ["python", "landsat_tiler.py",
                         "--config=%s" % config_file_path]
    if self.mode != 1:
        subprocess.check_call(landsat_tiler_cmd, stdout=self.logfile,
                              stderr=subprocess.STDOUT)

    # Save the updated database
    if self.mode != 1:
        dbutil.TESTSERVER.save(self.test_dbname, self.OUTPUT_DIR,
                               "tiler_testing.sql")

    #
    # If an expected result exists then load it and compare
    #

    # Check for expected result
    if self.mode > 0 and \
            os.path.isfile(os.path.join(self.EXPECTED_DIR,
                                        "tiler_testing.sql")):
        print 'starting to check differences'

        #MPHtemp create the output database
        # (mode 1 rebuilds the test database from the dump saved by an
        # earlier run, instead of re-ingesting.)
        if self.mode == 1:
            self.test_dbname = dbutil.random_name("tiler_testing")
            dbutil.TESTSERVER.create(self.test_dbname,
                                     self.OUTPUT_DIR,
                                     "tiler_testing.sql")
        #END MPHtemp

        # Create a randomised name...
        self.expected_dbname = dbutil.random_name("expected_tiler_testing")
        # load the database...
        dbutil.TESTSERVER.create(self.expected_dbname,
                                 self.EXPECTED_DIR, "tiler_testing.sql")
        # create database connections...
        self.test_conn = dbutil.TESTSERVER.connect(self.test_dbname)
        self.expected_conn = \
            dbutil.TESTSERVER.connect(self.expected_dbname)
        # and compare.
        dbases_agree = dbcompare.compare_databases(self.test_conn,
                                                   self.expected_conn,
                                                   output=self.logfile,
                                                   verbosity=3)
        if self.mode == 2:
            #Compare databases and fail test if they differ
            assert dbases_agree, "Databases do not match."

        #Compare data within corresponding files of the EXPECTED_DIR and
        #OUTPUT_DIR. Get list of tile pathnames from EXPECTED and OUTPUT
        #databases' respective tile tables. There is an assumption here
        #that, within each of the expected and output databases,
        #the tile basename uniquely defines both the tile_type_id and the
        #full tile pathname. However, if the tile type table has put ORTHO
        #bands to be of a new tile_type, then corresponding tiles in
        #expected and output may be of different tile_type. So we need
        #to have self.bands_expected and self.bands_output.
        expected_tile_dict, output_tile_dict = \
            self.get_tile_pathnames(self.expected_conn, self.test_conn)
        tiles_expected = set(expected_tile_dict.keys())
        tiles_output = set(output_tile_dict.keys())
        tiles_expected_or_output = tiles_expected | tiles_output

        #Construct band source table as per datacube module
        self.bands_expected = \
            self.construct_bands_source_dict(self.expected_conn)
        self.bands_output =\
            self.construct_bands_source_dict(self.test_conn)

        #file_pattern to parse the tile file name for its metadata
        #(satellite, sensor, level, tile indices, acquisition time)
        file_pattern = [r'(?P<sat>\w+)_(?P<sensor>\w+)_',
                        r'(?P<processing_level>\w+)_',
                        r'(?P<xindex>-*\d+)_(?P<yindex>-*\d+)_'
                        r'(?P<year>\d+)-(?P<month>\d+)-'
                        r'(?P<day>\d+)T(?P<hour>\d+)-(?P<minute>\d+)-',
                        r'(?P<second_whole>\d+)\.(?P<second_fraction>\d+)'
                        r'\.(?P<file_extension>.+)']
        pattern = re.compile(''.join(file_pattern))

        #Set up dictionary of pixel counts to be accumulated per
        #(processing_level, tile_layer) over all tiles.  Array indices:
        #0: total_pixel_count_expected
        #1: total_pixel_count_output
        #2: total_pixel_count_both
        #3: total_pixel_count_expected_not_output
        #4: total_pixel_count_output_not_expected
        pixel_count_dict = {}
        #Set up nested dicts of difference counts
        difference_count_dict = {}

        #For each tile in EXPECTED_DIR and OUTPUT_DIR, get pixel counts
        #and difference histograms.
        #There are five dictionaries involved:
        ### tile_name_dict {'sat': LS5, 'sensor': TM, ...}
        ### bands_dict_expected: those bands from self.bands_expected
        ###                      corresponding to current tile's
        ###                      tile_type_id and (satellite, sensor)
        ### bands_dict_output:   output database's correspondent to
        ###                      bands_dict_expected
        ### level_dict_expected: those bands from bands_dict_expected
        ###                      for which the processing level
        ###                      matches that for the current tile
        ### level_dict_output:   output database's correspondent to
        ###                      level_dict_expected
        ### all_levels_info_dict: [level_dict_expected,
        ###                        level_dict_output]
        ###                       for each processing level
        all_levels_info_dict = {}
        for tile_name in tiles_expected_or_output:
            print 'processing tile %s' %tile_name
            tile_type_id_expected = None
            tile_type_id_output = None
            fname_expected = None
            fname_output = None
            #If tile is in either database, extract tile_type and pathname
            if tile_name in tiles_expected:
                tile_type_id_expected, fname_expected = \
                    expected_tile_dict[tile_name]
            if tile_name in tiles_output:
                tile_type_id_output, fname_output = \
                    output_tile_dict[tile_name]
            #Extract information from the tile name and select
            #nested dictionary for this tile from bands table,
            #given the (sat, sensor) [or("DERIVED', 'PQA') for PQA],
            #which will be common to expected and output tiles, and the
            #tile_type_id, which may be different for expected and output
            matchobj = re.match(pattern, tile_name)
            tile_name_dict = matchobj.groupdict()
            full_key_expected = \
                self.get_tiletype_sat_sens_level(tile_type_id_expected,
                                                 tile_name_dict)
            full_key_output = \
                self.get_tiletype_sat_sens_level(tile_type_id_output,
                                                 tile_name_dict)
            #Following will raise assertion error if a tile's
            #tile_type_id has changed since benchmark ingestion
            full_key = self.check_equal_or_null(full_key_expected,
                                                full_key_output)
            level_dict_expected = {}
            level_dict_output = {}
            #full_key is (tile_type, sat, sensor, processing_level)
            if full_key in all_levels_info_dict:
                (level_dict_expected, level_dict_output) = \
                    all_levels_info_dict[full_key]
            if level_dict_expected == {} and full_key_expected != None:
                level_dict_expected = \
                    self.collect_source_bands(self.bands_expected,
                                              full_key)
            if level_dict_output == {} and full_key_output != None:
                level_dict_output = \
                    self.collect_source_bands(self.bands_output,
                                              full_key)
            #Cache the level dictionaries for this full_key, filling in
            #either side if it was missing when first seen.
            if full_key not in all_levels_info_dict:
                all_levels_info_dict[full_key] = [level_dict_expected,
                                                  level_dict_output]
            if all_levels_info_dict[full_key][0] == {} and \
                    level_dict_expected != {}:
                all_levels_info_dict[full_key][0] = level_dict_expected
            if all_levels_info_dict[full_key][1] == {} and \
                    level_dict_output != {}:
                all_levels_info_dict[full_key][1] = level_dict_output
            #Check that the number of bands is as expected, adding
            #singleton dimension if only one band
            ([data_expected, data_output], number_layers) = \
                self.load_and_check(fname_expected, fname_output,
                                    level_dict_expected,
                                    level_dict_output)
            assert bool(fname_expected) == (data_expected != None) and \
                bool(fname_output) == (data_output != None), \
                "data array should exist if and only if fname exists"
            for ilayer in range(number_layers):
                #Define expected and output band data
                band_expected, dtype_expected = \
                    self.get_band_data(data_expected, ilayer)
                band_output, dtype_output = \
                    self.get_band_data(data_output, ilayer)
                assert (band_expected == None) == (dtype_expected == None)\
                    and (band_output == None) == (dtype_output == None), \
                    "band data should exist if and only if dtype exists"
                dtype_this = self.check_equal_or_null(dtype_expected,
                                                      dtype_output)
                #calculate the number of bins required to store the
                #histogram of differences from this datatype
                if tile_name_dict['processing_level'] == 'PQA':
                    #possible difference values are 0 through 16,
                    #(number of tests which differ)
                    bin_count = 16 + 1
                else:
                    #possible difference vals are min through max of dtype
                    bin_count = numpy.iinfo(dtype_this).max - \
                        numpy.iinfo(dtype_this).min + 1
                # NOTE(review): the adjacent literals below concatenate
                # without spaces in the actual assertion message.
                assert bin_count < 66000, "datatype is more than 16" \
                    "bits, need to add code to coarsen the" \
                    "histogram bins or use apriori max and" \
                    "min values of the data"
                #The histograms are per (level, layer).
                #Could have one histogram per (sat, sensor, level, layer)
                #and then, depending on verbosity, aggregate during
                #report. But for now, just key by (level, layer).
                result_key = (full_key[3], ilayer + 1)
                if result_key not in pixel_count_dict:
                    pixel_count_dict[result_key] = \
                        numpy.zeros(shape=(5), dtype=numpy.uint64)
                    difference_count_dict[result_key] = \
                        numpy.zeros(shape=(bin_count),
                                    dtype=numpy.uint64)
                pixel_count = pixel_count_dict[result_key]
                difference_count = difference_count_dict[result_key]
                if tile_name_dict['processing_level'] == 'PQA':
                    #Missing band: substitute scalar 0 so the contiguity
                    #mask below comes out all-False.
                    if band_expected is None:
                        band_expected = 0
                    if band_output is None:
                        band_output = 0
                    #define index as those pixels with contiguity bit set
                    index_expected = \
                        numpy.bitwise_and(band_expected,
                                          1 << self.PQA_CONTIGUITY_BIT) > 0
                    index_output = \
                        numpy.bitwise_and(band_output,
                                          1 << self.PQA_CONTIGUITY_BIT) > 0
                else:
                    #For NBAR and ORTHO use nodata_value
                    nodata_value = \
                        level_dict_output[ilayer + 1]['nodata_value']
                    if band_expected is None:
                        band_expected = nodata_value
                    if band_output is None:
                        band_output = nodata_value
                    index_expected = band_expected != nodata_value
                    index_output = band_output != nodata_value
                #Accumulate valid-pixel counts (see the pixel_count_dict
                #index legend above).
                pixel_count[0] += numpy.count_nonzero(index_expected)
                pixel_count[1] += numpy.count_nonzero(index_output)
                pixel_count[2] += \
                    numpy.count_nonzero(numpy.logical_and(index_expected,
                                                          index_output))
                pixel_count[3] += \
                    numpy.count_nonzero(numpy.logical_and
                                        (index_expected, ~index_output))
                pixel_count[4] += \
                    numpy.count_nonzero(numpy.logical_and
                                        (~index_expected, index_output))
                #Only want to calculate differences at common pixels
                index_both = numpy.logical_and(index_expected,
                                               index_output)
                if numpy.count_nonzero(index_both) == 0:
                    continue
                valid_data_expected = band_expected[index_both].ravel()
                valid_data_output = band_output[index_both].ravel()
                #Calculate difference histogram and add to running total
                if tile_name_dict['processing_level'] == 'PQA':
                    difference = \
                        self.count_bitwise_diffs(valid_data_expected,
                                                 valid_data_output)
                else:
                    difference = \
                        abs(valid_data_output.astype(numpy.int64) -
                            valid_data_expected.astype(numpy.int64))
                hist, dummy_bin_edges = \
                    numpy.histogram(difference,
                                    numpy.array(range(bin_count + 1),
                                                dtype=numpy.uint64))
                difference_count += hist
                #dereference band data
                band_expected = None
                band_output = None
                difference = None
                #end of layer loop
            #dereference tile data
            data_expected = None
            data_output = None

        #Output
        #for sat_sen, band_dict in all_bands_dict:
        fp = open(os.path.join(self.OUTPUT_DIR,
                               'Histogram_output.txt'), 'w')
        # NOTE(review): 'DIRECTORES' typo preserved — this string is
        # part of the report output format.
        fp.writelines('##### COMPARISON OF TILED DATA IN FOLLOWING '\
                      'DIRECTORES\n%s\n%s\n' %(self.EXPECTED_DIR,
                                               self.OUTPUT_DIR))
        result_keys_processed = []
        for full_key in all_levels_info_dict.keys():
            dummy, dummy, dummy, processing_level = full_key
            top_layer_result_key = (processing_level, 1)
            #Report each processing level only once.
            if top_layer_result_key in result_keys_processed:
                continue
            fp.writelines('#### Processing Level: %s\n'
                          %processing_level)
            level_dict_expected, level_dict_output = \
                all_levels_info_dict[full_key]
            assert set(level_dict_expected.keys()) == \
                set(level_dict_output.keys()), "different key sets"
            number_layers = len(level_dict_output.keys())
            for this_layer in range(1, number_layers + 1):
                result_key = (processing_level, this_layer)
                result_keys_processed.append(result_key)
                fp.writelines('### tile_layer = %d\n' %this_layer)
                #Report band metadata, flagging any value that differs
                #between the expected and output databases.
                for key, val in level_dict_expected[this_layer].items():
                    if key == 'tile_layer' or key == 'level_name':
                        continue
                    outline = '# %s = %s' %(key, val)
                    if str(level_dict_output[this_layer][key]) != \
                            str(val):
                        outline = '%s (%s in output database)' \
                            %(outline,
                              level_dict_output[this_layer][key])
                    fp.writelines('%s\n' %outline)
                #get key for pixel_count_dict and difference_count_dict
                #Print counts of pixels with valid data
                fp.writelines('#Valid data counts\n')
                pixel_count = pixel_count_dict[result_key]
                count_desc = ['Expected\t', 'Output\t\t', 'Common\t\t',
                              'Missing\t\t', 'Extra\t\t']
                for desc, num in zip(count_desc, pixel_count):
                    fp.writelines('\t\t%s%d\n' %(desc, num))
                #Print histograms of differences in valid data
                fp.writelines('#Histogram of differences in valid data\n')
                difference_count = difference_count_dict[result_key]
                index_nonzero_bins = difference_count > 0
                for bin_no in range(len(difference_count)):
                    if index_nonzero_bins[bin_no]:
                        fp.writelines('\t\tDifference of %d: %d\n'
                                      %(bin_no,
                                        difference_count[bin_no]))
        fp.close()
    else:
        #No benchmark dump available: skip (unless mode 0, which never
        #compares and simply finishes).
        if self.mode > 0:
            self.skipTest("Expected database save file not found.")
def test_onescene(self):
    """Test database update for a single scene."""
    # This test is intended as an example, and so is extensively
    # commented.

    # The log file receives the dbupdater subprocess output and the
    # database comparison report.
    self.logfile = open(os.path.join(self.OUTPUT_DIR, "test_onescene.log"),
                        "w")

    # Create a fresh test database; the randomised name avoids
    # collisions with other users on the shared server.
    self.test_dbname = dbutil.random_name("test_onescene")
    dbutil.TESTSERVER.create(self.test_dbname,
                             self.INPUT_DIR, "hypercube_empty.sql")

    # Point a copy of the datacube config file at the new database.
    config_path = dbutil.update_config_file(self.test_dbname,
                                            self.INPUT_DIR,
                                            self.OUTPUT_DIR,
                                            "test_datacube.conf")

    # Ingest the single test scene with dbupdater.
    source_dir = os.path.join(self.INPUT_DIR, "onescene")
    command = ["python", "dbupdater.py", "--debug",
               "--config=%s" % config_path,
               "--source=%s" % source_dir,
               "--removedblist", "--followsymlinks"]
    subprocess.check_call(command, stdout=self.logfile,
                          stderr=subprocess.STDOUT)

    # Dump the updated database so it can serve as a future benchmark.
    dbutil.TESTSERVER.save(self.test_dbname, self.OUTPUT_DIR,
                           "onescene.sql")

    # If no benchmark dump exists, there is nothing to compare against.
    if not os.path.isfile(os.path.join(self.EXPECTED_DIR, "onescene.sql")):
        self.skipTest("Expected database save file not found.")

    # Load the benchmark under a randomised name...
    self.expected_dbname = dbutil.random_name("expected_onescene")
    dbutil.TESTSERVER.create(self.expected_dbname,
                             self.EXPECTED_DIR, "onescene.sql")
    # ...connect to both databases...
    self.test_conn = dbutil.TESTSERVER.connect(self.test_dbname)
    self.expected_conn = dbutil.TESTSERVER.connect(self.expected_dbname)
    # ...and compare them.
    self.assertTrue(dbcompare.compare_databases(self.test_conn,
                                                self.expected_conn,
                                                output=self.logfile,
                                                verbosity=3),
                    "Databases do not match.")
def test_compare_empty(self):
    """Compare two empty databases."""
    # Two freshly created empty databases must compare as identical.
    identical = dbcompare.compare_databases(self.conn[0], self.conn[1],
                                            verbosity=2)
    self.assertTrue(identical,
                    "Identical empty databases are not comparing as equal.")