コード例 #1
0
ファイル: test_dbcompare.py プロジェクト: ama-jharrison/agdc
    def test_compare_different(self):
        "Compare two databases with differences."

        file_name = "test_compare_different_v3.txt"

        output = StringIO.StringIO()
        result = dbcompare.compare_databases(self.conn[0], self.conn[2], verbosity=3, output=output)

        output_file_path = os.path.join(self.OUTPUT_DIR, file_name)
        with open(output_file_path, "w") as output_file:
            output_file.write(output.getvalue())

        self.assertFalse(result, "Databases with differences are " + "comparing as equal.")

        expected_file_path = os.path.join(self.EXPECTED_DIR, file_name)
        if os.path.isfile(expected_file_path):
            with open(expected_file_path) as expected_file:
                expected_str = expected_file.read()
            self.assertEqual(output.getvalue(), expected_str)
        else:
            self.skipTest("expected output file not found.")
コード例 #2
0
    def test_landsat_tiler(self):
        """Test the cataloging and tiling of Landsat scences and compare
        resulting database and tile contents with an ingestion benchmark"""
        # This test is intended as an example, and so is extensively
        # commented.
        # Open a log file
        if self.mode not in [0, 1, 2, 3]:
            self.skipTest('Skipping test_landsat_tiler since flag is not in [0, 1, 2, 3]')
        logfile_path = os.path.join(self.OUTPUT_DIR, "test_landsat_tiler.log")
        self.logfile = open(logfile_path, "w")

        #
        # Create the initial database
        #

        # Randomise the name to avoid collisions with other users.
        self.test_dbname = dbutil.random_name("test_tiler")

        # Create the database.
        print 'About to create dbase from %s' \
            %(os.path.join(self.INPUT_DIR, "hypercube_empty.sql"))
        if self.mode != 1:
            dbutil.TESTSERVER.create(self.test_dbname,
                                     self.INPUT_DIR, "hypercube_empty.sql")

        #
        # Run dbupdater on the test database and save the result
        #
        # Create updated datacube_conf file with the new dbname and tile_root
        tile_root = os.path.join(self.OUTPUT_DIR, "tiles")
        configuration_dict = {'dbname': self.test_dbname,
                              'tile_root': tile_root}
        config_file_path = dbutil.update_config_file2(configuration_dict,
                                                     self.INPUT_DIR,
                                                     self.OUTPUT_DIR,
                                                     "test_datacube.conf")
        # Run dbupdater

        ingest_dir = os.path.join(self.INPUT_DIR, 'tiler_testing')
        dbupdater_cmd = ["python",
                         "dbupdater.py",
                         "--debug",
                         "--config=%s" % config_file_path,
                         "--source=%s" % ingest_dir,
                         "--removedblist",
                         "--followsymlinks"]
        if self.mode != 1:
            subprocess.check_call(dbupdater_cmd, stdout=self.logfile,
                                  stderr=subprocess.STDOUT)

        # Run landsat_tiler
        landsat_tiler_cmd = ["python",
                             "landsat_tiler.py",
                             "--config=%s" % config_file_path]
        if self.mode != 1:
            subprocess.check_call(landsat_tiler_cmd, stdout=self.logfile,
                                  stderr=subprocess.STDOUT)
        # Save the updated database
        if self.mode != 1:
            dbutil.TESTSERVER.save(self.test_dbname, self.OUTPUT_DIR,
                                   "tiler_testing.sql")
        #
        # If an expected result exists then load it and compare
        #
        # Check for expected result
        if self.mode > 0 and os.path.isfile(os.path.join(self.EXPECTED_DIR,
                                                         "tiler_testing.sql")):
            print 'starting to check differences'
            #MPHtemp create the output database
            if self.mode == 1:
                self.test_dbname = dbutil.random_name("tiler_testing")
                dbutil.TESTSERVER.create(self.test_dbname,
                                         self.OUTPUT_DIR, "tiler_testing.sql")
            #END MPHtemp

            # Create a randomised name...
            self.expected_dbname = dbutil.random_name("expected_tiler_testing")

            # load the database...
            dbutil.TESTSERVER.create(self.expected_dbname,
                                     self.EXPECTED_DIR, "tiler_testing.sql")
            # create database connections...
            self.test_conn = dbutil.TESTSERVER.connect(self.test_dbname)
            self.expected_conn = \
                dbutil.TESTSERVER.connect(self.expected_dbname)

            # and compare.
            dbases_agree = dbcompare.compare_databases(self.test_conn,
                                               self.expected_conn,
                                               output=self.logfile,
                                               verbosity=3)

            if self.mode == 2:
                #Compare databases and fail test if they differ
                assert dbases_agree, "Databases do not match."

            #Compare data within corresponding files of the EXPECTED_DIR and
            #OUTPUT_DIR. Get list of tile pathnames from EXPECTED and OUTPUT
            #databases' repsective tile tables. There is an assumption here
            #that, within each of the expected and output databases,
            #the tile basename uniquely defines both the tile_type_id and the
            #full tile pathname. However, if the tile type table has put ORTHO
            #bands to be of a new tile_type, then
            #corresponding tiles in expected and output may be of different
            #tile_type. So we need to have self.bands_expected and
            #self.bands_output
            expected_tile_dict, output_tile_dict = \
                self.get_tile_pathnames(self.expected_conn, self.test_conn)
            tiles_expected = set(expected_tile_dict.keys())
            tiles_output = set(output_tile_dict.keys())
            tiles_expected_or_output = tiles_expected | tiles_output

            #Construct band source table as per datacube module
            self.bands_expected = \
                self.construct_bands_source_dict(self.expected_conn)
            self.bands_output =\
                self.construct_bands_source_dict(self.test_conn)
            #file_pattern to parse file name for information
            file_pattern = [r'(?P<sat>\w+)_(?P<sensor>\w+)_',
                            r'(?P<processing_level>\w+)_',
                            r'(?P<xindex>-*\d+)_(?P<yindex>-*\d+)_'
                            r'(?P<year>\d+)-(?P<month>\d+)-'
                            r'(?P<day>\d+)T(?P<hour>\d+)-(?P<minute>\d+)-',
                            r'(?P<second_whole>\d+)\.(?P<second_fraction>\d+)'
                            r'\.(?P<file_extension>.+)']
            pattern = re.compile(''.join(file_pattern))
            #Set up dictionary of pixel counts to be accumulated per
            #(procesing_level, tile_layer) over all tiles
            #0: total_pixel_count_expected
            #1: total_pixel_count_output
            #2: total_pixel_count_both
            #3: total_pixel_count_expected_not_output
            #4: total_pixel_count_output_not_expected
            pixel_count_dict = {}
            #Set up nested dicts of differece counts
            difference_count_dict = {}

            #For each tile in EXPECTED_DIR and OUTPUT_DIR, get pixel counts and
            #difference histograms
            #There are five dictionaries involved:
            ### tile_name_dict             {'sat': LS5, 'sensor': TM, ...}
            ### bands_dict_expected:       those bands from self.bands_expected
            ###                            corresponding to current tile's
            ###                            tile_type_id and (satellite, sensor)
            ### bands_dict_output:         output database's correspondent to
            ###                            bands_dict_expected
            ### level_dict_expected:       those bands from bands_dict_expected
            ###                            for  which the processing level
            ###                            matches that for the current tile
            ### level_dict_output:         output database's correspondent to
            ###                            level_dict_expected
            ### all_levels_info_dict       [level_dict_expected,
            ####                           level_dict_output]
            ###                            for each processing level

            all_levels_info_dict = {}
            for tile_name in tiles_expected_or_output:
                print 'processing tile %s' %tile_name
                tile_type_id_expected = None
                tile_type_id_output = None
                fname_expected = None
                fname_output = None
                #If tile is in either database, extract tile_type and pathname
                if tile_name in tiles_expected:
                    tile_type_id_expected, fname_expected = \
                        expected_tile_dict[tile_name]
                if tile_name in tiles_output:
                    tile_type_id_output, fname_output = \
                        output_tile_dict[tile_name]
                #Extract information from the tile name and select
                #nested dictionary for this tile from bands table,
                #given the (sat, sensor) [or("DERIVED', 'PQA') for PQA],
                #which will be common to expected and output tiles, and the
                #tile_type_id, which may be different for expected and output
                matchobj = re.match(pattern, tile_name)
                tile_name_dict = matchobj.groupdict()
                full_key_expected = \
                    self.get_tiletype_sat_sens_level(tile_type_id_expected,
                                                tile_name_dict)
                full_key_output = \
                    self.get_tiletype_sat_sens_level(tile_type_id_output,
                                                tile_name_dict)
                #Following will raise assertion error if a tile's
                #tile_type_id has changed since benchmark ingestion
                full_key = self.check_equal_or_null(full_key_expected,
                                               full_key_output)
                level_dict_expected = {}
                level_dict_output = {}
                #full_key is (tile_type, sat, sensor, processing_level)
                if full_key in all_levels_info_dict:
                    (level_dict_expected, level_dict_output) = \
                        all_levels_info_dict[full_key]
                if level_dict_expected == {} and full_key_expected != None:
                    level_dict_expected = \
                        self.collect_source_bands(self.bands_expected,
                                                  full_key)
                if level_dict_output == {} and full_key_output != None:
                    level_dict_output = \
                        self.collect_source_bands(self.bands_output,
                                                  full_key)
                if full_key not in all_levels_info_dict:
                    all_levels_info_dict[full_key] = [level_dict_expected,
                                                       level_dict_output]
                if all_levels_info_dict[full_key][0] == {} and \
                        level_dict_expected != {}:
                    all_levels_info_dict[full_key][0] = level_dict_expected
                if all_levels_info_dict[full_key][1] == {} and \
                        level_dict_output != {}:
                    all_levels_info_dict[full_key][1] = level_dict_output

                #Check that the number of bands is as expected, adding
                #singleton dimension if only one band
                ([data_expected, data_output], number_layers) = \
                    self.load_and_check(fname_expected, fname_output,
                                        level_dict_expected,
                                        level_dict_output)
                assert bool(fname_expected) == (data_expected != None) and \
                    bool(fname_output) == (data_output != None), \
                    "data array should exist if and only if fname exists"

                for ilayer in range(number_layers):
                    #Define expected and output band data
                    band_expected, dtype_expected = \
                        self.get_band_data(data_expected, ilayer)
                    band_output, dtype_output = \
                        self.get_band_data(data_output, ilayer)
                    assert (band_expected == None) == (dtype_expected == None)\
                    and (band_output == None) == (dtype_output == None), \
                        "band data should exist if and only if dtype exists"
                    dtype_this = self.check_equal_or_null(dtype_expected,
                                                          dtype_output)
                    #calculate the number of bins required to store the
                    #histogram of differences from this datatype
                    if tile_name_dict['processing_level'] == 'PQA':
                        #possible difference values are 0 through 16,
                        #(number of tests which differ)
                        bin_count = 16 + 1
                    else:
                        #possible difference vals are min through max of dtype
                        bin_count = numpy.iinfo(dtype_this).max - \
                            numpy.iinfo(dtype_this).min + 1
                        assert bin_count < 66000, "datatype is more than 16" \
                            "bits, need to add code to coarsen the" \
                            "histogram bins or use apriori  max and" \
                            "min values of the data"
                    #The histograms are per (level, layer).
                    #Could have one histogram per (sat, sensor, level, layer)
                    #and then, depending on verbosity, aggregate during report.
                    #But for now, just key by (level, layer).
                    result_key = (full_key[3], ilayer + 1)
                    if result_key not in pixel_count_dict:
                        pixel_count_dict[result_key] = numpy.zeros(shape=(5),
                                                            dtype=numpy.uint64)
                        difference_count_dict[result_key] = \
                            numpy.zeros(shape=(bin_count), dtype=numpy.uint64)

                    pixel_count = pixel_count_dict[result_key]
                    difference_count = difference_count_dict[result_key]
                    if tile_name_dict['processing_level'] == 'PQA':
                        if band_expected is None:
                            band_expected = 0
                        if band_output is None:
                            band_output = 0
                        #define index as those pixels with contiguity bit set
                        index_expected = \
                            numpy.bitwise_and(band_expected,
                                              1 << self.PQA_CONTIGUITY_BIT) > 0
                        index_output = \
                            numpy.bitwise_and(band_output,
                                              1 << self.PQA_CONTIGUITY_BIT) > 0
                    else:
                        #For NBAR and ORTHO use nodata_value
                        nodata_value = \
                            level_dict_output[ilayer + 1]['nodata_value']
                        if band_expected is  None:
                            band_expected = nodata_value
                        if band_output is None:
                            band_output = nodata_value
                        index_expected = band_expected != nodata_value
                        index_output = band_output != nodata_value
                    pixel_count[0] += numpy.count_nonzero(index_expected)
                    pixel_count[1] += numpy.count_nonzero(index_output)
                    pixel_count[2] += \
                        numpy.count_nonzero(numpy.logical_and(index_expected,
                                                              index_output))
                    pixel_count[3] += \
                        numpy.count_nonzero(numpy.logical_and
                                            (index_expected, ~index_output))
                    pixel_count[4] += \
                        numpy.count_nonzero(numpy.logical_and
                                            (~index_expected, index_output))
                    #Only want to calculate differences at common pixels
                    index_both = numpy.logical_and(index_expected,
                                                   index_output)
                    if numpy.count_nonzero(index_both) == 0:
                        continue
                    valid_data_expected = band_expected[index_both].ravel()
                    valid_data_output = band_output[index_both].ravel()
                    #Calculate difference histogram and add to running total
                    if tile_name_dict['processing_level'] == 'PQA':
                        difference = \
                            self.count_bitwise_diffs(valid_data_expected,
                                                     valid_data_output)
                    else:
                        difference = abs(valid_data_output.astype(numpy.int64)
                                     - valid_data_expected.astype(numpy.int64))
                    hist, dummy_bin_edges = \
                        numpy.histogram(difference,
                                        numpy.array(range(bin_count + 1),
                                                    dtype=numpy.uint64))
                    difference_count += hist
                    #dereference band data
                    band_expected = None
                    band_output = None
                    difference = None
                    #end of layer loop
                #dereference tile data

                data_expected = None
                data_output = None

            #Output
            #for sat_sen, band_dict in all_bands_dict:
            fp = open(os.path.join(self.OUTPUT_DIR,
                                   'Histogram_output.txt'), 'w')
            fp.writelines('##### COMPARISON OF TILED DATA IN FOLLOWING '\
                              'DIRECTORES\n%s\n%s\n' %(self.EXPECTED_DIR,
                                                       self.OUTPUT_DIR))
            result_keys_processed = []
            for full_key in all_levels_info_dict.keys():
                dummy, dummy, dummy, processing_level = full_key
                top_layer_result_key = (processing_level, 1)
                if top_layer_result_key in result_keys_processed:
                    continue
                fp.writelines('#### Processing Level: %s\n' %processing_level)
                level_dict_expected, level_dict_output = \
                    all_levels_info_dict[full_key]
                assert set(level_dict_expected.keys()) == \
                       set(level_dict_output.keys()), "different key sets"
                number_layers = len(level_dict_output.keys())
                for this_layer in range(1, number_layers + 1):
                    result_key = (processing_level, this_layer)
                    result_keys_processed.append(result_key)
                    fp.writelines('### tile_layer = %d\n' %this_layer)
                    for key, val in level_dict_expected[this_layer].items():
                        if key == 'tile_layer' or key == 'level_name':
                            continue
                        outline = '# %s = %s' %(key, val)
                        if str(level_dict_output[this_layer][key]) != str(val):
                            outline = '%s (%s in output database)' \
                            %(outline, level_dict_output[this_layer][key])
                        fp.writelines('%s\n' %outline)
                    #get key for pixel_count_dict and difference_count_dict
                    #Print counts of pixels with valid data
                    fp.writelines('#Valid data counts\n')
                    pixel_count = pixel_count_dict[result_key]
                    count_desc = ['Expected\t', 'Output\t\t', 'Common\t\t',
                                  'Missing\t\t', 'Extra\t\t']
                    for desc, num in zip(count_desc, pixel_count):
                        fp.writelines('\t\t%s%d\n' %(desc, num))
                    #Print histograms of differences in valid data
                    fp.writelines('#Histogram of differences in valid data\n')
                    difference_count = difference_count_dict[result_key]
                    index_nonzero_bins = difference_count > 0
                    for bin_no in range(len(difference_count)):
                        if index_nonzero_bins[bin_no]:
                            fp.writelines('\t\tDifference of %d: %d\n'
                                          %(bin_no, difference_count[bin_no]))
            fp.close()
        else:
            if self.mode > 0:
                self.skipTest("Expected database save file not found.")
コード例 #3
0
ファイル: test_dbupdater.py プロジェクト: ama-jharrison/agdc
    def test_onescene(self):
        """Test database update for a single scene."""

        # This test is intended as an example, and so is extensively
        # commented.

        # Open a log file
        logfile_path = os.path.join(self.OUTPUT_DIR, "test_onescene.log")
        self.logfile = open(logfile_path, "w")

        #
        # Create the initial database
        #

        # Randomise the name to avoid collisons with other users.
        self.test_dbname = dbutil.random_name("test_onescene")

        # Create the database.
        dbutil.TESTSERVER.create(self.test_dbname, self.INPUT_DIR, "hypercube_empty.sql")

        #
        # Run dbupdater on the test database and save the result
        #

        # Create an updated datacube_conf file with the new dbname
        config_file_path = dbutil.update_config_file(
            self.test_dbname, self.INPUT_DIR, self.OUTPUT_DIR, "test_datacube.conf"
        )

        # Run dbupdater

        ingest_dir = os.path.join(self.INPUT_DIR, "onescene")
        dbupdater_cmd = [
            "python",
            "dbupdater.py",
            "--debug",
            "--config=%s" % config_file_path,
            "--source=%s" % ingest_dir,
            "--removedblist",
            "--followsymlinks",
        ]
        subprocess.check_call(dbupdater_cmd, stdout=self.logfile, stderr=subprocess.STDOUT)

        # Save the updated database
        dbutil.TESTSERVER.save(self.test_dbname, self.OUTPUT_DIR, "onescene.sql")

        #
        # If an expected result exists then load it and compare
        #

        # Check for expected result
        if os.path.isfile(os.path.join(self.EXPECTED_DIR, "onescene.sql")):
            # Create a randomised name...
            self.expected_dbname = dbutil.random_name("expected_onescene")

            # load the database...
            dbutil.TESTSERVER.create(self.expected_dbname, self.EXPECTED_DIR, "onescene.sql")

            # create database connections...
            self.test_conn = dbutil.TESTSERVER.connect(self.test_dbname)
            self.expected_conn = dbutil.TESTSERVER.connect(self.expected_dbname)

            # and compare.

            self.assertTrue(
                dbcompare.compare_databases(self.test_conn, self.expected_conn, output=self.logfile, verbosity=3),
                "Databases do not match.",
            )
        else:
            self.skipTest("Expected database save file not found.")
コード例 #4
0
ファイル: test_dbcompare.py プロジェクト: ama-jharrison/agdc
    def test_compare_empty(self):
        "Compare two empty databases."

        result = dbcompare.compare_databases(self.conn[0], self.conn[1], verbosity=2)
        self.assertTrue(result, "Identical empty databases are " + "not comparing as equal.")