Code Example #1
File: systest.py Project: ama-jharrison/agdc
    def make_datacube_config(self, test_name, config):
        """Make a datacube config file using a template and test info."""

        template = config.get(test_name, 'datacube_config_template')
        (template_dir, template_name) = os.path.split(template)

        output_name = config.get(test_name, 'datacube_config_name')

        try:
            updates = {'dbname': self.dbname,
                       'temp_dir': self.temp_dir,
                       'tile_root': self.tile_dir}
            dbutil.update_config_file2(updates, template_dir, self.test_dir,
                                       template_name, output_name)

        except IOError as e:
            raise AssertionError('Unable to update datacube config file: %s' %
                                 e.strerror)
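
All of these examples exercise dbutil.update_config_file2, whose implementation is not shown on this page. Judging purely from the call sites (a dict of substitutions, an input directory, an output directory, a template file name, an optional output file name, and a returned output path), it presumably copies a template config file while substituting values, roughly as in the sketch below. The key-value rewriting here is an assumption, not the real agdc code.

    import os

    def update_config_file2(updates, input_dir, output_dir,
                            config_file_name, output_file_name=None):
        """Hypothetical sketch of the utility under test: copy a template
        config file, substituting values for the keys in 'updates', and
        return the path of the file written. The real dbutil implementation
        may differ in how it matches and rewrites entries."""
        output_file_name = output_file_name or config_file_name
        input_path = os.path.join(input_dir, config_file_name)
        output_path = os.path.join(output_dir, output_file_name)
        with open(input_path) as template, open(output_path, 'w') as output:
            for line in template:
                key = line.split('=', 1)[0].strip()
                if key in updates:
                    # Rewrite 'key = value' lines whose key has an update.
                    line = '%s = %s\n' % (key, updates[key])
                output.write(line)
        return output_path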
Code Example #2
    def setUp(self):
        #
        # Parse out the name of the test case and use it to name a logfile
        #
        match = re.search(r'\.([^\.]+)$', self.id())
        if match:
            name = match.group(1)
        else:
            name = 'TestIngester'
        logfile_name = "%s.log" % name
        self.logfile_path = os.path.join(self.OUTPUT_DIR, logfile_name)
        self.expected_path = os.path.join(self.EXPECTED_DIR, logfile_name)
        if self.POPULATE_EXPECTED:
            self.logfile_path = os.path.join(self.EXPECTED_DIR, logfile_name)
        #
        # Set up a handler to log to the logfile, and attach it to the
        # root logger.
        #
        #logging.basicConfig()
        self.handler = logging.FileHandler(self.logfile_path, mode='w')
        self.handler.setLevel(logging.INFO)
        self.handler.setFormatter(logging.Formatter('%(message)s'))
        LOGGER.addHandler(self.handler)

        # Add a streamhandler to write output to console
        
        self.stream_handler = logging.StreamHandler(stream=sys.stdout)
        self.stream_handler.setLevel(logging.INFO)
        self.stream_handler.setFormatter(logging.Formatter('%(message)s'))
        LOGGER.addHandler(self.stream_handler)

        # Create an empty database
        self.test_conn = None
        self.test_dbname = dbutil.random_name("test_tile_record")
        LOGGER.info('Creating %s', self.test_dbname)
        dbutil.TESTSERVER.create(self.test_dbname,
                                 self.INPUT_DIR, "hypercube_empty.sql")

        # Set the datacube configuration file to point to the empty database
        configuration_dict = {'dbname': self.test_dbname,
                              'temp_dir': self.TEMP_DIR,
                              'tile_root': self.TILE_ROOT_DIR}
        config_file_path = dbutil.update_config_file2(configuration_dict,
                                                      self.INPUT_DIR,
                                                      self.OUTPUT_DIR,
                                                      "test_datacube.conf")

        # Set an instance of the datacube and pass it to an ingester instance
        test_args = TestArgs()
        test_args.config_file = config_file_path
        test_args.debug = False
        test_datacube = IngesterDataCube(test_args)
        self.ingester = TestIngester(datacube=test_datacube)
        self.collection = self.ingester.collection
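
TestArgs and IngesterDataCube are not defined in these excerpts. From the way the tests assign attributes after construction, TestArgs is presumably just a bare attribute container standing in for parsed command-line arguments, along these lines (a hypothetical stand-in, not the real class):

    class TestArgs(object):
        """Hypothetical stand-in for parsed command-line arguments; the
        tests set attributes such as config_file and debug directly."""
        pass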
Code Example #3
    def setUp(self):
        #
        # Parse out the name of the test case and use it to name a logfile
        #
        match = re.search(r'\.([^\.]+)$', self.id())
        if match:
            name = match.group(1)
        else:
            name = 'TestIngester'

        logfile_name = "%s.log" % name
        self.logfile_path = os.path.join(self.OUTPUT_DIR, logfile_name)
        self.expected_path = os.path.join(self.EXPECTED_DIR, logfile_name)

        #
        # Set up a handler to log to the logfile, and attach it to the
        # root logger.
        #
        self.handler = logging.FileHandler(self.logfile_path, mode='w')
        self.handler.setLevel(logging.INFO)
        self.handler.setFormatter(logging.Formatter('%(message)s'))
        LOGGER.addHandler(self.handler)

        # Create an empty database
        self.test_conn = None
        print('Create an empty database')
        self.test_dbname = dbutil.random_name("test_dataset_record")
        print('Creating %s' % self.test_dbname)
        dbutil.TESTSERVER.create(self.test_dbname,
                                 self.INPUT_DIR, "hypercube_empty.sql")

        # Set the datacube configuration file to point to the empty database
        configuration_dict = {'dbname': self.test_dbname}
        config_file_path = dbutil.update_config_file2(configuration_dict,
                                                      self.INPUT_DIR,
                                                      self.OUTPUT_DIR,
                                                      "test_datacube.conf")

        # Set an instance of the datacube and pass it to an ingester instance
        test_args = TestArgs()
        test_args.config_file = config_file_path
        test_args.debug = False
        test_datacube = IngesterDataCube(test_args)
        self.ingester = TestIngester(datacube=test_datacube)
        self.collection = self.ingester.collection
Code Example #4
    def setUp(self):
        """Set up ingester."""

        self.stopwatch = Stopwatch()

        updates = {
            "start_date": "01/03/2014",
            "end_date": "15/03/2014",
            "min_path": "090",
            "max_path": "093",
            "min_row": "087",
            "max_row": "090",
        }

        config_file = dbutil.update_config_file2(updates, self.INPUT_DIR, self.OUTPUT_DIR, "test_datacube.conf")

        sys.argv = [sys.argv[0], "--config=%s" % config_file, "--source=%s" % self.SOURCE_DIR]

        self.ingester = LandsatIngester()
Code Example #5
File: test_dbutil.py Project: ama-jharrison/agdc
    def test_update_config_file2(self):
        "Test config file update utility, version 2."

        input_dir = dbutil.input_directory(self.MODULE, self.SUITE)
        output_dir = dbutil.output_directory(self.MODULE, self.SUITE)
        expected_dir = dbutil.expected_directory(self.MODULE, self.SUITE)

        updates = {"dbname": "TEST_DBNAME", "temp_dir": "TEST_TEMP_DIR", "tile_root": "TEST_TILE_ROOT"}
        config_file_name = "test_datacube.conf"
        output_file_name = "test2_datacube.conf"

        output_path = dbutil.update_config_file2(updates, input_dir, output_dir, config_file_name, output_file_name)

        expected_path = os.path.join(expected_dir, output_file_name)
        if not os.path.isfile(expected_path):
            self.skipTest("Expected config file not found.")
        else:
            try:
                subprocess.check_output(["diff", output_path, expected_path])
            except subprocess.CalledProcessError as err:
                self.fail("Config file does not match expected result:\n" + err.output)
Code Example #6
    def setUp(self):
        """Set up ingester."""

        self.stopwatch = Stopwatch()

        updates = {'start_date': '01/03/2014',
                   'end_date': '15/03/2014',
                   'min_path': '090',
                   'max_path': '093',
                   'min_row': '087',
                   'max_row': '090'
                   }

        config_file = dbutil.update_config_file2(updates,
                                                 self.INPUT_DIR,
                                                 self.OUTPUT_DIR,
                                                 'test_datacube.conf')

        sys.argv = [sys.argv[0],
                    "--config=%s" % config_file,
                    "--source=%s" % self.SOURCE_DIR
                    ]

        self.ingester = LandsatIngester()
Code Example #7
    def test_landsat_tiler(self):
        """Test the cataloging and tiling of Landsat scences and compare
        resulting database and tile contents with an ingestion benchmark"""
        # This test is intended as an example, and so is extensively
        # commented.
        # Open a log file
        if self.mode not in [0, 1, 2, 3]:
            self.skipTest('Skipping test_landsat_tiler since flag is not in [0, 1, 2, 3]')
        logfile_path = os.path.join(self.OUTPUT_DIR, "test_landsat_tiler.log")
        self.logfile = open(logfile_path, "w")

        #
        # Create the initial database
        #

        # Randomise the name to avoid collisions with other users.
        self.test_dbname = dbutil.random_name("test_tiler")

        # Create the database.
        print('About to create database from %s'
              % os.path.join(self.INPUT_DIR, "hypercube_empty.sql"))
        if self.mode != 1:
            dbutil.TESTSERVER.create(self.test_dbname,
                                     self.INPUT_DIR, "hypercube_empty.sql")

        #
        # Run dbupdater on the test database and save the result
        #
        # Create updated datacube_conf file with the new dbname and tile_root
        tile_root = os.path.join(self.OUTPUT_DIR, "tiles")
        configuration_dict = {'dbname': self.test_dbname,
                              'tile_root': tile_root}
        config_file_path = dbutil.update_config_file2(configuration_dict,
                                                      self.INPUT_DIR,
                                                      self.OUTPUT_DIR,
                                                      "test_datacube.conf")
        # Run dbupdater

        ingest_dir = os.path.join(self.INPUT_DIR, 'tiler_testing')
        dbupdater_cmd = ["python",
                         "dbupdater.py",
                         "--debug",
                         "--config=%s" % config_file_path,
                         "--source=%s" % ingest_dir,
                         "--removedblist",
                         "--followsymlinks"]
        if self.mode != 1:
            subprocess.check_call(dbupdater_cmd, stdout=self.logfile,
                                  stderr=subprocess.STDOUT)

        # Run landsat_tiler
        landsat_tiler_cmd = ["python",
                             "landsat_tiler.py",
                             "--config=%s" % config_file_path]
        if self.mode != 1:
            subprocess.check_call(landsat_tiler_cmd, stdout=self.logfile,
                                  stderr=subprocess.STDOUT)
        # Save the updated database
        if self.mode != 1:
            dbutil.TESTSERVER.save(self.test_dbname, self.OUTPUT_DIR,
                                   "tiler_testing.sql")
        #
        # If an expected result exists then load it and compare
        #
        # Check for expected result
        if self.mode > 0 and os.path.isfile(os.path.join(self.EXPECTED_DIR,
                                                         "tiler_testing.sql")):
            print('Starting to check differences')
            #MPHtemp create the output database
            if self.mode == 1:
                self.test_dbname = dbutil.random_name("tiler_testing")
                dbutil.TESTSERVER.create(self.test_dbname,
                                         self.OUTPUT_DIR, "tiler_testing.sql")
            #END MPHtemp

            # Create a randomised name...
            self.expected_dbname = dbutil.random_name("expected_tiler_testing")

            # load the database...
            dbutil.TESTSERVER.create(self.expected_dbname,
                                     self.EXPECTED_DIR, "tiler_testing.sql")
            # create database connections...
            self.test_conn = dbutil.TESTSERVER.connect(self.test_dbname)
            self.expected_conn = \
                dbutil.TESTSERVER.connect(self.expected_dbname)

            # and compare.
            dbases_agree = dbcompare.compare_databases(self.test_conn,
                                               self.expected_conn,
                                               output=self.logfile,
                                               verbosity=3)

            if self.mode == 2:
                #Compare databases and fail test if they differ
                assert dbases_agree, "Databases do not match."

            #Compare data within corresponding files of the EXPECTED_DIR and
            #OUTPUT_DIR. Get the list of tile pathnames from the EXPECTED and
            #OUTPUT databases' respective tile tables. There is an assumption
            #here that, within each of the expected and output databases,
            #the tile basename uniquely defines both the tile_type_id and the
            #full tile pathname. However, if the tile type table has assigned
            #ORTHO bands to a new tile_type, then corresponding tiles in
            #expected and output may be of different tile_types. So we need
            #both self.bands_expected and self.bands_output.
            expected_tile_dict, output_tile_dict = \
                self.get_tile_pathnames(self.expected_conn, self.test_conn)
            tiles_expected = set(expected_tile_dict.keys())
            tiles_output = set(output_tile_dict.keys())
            tiles_expected_or_output = tiles_expected | tiles_output

            #Construct band source table as per datacube module
            self.bands_expected = \
                self.construct_bands_source_dict(self.expected_conn)
            self.bands_output =\
                self.construct_bands_source_dict(self.test_conn)
            #file_pattern to parse file name for information
            file_pattern = [r'(?P<sat>\w+)_(?P<sensor>\w+)_',
                            r'(?P<processing_level>\w+)_',
                            r'(?P<xindex>-*\d+)_(?P<yindex>-*\d+)_'
                            r'(?P<year>\d+)-(?P<month>\d+)-'
                            r'(?P<day>\d+)T(?P<hour>\d+)-(?P<minute>\d+)-',
                            r'(?P<second_whole>\d+)\.(?P<second_fraction>\d+)'
                            r'\.(?P<file_extension>.+)']
            pattern = re.compile(''.join(file_pattern))
            #Set up dictionary of pixel counts to be accumulated per
            #(processing_level, tile_layer) over all tiles
            #0: total_pixel_count_expected
            #1: total_pixel_count_output
            #2: total_pixel_count_both
            #3: total_pixel_count_expected_not_output
            #4: total_pixel_count_output_not_expected
            pixel_count_dict = {}
            #Set up nested dicts of difference counts
            difference_count_dict = {}

            #For each tile in EXPECTED_DIR and OUTPUT_DIR, get pixel counts and
            #difference histograms
            #There are five dictionaries involved:
            ### tile_name_dict             {'sat': LS5, 'sensor': TM, ...}
            ### bands_dict_expected:       those bands from self.bands_expected
            ###                            corresponding to current tile's
            ###                            tile_type_id and (satellite, sensor)
            ### bands_dict_output:         output database's correspondent to
            ###                            bands_dict_expected
            ### level_dict_expected:       those bands from bands_dict_expected
            ###                            for  which the processing level
            ###                            matches that for the current tile
            ### level_dict_output:         output database's correspondent to
            ###                            level_dict_expected
            ### all_levels_info_dict       [level_dict_expected,
            ####                           level_dict_output]
            ###                            for each processing level

            all_levels_info_dict = {}
            for tile_name in tiles_expected_or_output:
                print('Processing tile %s' % tile_name)
                tile_type_id_expected = None
                tile_type_id_output = None
                fname_expected = None
                fname_output = None
                #If tile is in either database, extract tile_type and pathname
                if tile_name in tiles_expected:
                    tile_type_id_expected, fname_expected = \
                        expected_tile_dict[tile_name]
                if tile_name in tiles_output:
                    tile_type_id_output, fname_output = \
                        output_tile_dict[tile_name]
                #Extract information from the tile name and select
                #nested dictionary for this tile from bands table,
                #given the (sat, sensor) [or("DERIVED', 'PQA') for PQA],
                #which will be common to expected and output tiles, and the
                #tile_type_id, which may be different for expected and output
                matchobj = re.match(pattern, tile_name)
                tile_name_dict = matchobj.groupdict()
                full_key_expected = \
                    self.get_tiletype_sat_sens_level(tile_type_id_expected,
                                                tile_name_dict)
                full_key_output = \
                    self.get_tiletype_sat_sens_level(tile_type_id_output,
                                                tile_name_dict)
                #Following will raise assertion error if a tile's
                #tile_type_id has changed since benchmark ingestion
                full_key = self.check_equal_or_null(full_key_expected,
                                               full_key_output)
                level_dict_expected = {}
                level_dict_output = {}
                #full_key is (tile_type, sat, sensor, processing_level)
                if full_key in all_levels_info_dict:
                    (level_dict_expected, level_dict_output) = \
                        all_levels_info_dict[full_key]
                if level_dict_expected == {} and full_key_expected is not None:
                    level_dict_expected = \
                        self.collect_source_bands(self.bands_expected,
                                                  full_key)
                if level_dict_output == {} and full_key_output is not None:
                    level_dict_output = \
                        self.collect_source_bands(self.bands_output,
                                                  full_key)
                if full_key not in all_levels_info_dict:
                    all_levels_info_dict[full_key] = [level_dict_expected,
                                                       level_dict_output]
                if all_levels_info_dict[full_key][0] == {} and \
                        level_dict_expected != {}:
                    all_levels_info_dict[full_key][0] = level_dict_expected
                if all_levels_info_dict[full_key][1] == {} and \
                        level_dict_output != {}:
                    all_levels_info_dict[full_key][1] = level_dict_output

                #Check that the number of bands is as expected, adding
                #singleton dimension if only one band
                ([data_expected, data_output], number_layers) = \
                    self.load_and_check(fname_expected, fname_output,
                                        level_dict_expected,
                                        level_dict_output)
                assert bool(fname_expected) == (data_expected is not None) and \
                    bool(fname_output) == (data_output is not None), \
                    "data array should exist if and only if fname exists"

                for ilayer in range(number_layers):
                    #Define expected and output band data
                    band_expected, dtype_expected = \
                        self.get_band_data(data_expected, ilayer)
                    band_output, dtype_output = \
                        self.get_band_data(data_output, ilayer)
                    assert (band_expected is None) == (dtype_expected is None) \
                        and (band_output is None) == (dtype_output is None), \
                        "band data should exist if and only if dtype exists"
                    dtype_this = self.check_equal_or_null(dtype_expected,
                                                          dtype_output)
                    #calculate the number of bins required to store the
                    #histogram of differences from this datatype
                    if tile_name_dict['processing_level'] == 'PQA':
                        #possible difference values are 0 through 16,
                        #(number of tests which differ)
                        bin_count = 16 + 1
                    else:
                        #possible difference vals are min through max of dtype
                        bin_count = numpy.iinfo(dtype_this).max - \
                            numpy.iinfo(dtype_this).min + 1
                        assert bin_count < 66000, "datatype is more than 16" \
                            "bits, need to add code to coarsen the" \
                            "histogram bins or use apriori  max and" \
                            "min values of the data"
                    #The histograms are per (level, layer).
                    #Could have one histogram per (sat, sensor, level, layer)
                    #and then, depending on verbosity, aggregate during report.
                    #But for now, just key by (level, layer).
                    result_key = (full_key[3], ilayer + 1)
                    if result_key not in pixel_count_dict:
                        pixel_count_dict[result_key] = numpy.zeros(shape=(5),
                                                            dtype=numpy.uint64)
                        difference_count_dict[result_key] = \
                            numpy.zeros(shape=(bin_count), dtype=numpy.uint64)

                    pixel_count = pixel_count_dict[result_key]
                    difference_count = difference_count_dict[result_key]
                    if tile_name_dict['processing_level'] == 'PQA':
                        if band_expected is None:
                            band_expected = 0
                        if band_output is None:
                            band_output = 0
                        #define index as those pixels with contiguity bit set
                        index_expected = \
                            numpy.bitwise_and(band_expected,
                                              1 << self.PQA_CONTIGUITY_BIT) > 0
                        index_output = \
                            numpy.bitwise_and(band_output,
                                              1 << self.PQA_CONTIGUITY_BIT) > 0
                    else:
                        #For NBAR and ORTHO use nodata_value
                        nodata_value = \
                            level_dict_output[ilayer + 1]['nodata_value']
                        if band_expected is None:
                            band_expected = nodata_value
                        if band_output is None:
                            band_output = nodata_value
                        index_expected = band_expected != nodata_value
                        index_output = band_output != nodata_value
                    pixel_count[0] += numpy.count_nonzero(index_expected)
                    pixel_count[1] += numpy.count_nonzero(index_output)
                    pixel_count[2] += \
                        numpy.count_nonzero(numpy.logical_and(index_expected,
                                                              index_output))
                    pixel_count[3] += \
                        numpy.count_nonzero(numpy.logical_and
                                            (index_expected, ~index_output))
                    pixel_count[4] += \
                        numpy.count_nonzero(numpy.logical_and
                                            (~index_expected, index_output))
                    #Only want to calculate differences at common pixels
                    index_both = numpy.logical_and(index_expected,
                                                   index_output)
                    if numpy.count_nonzero(index_both) == 0:
                        continue
                    valid_data_expected = band_expected[index_both].ravel()
                    valid_data_output = band_output[index_both].ravel()
                    #Calculate difference histogram and add to running total
                    if tile_name_dict['processing_level'] == 'PQA':
                        difference = \
                            self.count_bitwise_diffs(valid_data_expected,
                                                     valid_data_output)
                    else:
                        difference = abs(valid_data_output.astype(numpy.int64)
                                     - valid_data_expected.astype(numpy.int64))
                    hist, dummy_bin_edges = \
                        numpy.histogram(difference,
                                        numpy.array(range(bin_count + 1),
                                                    dtype=numpy.uint64))
                    difference_count += hist
                    #dereference band data
                    band_expected = None
                    band_output = None
                    difference = None
                    #end of layer loop
                #dereference tile data

                data_expected = None
                data_output = None

            #Output
            #for sat_sen, band_dict in all_bands_dict:
            fp = open(os.path.join(self.OUTPUT_DIR,
                                   'Histogram_output.txt'), 'w')
            fp.write('##### COMPARISON OF TILED DATA IN FOLLOWING '
                     'DIRECTORIES\n%s\n%s\n' % (self.EXPECTED_DIR,
                                                self.OUTPUT_DIR))
            result_keys_processed = []
            for full_key in all_levels_info_dict.keys():
                dummy, dummy, dummy, processing_level = full_key
                top_layer_result_key = (processing_level, 1)
                if top_layer_result_key in result_keys_processed:
                    continue
                fp.write('#### Processing Level: %s\n' % processing_level)
                level_dict_expected, level_dict_output = \
                    all_levels_info_dict[full_key]
                assert set(level_dict_expected.keys()) == \
                       set(level_dict_output.keys()), "different key sets"
                number_layers = len(level_dict_output.keys())
                for this_layer in range(1, number_layers + 1):
                    result_key = (processing_level, this_layer)
                    result_keys_processed.append(result_key)
                    fp.write('### tile_layer = %d\n' % this_layer)
                    for key, val in level_dict_expected[this_layer].items():
                        if key == 'tile_layer' or key == 'level_name':
                            continue
                        outline = '# %s = %s' % (key, val)
                        if str(level_dict_output[this_layer][key]) != str(val):
                            outline = '%s (%s in output database)' % \
                                (outline, level_dict_output[this_layer][key])
                        fp.write('%s\n' % outline)
                    #get key for pixel_count_dict and difference_count_dict
                    #Print counts of pixels with valid data
                    fp.write('#Valid data counts\n')
                    pixel_count = pixel_count_dict[result_key]
                    count_desc = ['Expected\t', 'Output\t\t', 'Common\t\t',
                                  'Missing\t\t', 'Extra\t\t']
                    for desc, num in zip(count_desc, pixel_count):
                        fp.write('\t\t%s%d\n' % (desc, num))
                    #Print histograms of differences in valid data
                    fp.write('#Histogram of differences in valid data\n')
                    difference_count = difference_count_dict[result_key]
                    index_nonzero_bins = difference_count > 0
                    for bin_no in range(len(difference_count)):
                        if index_nonzero_bins[bin_no]:
                            fp.write('\t\tDifference of %d: %d\n'
                                     % (bin_no, difference_count[bin_no]))
            fp.close()
        else:
            if self.mode > 0:
                self.skipTest("Expected database save file not found.")