"""
    landsat_ingester.py - Ingester script for Landsat datasets.
"""

import logging
from agdc.landsat_ingester import LandsatIngester

# Start ingest process
if __name__ == "__main__":

    #pylint:disable=invalid-name
    #
    # Top level variables are OK if this is the top level script.
    #

    ingester = LandsatIngester()

    if ingester.args.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    ingester.ingest(ingester.args.source_dir)

    ingester.collection.cleanup()
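
# The ingester is driven entirely from the command line; based on the
# argument vector the tests below construct, and on the --debug flag read
# above, a typical invocation would look something like this (the config
# path is illustrative, the source path is the one the tests use):
#
#     python landsat_ingester.py --config=/path/to/datacube.conf \
#         --source=/g/data1/rs0/scenes/ARG25_V0.0/2014-03 --debug
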
import os
import sys
import subprocess
import unittest

# NOTE: dbutil, Stopwatch and DatasetError are used below but are not defined
# in this listing; these import paths are assumptions based on the AGDC test
# layout (a minimal Stopwatch sketch also appears at the end of the listing).
import dbutil
from agdc.cube_util import DatasetError

from agdc.landsat_ingester import LandsatIngester


class TestDatasetFiltering(unittest.TestCase):
    """Unit and performance tests for dataset filtering."""

    MODULE = 'landsat_ingester'
    SUITE = 'TestDatasetFiltering'

    INPUT_DIR = dbutil.input_directory(MODULE, SUITE)
    OUTPUT_DIR = dbutil.output_directory(MODULE, SUITE)
    EXPECTED_DIR = dbutil.expected_directory(MODULE, SUITE)

    SOURCE_DIR = '/g/data1/rs0/scenes/ARG25_V0.0/2014-03'

    def setUp(self):
        """Set up ingester."""

        self.stopwatch = Stopwatch()

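        # Filter parameters: dates are in DD/MM/YYYY form (the first half of
        # March 2014, matching SOURCE_DIR), and the path/row bounds are WRS-2
        # ranges, zero-padded to three digits.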
        updates = {'start_date': '01/03/2014',
                   'end_date': '15/03/2014',
                   'min_path': '090',
                   'max_path': '093',
                   'min_row': '087',
                   'max_row': '090'
                   }

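        # update_config_file2 (a dbutil test helper, not shown here) is
        # understood to copy 'test_datacube.conf' from INPUT_DIR into
        # OUTPUT_DIR with the values above substituted, returning the path
        # of the new file -- inferred from this call, not verified.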
        config_file = dbutil.update_config_file2(updates,
                                                 self.INPUT_DIR,
                                                 self.OUTPUT_DIR,
                                                 'test_datacube.conf')

        sys.argv = [sys.argv[0],
                    "--config=%s" % config_file,
                    "--source=%s" % self.SOURCE_DIR
                    ]

        self.ingester = LandsatIngester()

    @staticmethod
    def dump_dataset_names(output_path, dataset_list):
        """Dump the names of the datasets to a file.

        This writes a list of basenames from the paths in dataset_list to
        a file at output_path."""

        with open(output_path, 'w') as out:
            for dataset_path in dataset_list:
                out.write(os.path.basename(dataset_path) + '\n')

    def check_datasets_list(self, output_path, expected_path):
        """If an expected datasets file exists, check to see if it matches."""

        if not os.path.isfile(expected_path):
            self.skipTest("Expected dataset list file not found.")
        else:
            try:
                subprocess.check_output(['diff', output_path, expected_path])
            except subprocess.CalledProcessError as err:
                self.fail("Filtered datasets do not match those expected:\n" +
                          err.output)
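
    # Note: the comparison above shells out to a POSIX 'diff' binary. A
    # portable, pure-Python alternative (a sketch using only the standard
    # library, not part of the original suite) would be:
    #
    #     import difflib
    #     with open(output_path) as out_f, open(expected_path) as exp_f:
    #         delta = list(difflib.unified_diff(out_f.readlines(),
    #                                           exp_f.readlines()))
    #     if delta:
    #         self.fail("Filtered datasets do not match those expected:\n" +
    #                   "".join(delta))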

    def test_fast_filter(self):
        """Test the results of a fast (filename based) filter."""

        print ""
        print "Finding datasets ..."

        self.stopwatch.start()
        dataset_list = self.ingester.find_datasets(self.SOURCE_DIR)
        self.stopwatch.stop()
        (elapsed_time, cpu_time) = self.stopwatch.read()

        print ""
        print "%s datasets found." % len(dataset_list)
        print "elapsed time: %s" % elapsed_time
        print "cpu time: %s" % cpu_time

        print ""
        print "Doing fast filter ..."

        self.stopwatch.reset()
        self.stopwatch.start()
        filtered_list = self.ingester.fast_filter_datasets(dataset_list)
        self.stopwatch.stop()
        (elapsed_time, cpu_time) = self.stopwatch.read()

        print ""
        print "%s out of %s datasets remain." % \
            (len(filtered_list), len(dataset_list))
        print "elapsed time: %s" % elapsed_time
        print "cpu time: %s" % cpu_time
        print ""

        output_path = os.path.join(self.OUTPUT_DIR, 'fast_filter_datasets.txt')
        self.dump_dataset_names(output_path, filtered_list)

        expected_path = os.path.join(self.EXPECTED_DIR, 'filter_datasets.txt')
        self.check_datasets_list(output_path, expected_path)

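    # fast_filter_datasets is defined on the ingester and is not shown in
    # this listing; "filename based" here means it is understood to decide
    # from the dataset directory name alone (for GA ARG25 scenes the path,
    # row and acquisition date are encoded in the name), so no dataset has
    # to be opened. That reading is inferred from the test, not verified
    # against the implementation.
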
    def test_metadata_filter(self):
        """Test the results of a metadata based filter."""

        print ""
        print "Finding datasets ..."

        self.stopwatch.start()
        dataset_list = self.ingester.find_datasets(self.SOURCE_DIR)
        self.stopwatch.stop()
        (elapsed_time, cpu_time) = self.stopwatch.read()

        print ""
        print "%s datasets found." % len(dataset_list)
        print "elapsed time: %s" % elapsed_time
        print "cpu time: %s" % cpu_time

        print ""
        print "Doing metadata filter ..."

        self.stopwatch.reset()
        self.stopwatch.start()

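        # filter_on_metadata evidently signals rejection by raising
        # DatasetError: datasets that pass without raising are kept.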
        filtered_list = []
        for dataset_path in dataset_list:
            dataset = self.ingester.open_dataset(dataset_path)
            try:
                self.ingester.filter_on_metadata(dataset)
            except DatasetError:
                pass
            else:
                filtered_list.append(dataset_path)

        self.stopwatch.stop()
        (elapsed_time, cpu_time) = self.stopwatch.read()

        print ""
        print "%s out of %s datasets remain." % \
            (len(filtered_list), len(dataset_list))
        print "elapsed time: %s" % elapsed_time
        print "cpu time: %s" % cpu_time
        print ""

        output_path = os.path.join(self.OUTPUT_DIR,
                                   'metadata_filter_datasets.txt')
        self.dump_dataset_names(output_path, filtered_list)

        expected_path = os.path.join(self.EXPECTED_DIR, 'filter_datasets.txt')
        self.check_datasets_list(output_path, expected_path)
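

# The Stopwatch class used by these tests is not part of this listing. A
# minimal, self-contained sketch of the interface the tests rely on --
# start(), stop(), reset(), and read() returning an (elapsed, cpu) pair --
# might look like this. This is an assumption, not the AGDC implementation.

import os
import time


class Stopwatch(object):
    """Accumulating timer for wall-clock and CPU seconds."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero the accumulated times."""
        self.elapsed_time = 0.0
        self.cpu_time = 0.0
        self._mark = None

    def start(self):
        """Record the current wall-clock and CPU times."""
        self._mark = (time.time(), sum(os.times()[:2]))

    def stop(self):
        """Add the time since start() to the running totals."""
        now = (time.time(), sum(os.times()[:2]))
        self.elapsed_time += now[0] - self._mark[0]
        self.cpu_time += now[1] - self._mark[1]
        self._mark = None

    def read(self):
        """Return the accumulated (elapsed, cpu) times in seconds."""
        return (self.elapsed_time, self.cpu_time)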