def test_create_ndvi_worldgrid(tempdir):
    """
    This is really an integration test because it tests the following :
    - Creates of a NDVI worldgrid from HDF files
    - Writes the NDVI fractions containing the HDF data
    - Loads (using geographical coordinates) the area corresponding to the
      HDF from the created jgrid
    - Verify that the jgrid data matches the HDF data
    This require that jgrid read/write work properly as well as all the
    jgrid georeferencing
    """
    script = os.path.join(test_utils.get_rastercube_dir(), 'scripts',
                          'create_ndvi_worldgrid.py')
    # The worldgrid is written to a temporary directory provided by the
    # test fixture, with one subdirectory per dataset (ndvi, qa)
    worldgrid = tempdir
    ndvi_dir = os.path.join(worldgrid, 'ndvi')
    qa_dir = os.path.join(worldgrid, 'qa')
    dates_csv = os.path.join(utils.get_data_dir(), '1_manual',
                             'ndvi_dates.2.csv')
    tile = 'h29v07'
    print 'dates_csv : ', dates_csv
    # Run the ingestion script in a subprocess, the same way a user would.
    # --frac_ndates=1 forces one date per fraction file so that date
    # chunking is exercised (the CSV contains two dates => two date fracs)
    cmd = [
        sys.executable,
        script,
        '--tile=%s' % tile,
        '--noconfirm',
        '--worldgrid=%s' % worldgrid,
        '--frac_ndates=1',
        '--dates_csv=%s' % dates_csv
    ]
    # check_output raises CalledProcessError if the script fails, which
    # fails the test; the captured output itself is not inspected
    output = subprocess.check_output(cmd)

    # Verify that the header has the correct dates
    ndvi_header = jgrid.load(ndvi_dir)
    qa_header = jgrid.load(qa_dir)
    dates = [utils.format_date(ts) for ts in ndvi_header.timestamps_ms]
    assert dates == ['2000_02_18', '2000_03_05']
    assert ndvi_header.num_dates_fracs == 2

    # Load the HDF and the corresponding date from the jgrid and
    # check for consistency
    hdf_fname = os.path.join(utils.get_modis_hdf_dir(), '2000',
                             'MOD13Q1.A2000065.h29v07.005.2008238013448.hdf')
    f = modis.ModisHDF(hdf_fname)
    ndvi_ds = f.load_gdal_dataset(modis.MODIS_NDVI_DATASET_NAME)
    hdf_ndvi = ndvi_ds.ReadAsArray()
    hdf_qa = f.load_gdal_dataset(modis.MODIS_QA_DATASET_NAME).ReadAsArray()

    # Load from the jgrid using the lat/lng polygon of the HDF file
    # This means we also test georeferencing
    hdf_poly = gdal_utils.latlng_bounding_box_from_ds(ndvi_ds)
    xy_from, qa, qa_mask, ndvi, ndvi_mask = \
        jgrid_utils.load_poly_latlng_from_multi_jgrids(
            [qa_header, ndvi_header], hdf_poly)
    assert ndvi.shape[:2] == hdf_ndvi.shape

    # Verify that the jgrid ndvi and the HDF ndvi store the same values.
    # NOTE(review): index 1 selects the second date - this HDF file
    # (A2000065) presumably corresponds to 2000_03_05; confirm against the
    # dates CSV
    assert_array_equal(hdf_ndvi, ndvi[:, :, 1])
    assert_array_equal(hdf_qa, qa[:, :, 1])
def truncate_frac(frac_num, ndvi_root, qa_root): """ Given a frac_num, will truncate the first hdfs file to have a size of (frac_width, frac_height, frac_ndates) which should correspond to (400, 400, 200) """ _start = time.time() ndvi_header = jgrid.load(ndvi_root) qa_header = jgrid.load(qa_root) frac_d = 0 frac_id = (frac_num, frac_d) try: ndvi = jgrid.read_frac(ndvi_header.frac_fname(frac_id)) qa = jgrid.read_frac(qa_header.frac_fname(frac_id)) except ValueError: print 'Fraction', frac_num, 'is corrupted!' print 'Solve the problem for frac_num:', frac_num, 'frac_d:', frac_d, 'and execute the script again' return # At this point, we just have to truncate the array if ndvi is not None: if ndvi.shape[2] > ndvi_header.frac_ndates: ndvi = ndvi[:, :, 0:ndvi_header.frac_ndates] ndvi_header.write_frac(frac_id, ndvi) else: print frac_num, ': NDVI already OK' else: print frac_num, ': NDVI is None' if qa is not None: if qa.shape[2] > qa_header.frac_ndates: qa = qa[:, :, 0:qa_header.frac_ndates] qa_header.write_frac(frac_id, qa) else: print frac_num, ': QA already OK' else: print frac_num, ': QA is None' print 'Processed %d, took %.02f [s]' % (frac_num, time.time() - _start) sys.stdout.flush()
def assert_grids_same(root1, root2):
    """
    Asserts that two jgrids are the same (same header, same data).
    This assert that the two grids have the same x/y chunking, but NOT
    necessarily the same date chunking
    """
    header_a = jgrid.load(root1)
    header_b = jgrid.load(root2)

    # Headers must agree on timestamps, overall shape and fraction count
    assert np.all(header_a.timestamps_ms == header_b.timestamps_ms)
    assert np.all(header_a.shape == header_b.shape)
    assert header_a.num_fracs == header_b.num_fracs

    # Both grids must expose exactly the same set of fractions
    avail_a = header_a.list_available_fracnums()
    avail_b = header_b.list_available_fracnums()
    assert np.all(avail_a == avail_b)

    # Finally, the per-fraction data itself must match element-wise
    for fnum in avail_a:
        assert_array_equal(header_a.load_frac_by_num(fnum),
                           header_b.load_frac_by_num(fnum))
print 'Processed %d, took %.02f [s]' % (frac_num, time.time() - _start) sys.stdout.flush() if __name__ == '__main__': args = parser.parse_args() worldgrid = args.worldgrid ndvi_root = os.path.join(worldgrid, 'ndvi') qa_root = os.path.join(worldgrid, 'qa') nworkers = args.nworkers fraction = args.fraction assert jgrid.Header.exists(ndvi_root) print 'Reading headers from HDFS...' ndvi_header = jgrid.load(ndvi_root) qa_header = jgrid.load(qa_root) assert np.all(ndvi_header.timestamps_ms == qa_header.timestamps_ms) # -- Figure out the fractions we have to update print 'Looking for available fractions in HDFS...' fractions = ndvi_header.list_available_fracnums() if len(fractions) == 0: print 'No fractions to process - terminating' sys.exit(0) assert np.all(fractions == qa_header.list_available_fracnums()) if fraction is None:
def test_create_ndvi_worldgrid(tempdir):
    """
    This is really an integration test because it tests the following :
    - Creates of a NDVI worldgrid from HDF files
    - Writes the NDVI fractions containing the HDF data
    - Loads (using geographical coordinates) the area corresponding to the
      HDF from the created jgrid
    - Verify that the jgrid data matches the HDF data
    This require that jgrid read/write work properly as well as all the
    jgrid georeferencing
    """
    script = os.path.join(test_utils.get_rastercube_dir(), 'scripts',
                          'create_ndvi_worldgrid.py')
    # Build the target worldgrid layout (ndvi/ and qa/ subdirs) inside the
    # temporary directory supplied by the fixture
    worldgrid = tempdir
    ndvi_dir = os.path.join(worldgrid, 'ndvi')
    qa_dir = os.path.join(worldgrid, 'qa')
    dates_csv = os.path.join(utils.get_data_dir(), '1_manual',
                             'ndvi_dates.2.csv')
    tile = 'h29v07'
    print 'dates_csv : ', dates_csv
    # Invoke the ingestion script as a subprocess; --frac_ndates=1 stores a
    # single date per fraction file so date chunking is exercised (the CSV
    # lists two dates => two date fracs expected below)
    cmd = [
        sys.executable,
        script,
        '--tile=%s' % tile,
        '--noconfirm',
        '--worldgrid=%s' % worldgrid,
        '--frac_ndates=1',
        '--dates_csv=%s' % dates_csv
    ]
    # A non-zero exit raises CalledProcessError and fails the test; the
    # captured stdout is not otherwise checked
    output = subprocess.check_output(cmd)

    # Verify that the header has the correct dates
    ndvi_header = jgrid.load(ndvi_dir)
    qa_header = jgrid.load(qa_dir)
    dates = [utils.format_date(ts) for ts in ndvi_header.timestamps_ms]
    assert dates == ['2000_02_18', '2000_03_05']
    assert ndvi_header.num_dates_fracs == 2

    # Load the HDF and the corresponding date from the jgrid and
    # check for consistency
    hdf_fname = os.path.join(utils.get_modis_hdf_dir(), '2000',
                             'MOD13Q1.A2000065.h29v07.005.2008238013448.hdf')
    f = modis.ModisHDF(hdf_fname)
    ndvi_ds = f.load_gdal_dataset(modis.MODIS_NDVI_DATASET_NAME)
    hdf_ndvi = ndvi_ds.ReadAsArray()
    hdf_qa = f.load_gdal_dataset(
        modis.MODIS_QA_DATASET_NAME).ReadAsArray()

    # Load from the jgrid using the lat/lng polygon of the HDF file
    # This means we also test georeferencing
    hdf_poly = gdal_utils.latlng_bounding_box_from_ds(ndvi_ds)
    xy_from, qa, qa_mask, ndvi, ndvi_mask = \
        jgrid_utils.load_poly_latlng_from_multi_jgrids(
            [qa_header, ndvi_header], hdf_poly)
    assert ndvi.shape[:2] == hdf_ndvi.shape

    # Verify that the jgrid ndvi and the HDF ndvi store the same values.
    # NOTE(review): index 1 picks the second of the two dates - this HDF
    # (A2000065) presumably maps to 2000_03_05; verify against the CSV
    assert_array_equal(hdf_ndvi, ndvi[:, :, 1])
    assert_array_equal(hdf_qa, qa[:, :, 1])
return ndvi, qa if __name__ == '__main__': args = parser.parse_args() frac_num = args.fraction frac_d = args.fraction_part frac_id = (frac_num, frac_d) modis_dir = utils.get_modis_hdf_dir() worldgrid = args.worldgrid ndvi_root = os.path.join(worldgrid, 'ndvi') qa_root = os.path.join(worldgrid, 'qa') assert jgrid.Header.exists(ndvi_root) ndvi_header = jgrid.load(ndvi_root) qa_header = jgrid.load(qa_root) fname = ndvi_header.frac_fname(frac_id) if not io.fs_exists(fname): print 'The selected fraction does not exist in HDFS' exit(0) assert np.all(ndvi_header.timestamps_ms == qa_header.timestamps_ms) # Select dates for the requested fraction_part start_date_i = ndvi_header.frac_ndates * frac_d end_date_i = np.amin([len(ndvi_header.timestamps_ms) - start_date_i, ndvi_header.frac_ndates]) selected_dates = ndvi_header.timestamps_ms[start_date_i:end_date_i] modgrid = grids.MODISGrid()
def complete_frac(frac_num, ndvi_root, qa_root, frac_tilename,
                  tilename_fileindex):
    """
    Given a frac_num, will make sure it contains data for all dates in
    ndvi_header.timestamps_ms

    Parameters:
        frac_num: the fraction number to complete
        ndvi_root: root directory of the NDVI jgrid
        qa_root: root directory of the QA jgrid
        frac_tilename: dict mapping frac_num -> MODIS tilename
            (e.g. 'h29v07')
        tilename_fileindex: dict mapping tilename -> {timestamp_ms: hdf
            filename} - assumed to contain an entry for every missing
            timestamp (a missing one raises KeyError)
    """
    _start = time.time()
    modgrid = grids.MODISGrid()
    ndvi_header = jgrid.load(ndvi_root)
    qa_header = jgrid.load(qa_root)
    tilename = frac_tilename[frac_num]
    tile_h, tile_v = modis.parse_tilename(tilename)
    hdf_files = tilename_fileindex[tilename]
    # Candidate date-chunk indices: shape[2] is the total number of dates,
    # frac_ndates the chunk size; +1 covers a trailing partial chunk
    d_from = 0
    d_to = ndvi_header.shape[2] // ndvi_header.frac_ndates + 1
    frac_id = None
    frac_d = None
    # Find the most recent existing fraction and the most recent timestamp
    # (scan chunk indices from newest to oldest, stop at the first that
    # exists on disk/HDFS)
    for frac_d in range(d_from, d_to)[::-1]:
        frac_id = (frac_num, frac_d)
        fname = ndvi_header.frac_fname(frac_id)
        if io.fs_exists(fname):
            break
    # NOTE(review): these only guard against an empty range; if NO chunk
    # exists the loop ends without break and frac_id points at chunk 0,
    # so read_frac below is what would actually fail
    assert frac_id is not None
    assert frac_d is not None
    # Read the data of the most recent fraction in HDFS
    ndvi = jgrid.read_frac(ndvi_header.frac_fname(frac_id))
    qa = jgrid.read_frac(qa_header.frac_fname(frac_id))
    assert ndvi.shape == qa.shape
    # Compute the index of the last date in HDFS
    most_recent_t = frac_d * ndvi_header.frac_ndates + ndvi.shape[2]
    i_range, j_range = modgrid.get_cell_indices_in_tile(
        frac_num, tile_h, tile_v)
    # At this point, we just have to complete with the missing dates:
    # append each missing date to the in-memory arrays, flushing a chunk
    # to storage whenever it reaches frac_ndates dates
    for t in range(most_recent_t, len(ndvi_header.timestamps_ms)):
        ts = ndvi_header.timestamps_ms[t]
        fname = hdf_files[ts]
        new_ndvi, new_qa = read_ndvi_qa(fname, i_range, j_range)
        if ndvi.shape[2] == ndvi_header.frac_ndates:
            # Write a complete fraction
            frac_id = (frac_num, frac_d)
            ndvi_header.write_frac(frac_id, ndvi)
            qa_header.write_frac(frac_id, qa)
            # Prepare variables for a new fraction
            frac_d += 1
            ndvi = new_ndvi[:, :, None]
            qa = new_qa[:, :, None]
        else:
            # TODO: If we end up completing multiple dates, we could preallocate
            # But for now, this is unlikely (we'll complete with the most
            # recent data)
            ndvi = np.concatenate([ndvi, new_ndvi[:, :, None]], axis=2)
            qa = np.concatenate([qa, new_qa[:, :, None]], axis=2)
            assert ndvi.shape == qa.shape
    # Write last incomplete fraction
    # (always rewritten, even when the loop appended nothing)
    frac_id = (frac_num, frac_d)
    ndvi_header.write_frac(frac_id, ndvi)
    qa_header.write_frac(frac_id, qa)
    print 'Processed %d, appended %d dates, took %.02f [s]' % (
        frac_num, len(ndvi_header.timestamps_ms) - most_recent_t,
        time.time() - _start)
    sys.stdout.flush()
def complete_frac(frac_num, ndvi_root, qa_root, frac_tilename,
                  tilename_fileindex):
    """
    Given a frac_num, will make sure it contains data for all dates in
    ndvi_header.timestamps_ms

    Parameters:
        frac_num: the fraction number to complete
        ndvi_root: root directory of the NDVI jgrid
        qa_root: root directory of the QA jgrid
        frac_tilename: dict mapping frac_num -> MODIS tilename
            (e.g. 'h29v07')
        tilename_fileindex: dict mapping tilename -> {timestamp_ms: hdf
            filename} - assumed complete for every missing timestamp
            (a missing entry raises KeyError)
    """
    _start = time.time()
    modgrid = grids.MODISGrid()
    ndvi_header = jgrid.load(ndvi_root)
    qa_header = jgrid.load(qa_root)
    tilename = frac_tilename[frac_num]
    tile_h, tile_v = modis.parse_tilename(tilename)
    hdf_files = tilename_fileindex[tilename]
    # Possible date-chunk indices: total dates (shape[2]) divided by the
    # chunk size (frac_ndates); +1 accounts for a trailing partial chunk
    d_from = 0
    d_to = ndvi_header.shape[2] // ndvi_header.frac_ndates + 1
    frac_id = None
    frac_d = None
    # Find the most recent existing fraction and the most recent timestamp
    # (iterate chunk indices newest-first, keep the first that exists)
    for frac_d in range(d_from, d_to)[::-1]:
        frac_id = (frac_num, frac_d)
        fname = ndvi_header.frac_fname(frac_id)
        if io.fs_exists(fname):
            break
    # NOTE(review): these only catch an empty range; if no chunk exists at
    # all the loop finishes without break leaving frac_id at chunk 0, and
    # the read_frac below is what would fail
    assert frac_id is not None
    assert frac_d is not None
    # Read the data of the most recent fraction in HDFS
    ndvi = jgrid.read_frac(ndvi_header.frac_fname(frac_id))
    qa = jgrid.read_frac(qa_header.frac_fname(frac_id))
    assert ndvi.shape == qa.shape
    # Compute the index of the last date in HDFS
    most_recent_t = frac_d * ndvi_header.frac_ndates + ndvi.shape[2]
    i_range, j_range = modgrid.get_cell_indices_in_tile(frac_num, tile_h,
                                                        tile_v)
    # At this point, we just have to complete with the missing dates:
    # each missing date is appended in memory, and a chunk is written out
    # whenever it fills up to frac_ndates dates
    for t in range(most_recent_t, len(ndvi_header.timestamps_ms)):
        ts = ndvi_header.timestamps_ms[t]
        fname = hdf_files[ts]
        new_ndvi, new_qa = read_ndvi_qa(fname, i_range, j_range)
        if ndvi.shape[2] == ndvi_header.frac_ndates:
            # Write a complete fraction
            frac_id = (frac_num, frac_d)
            ndvi_header.write_frac(frac_id, ndvi)
            qa_header.write_frac(frac_id, qa)
            # Prepare variables for a new fraction
            frac_d += 1
            ndvi = new_ndvi[:, :, None]
            qa = new_qa[:, :, None]
        else:
            # TODO: If we end up completing multiple dates, we could preallocate
            # But for now, this is unlikely (we'll complete with the most
            # recent data)
            ndvi = np.concatenate([ndvi, new_ndvi[:, :, None]], axis=2)
            qa = np.concatenate([qa, new_qa[:, :, None]], axis=2)
            assert ndvi.shape == qa.shape
    # Write last incomplete fraction
    # (unconditionally rewritten, even if no date was appended)
    frac_id = (frac_num, frac_d)
    ndvi_header.write_frac(frac_id, ndvi)
    qa_header.write_frac(frac_id, qa)
    print 'Processed %d, appended %d dates, took %.02f [s]' % (
        frac_num, len(ndvi_header.timestamps_ms) - most_recent_t,
        time.time() - _start
    )
    sys.stdout.flush()