def test_create_ndvi_worldgrid(tempdir):
    """
    This is really an integration test because it tests the following :
    - Creates of a NDVI worldgrid from HDF files
    - Writes the NDVI fractions containing the HDF data
    - Loads (using geographical coordinates) the area corresponding to the
      HDF from the created jgrid
    - Verify that the jgrid data matches the HDF data

    This require that jgrid read/write work properly as well as all the jgrid
    georeferencing
    """
    script = os.path.join(test_utils.get_rastercube_dir(), 'scripts',
                          'create_ndvi_worldgrid.py')
    worldgrid = tempdir
    ndvi_dir = os.path.join(worldgrid, 'ndvi')
    qa_dir = os.path.join(worldgrid, 'qa')
    dates_csv = os.path.join(utils.get_data_dir(), '1_manual',
                             'ndvi_dates.2.csv')
    tile = 'h29v07'
    print 'dates_csv : ', dates_csv
    cmd = [
        sys.executable, script,
        '--tile=%s' % tile, '--noconfirm',
        '--worldgrid=%s' % worldgrid, '--frac_ndates=1',
        '--dates_csv=%s' % dates_csv
    ]
    output = subprocess.check_output(cmd)

    # Verify that the header has the correct dates
    ndvi_header = jgrid.load(ndvi_dir)
    qa_header = jgrid.load(qa_dir)
    dates = [utils.format_date(ts) for ts in ndvi_header.timestamps_ms]
    assert dates == ['2000_02_18', '2000_03_05']

    assert ndvi_header.num_dates_fracs == 2

    # Load the HDF and the corresponding date from the jgrid and
    # check for consistency
    hdf_fname = os.path.join(utils.get_modis_hdf_dir(), '2000',
                             'MOD13Q1.A2000065.h29v07.005.2008238013448.hdf')
    f = modis.ModisHDF(hdf_fname)
    ndvi_ds = f.load_gdal_dataset(modis.MODIS_NDVI_DATASET_NAME)

    hdf_ndvi = ndvi_ds.ReadAsArray()
    hdf_qa = f.load_gdal_dataset(modis.MODIS_QA_DATASET_NAME).ReadAsArray()

    # Load from the jgrid using the lat/lng polygon of the HDF file
    # This means we also test georeferencing
    hdf_poly = gdal_utils.latlng_bounding_box_from_ds(ndvi_ds)
    xy_from, qa, qa_mask, ndvi, ndvi_mask = \
        jgrid_utils.load_poly_latlng_from_multi_jgrids(
                [qa_header, ndvi_header], hdf_poly)
    assert ndvi.shape[:2] == hdf_ndvi.shape
    # Verify that the jgrid ndvi and the HDF ndvi store the same values
    assert_array_equal(hdf_ndvi, ndvi[:, :, 1])
    assert_array_equal(hdf_qa, qa[:, :, 1])
def truncate_frac(frac_num, ndvi_root, qa_root):
    """
    Given a frac_num, will truncate the first hdfs file to have a size of
    (frac_width, frac_height, frac_ndates) which should correspond to
    (400, 400, 200)
    """
    _start = time.time()
    ndvi_header = jgrid.load(ndvi_root)
    qa_header = jgrid.load(qa_root)

    frac_d = 0
    frac_id = (frac_num, frac_d)

    try:
        ndvi = jgrid.read_frac(ndvi_header.frac_fname(frac_id))
        qa = jgrid.read_frac(qa_header.frac_fname(frac_id))
    except ValueError:
        print 'Fraction', frac_num, 'is corrupted!'
        print 'Solve the problem for frac_num:', frac_num, 'frac_d:', frac_d, 'and execute the script again'
        return
    
    # At this point, we just have to truncate the array
    if ndvi is not None:
        if ndvi.shape[2] > ndvi_header.frac_ndates:
            ndvi = ndvi[:, :, 0:ndvi_header.frac_ndates]
            ndvi_header.write_frac(frac_id, ndvi)
        else:
            print frac_num, ': NDVI already OK'
    else:
        print frac_num, ': NDVI is None'

    if qa is not None:
        if qa.shape[2] > qa_header.frac_ndates:
            qa = qa[:, :, 0:qa_header.frac_ndates]
            qa_header.write_frac(frac_id, qa)
        else:
            print frac_num, ': QA already OK'
    else:
        print frac_num, ': QA is None'

    print 'Processed %d, took %.02f [s]' % (frac_num, time.time() - _start)
    sys.stdout.flush()
def truncate_frac(frac_num, ndvi_root, qa_root):
    """
    Given a frac_num, will truncate the first hdfs file to have a size of
    (frac_width, frac_height, frac_ndates) which should correspond to
    (400, 400, 200)

    Args:
        frac_num: number of the fraction to truncate
        ndvi_root: root directory of the NDVI jgrid
        qa_root: root directory of the QA jgrid

    NOTE(review): duplicate of the earlier truncate_frac in this file; at
    import time the later definition shadows the earlier one.
    """
    _start = time.time()
    ndvi_header = jgrid.load(ndvi_root)
    qa_header = jgrid.load(qa_root)

    # Only the first date-fraction (frac_d == 0) is truncated
    frac_d = 0
    frac_id = (frac_num, frac_d)

    try:
        ndvi = jgrid.read_frac(ndvi_header.frac_fname(frac_id))
        qa = jgrid.read_frac(qa_header.frac_fname(frac_id))
    except ValueError:
        # Unreadable fraction : report it and leave it for manual repair
        print 'Fraction', frac_num, 'is corrupted!'
        print 'Solve the problem for frac_num:', frac_num, 'frac_d:', frac_d, 'and execute the script again'
        return

    # At this point, we just have to truncate the array
    if ndvi is not None:
        if ndvi.shape[2] > ndvi_header.frac_ndates:
            # Keep only the first frac_ndates dates and rewrite the fraction
            ndvi = ndvi[:, :, 0:ndvi_header.frac_ndates]
            ndvi_header.write_frac(frac_id, ndvi)
        else:
            print frac_num, ': NDVI already OK'
    else:
        print frac_num, ': NDVI is None'

    if qa is not None:
        if qa.shape[2] > qa_header.frac_ndates:
            qa = qa[:, :, 0:qa_header.frac_ndates]
            qa_header.write_frac(frac_id, qa)
        else:
            print frac_num, ': QA already OK'
    else:
        print frac_num, ': QA is None'

    print 'Processed %d, took %.02f [s]' % (frac_num, time.time() - _start)
    sys.stdout.flush()
# Example #4 (snippet separator left over from extraction)
def assert_grids_same(root1, root2):
    """
    Checks that the jgrids rooted at `root1` and `root2` hold identical
    headers and identical data. The two grids must share the same x/y
    chunking, but may differ in their date chunking.
    """
    h1, h2 = jgrid.load(root1), jgrid.load(root2)

    # Header-level equality
    assert np.all(h1.timestamps_ms == h2.timestamps_ms)
    assert np.all(h1.shape == h2.shape)
    assert h1.num_fracs == h2.num_fracs

    # Both grids must expose exactly the same set of fractions
    fracs1 = h1.list_available_fracnums()
    assert np.all(fracs1 == h2.list_available_fracnums())

    # Pixel-level equality, fraction by fraction
    for fnum in fracs1:
        assert_array_equal(h1.load_frac_by_num(fnum),
                           h2.load_frac_by_num(fnum))
def assert_grids_same(root1, root2):
    """
    Asserts that two jgrids are the same (same header, same data). This
    asserts that the two grids have the same x/y chunking, but NOT
    necessarily the same date chunking.

    Args:
        root1: root directory of the first jgrid
        root2: root directory of the second jgrid

    Raises:
        AssertionError: if headers, available fractions or data differ
    """
    h1 = jgrid.load(root1)
    h2 = jgrid.load(root2)

    # Header-level checks
    assert np.all(h1.timestamps_ms == h2.timestamps_ms)
    assert np.all(h1.shape == h2.shape)
    assert h1.num_fracs == h2.num_fracs

    # Both grids must contain exactly the same fractions
    fracs1 = h1.list_available_fracnums()
    fracs2 = h2.list_available_fracnums()
    assert np.all(fracs1 == fracs2)

    # Data-level check, fraction by fraction
    for frac_num in fracs1:
        data1 = h1.load_frac_by_num(frac_num)
        data2 = h2.load_frac_by_num(frac_num)
        assert_array_equal(data1, data2)
    # Fixed: removed a stray trailing "Processed ..." print (pasted from
    # truncate_frac) that referenced an undefined _start variable and
    # raised NameError after all the comparisons had already passed.


if __name__ == '__main__':
    # Entry point: truncate every (or one selected) fraction of the NDVI/QA
    # worldgrid so that no fraction exceeds frac_ndates dates.
    args = parser.parse_args()
    worldgrid = args.worldgrid
    ndvi_root = os.path.join(worldgrid, 'ndvi')
    qa_root = os.path.join(worldgrid, 'qa')
    nworkers = args.nworkers
    fraction = args.fraction

    # The NDVI grid must already exist in HDFS
    assert jgrid.Header.exists(ndvi_root)

    print 'Reading headers from HDFS...'
    ndvi_header = jgrid.load(ndvi_root)
    qa_header = jgrid.load(qa_root)

    # NDVI and QA grids are expected to be date-aligned
    assert np.all(ndvi_header.timestamps_ms == qa_header.timestamps_ms)

    # -- Figure out the fractions we have to update
    print 'Looking for available fractions in HDFS...'
    fractions = ndvi_header.list_available_fracnums()

    if len(fractions) == 0:
        print 'No fractions to process - terminating'
        sys.exit(0)

    assert np.all(fractions == qa_header.list_available_fracnums())

    # NOTE(review): this block appears truncated - the body of the
    # `if fraction is None:` branch is missing from this file.
    if fraction is None:
def test_create_ndvi_worldgrid(tempdir):
    """
    This is really an integration test because it tests the following :
    - Creates of a NDVI worldgrid from HDF files
    - Writes the NDVI fractions containing the HDF data
    - Loads (using geographical coordinates) the area corresponding to the
      HDF from the created jgrid
    - Verify that the jgrid data matches the HDF data

    This require that jgrid read/write work properly as well as all the jgrid
    georeferencing

    NOTE(review): duplicate of the earlier test_create_ndvi_worldgrid in
    this file; at import time the later definition shadows the earlier one.
    """
    script = os.path.join(test_utils.get_rastercube_dir(), 'scripts',
                          'create_ndvi_worldgrid.py')
    worldgrid = tempdir
    ndvi_dir = os.path.join(worldgrid, 'ndvi')
    qa_dir = os.path.join(worldgrid, 'qa')
    dates_csv = os.path.join(utils.get_data_dir(), '1_manual',
                             'ndvi_dates.2.csv')
    tile = 'h29v07'
    print 'dates_csv : ', dates_csv
    # Run the creation script as a subprocess; frac_ndates=1 puts each
    # date in its own date-fraction
    cmd = [
        sys.executable,
        script,
        '--tile=%s' % tile,
        '--noconfirm',
        '--worldgrid=%s' % worldgrid,
        '--frac_ndates=1',
        '--dates_csv=%s' % dates_csv
    ]
    output = subprocess.check_output(cmd)

    # Verify that the header has the correct dates
    ndvi_header = jgrid.load(ndvi_dir)
    qa_header = jgrid.load(qa_dir)
    dates = [utils.format_date(ts) for ts in ndvi_header.timestamps_ms]
    assert dates == ['2000_02_18', '2000_03_05']

    # Two dates with frac_ndates=1 => two date fractions
    assert ndvi_header.num_dates_fracs == 2

    # Load the HDF and the corresponding date from the jgrid and
    # check for consistency
    hdf_fname = os.path.join(utils.get_modis_hdf_dir(), '2000',
                             'MOD13Q1.A2000065.h29v07.005.2008238013448.hdf')
    f = modis.ModisHDF(hdf_fname)
    ndvi_ds = f.load_gdal_dataset(modis.MODIS_NDVI_DATASET_NAME)

    hdf_ndvi = ndvi_ds.ReadAsArray()
    hdf_qa = f.load_gdal_dataset(
            modis.MODIS_QA_DATASET_NAME).ReadAsArray()

    # Load from the jgrid using the lat/lng polygon of the HDF file
    # This means we also test georeferencing
    hdf_poly = gdal_utils.latlng_bounding_box_from_ds(ndvi_ds)
    xy_from, qa, qa_mask, ndvi, ndvi_mask = \
        jgrid_utils.load_poly_latlng_from_multi_jgrids(
                [qa_header, ndvi_header], hdf_poly)
    assert ndvi.shape[:2] == hdf_ndvi.shape
    # Verify that the jgrid ndvi and the HDF ndvi store the same values
    assert_array_equal(hdf_ndvi, ndvi[:, :, 1])
    assert_array_equal(hdf_qa, qa[:, :, 1])
# Example #8 (snippet separator left over from extraction)
    return ndvi, qa


if __name__ == '__main__':
    # Entry point: (re)process a single (fraction, fraction_part) of the
    # NDVI/QA worldgrid from the source MODIS HDF files.
    args = parser.parse_args()
    frac_num = args.fraction
    frac_d = args.fraction_part
    frac_id = (frac_num, frac_d)
    modis_dir = utils.get_modis_hdf_dir()
    worldgrid = args.worldgrid
    ndvi_root = os.path.join(worldgrid, 'ndvi')
    qa_root = os.path.join(worldgrid, 'qa')

    # The NDVI grid must already exist in HDFS
    assert jgrid.Header.exists(ndvi_root)

    ndvi_header = jgrid.load(ndvi_root)
    qa_header = jgrid.load(qa_root)

    # Bail out early if the requested fraction was never written
    fname = ndvi_header.frac_fname(frac_id)
    if not io.fs_exists(fname):
        print 'The selected fraction does not exist in HDFS'
        exit(0)

    # NDVI and QA grids must be date-aligned
    assert np.all(ndvi_header.timestamps_ms == qa_header.timestamps_ms)

    # Select dates for the requested fraction_part
    start_date_i = ndvi_header.frac_ndates * frac_d
    # NOTE(review): end_date_i is computed as a *count* of remaining dates
    # but is then used as a slice end index below; for frac_d > 0 this
    # yields an empty/wrong slice. Likely intended:
    # start_date_i + min(len(timestamps_ms) - start_date_i, frac_ndates)
    # - confirm against the callers before changing.
    end_date_i = np.amin([len(ndvi_header.timestamps_ms) - start_date_i, ndvi_header.frac_ndates])
    selected_dates = ndvi_header.timestamps_ms[start_date_i:end_date_i]

    modgrid = grids.MODISGrid()
def complete_frac(frac_num, ndvi_root, qa_root, frac_tilename,
                  tilename_fileindex):
    """
    Given a frac_num, will make sure it contains data for all dates in
    ndvi_header.timestamps_ms

    Args:
        frac_num: number of the fraction to complete
        ndvi_root: root directory of the NDVI jgrid
        qa_root: root directory of the QA jgrid
        frac_tilename: maps frac_num -> MODIS tilename (e.g. 'h29v07')
        tilename_fileindex: maps tilename -> {timestamp: hdf filename}
          -- presumably keyed by timestamps_ms values; confirm with caller
    """
    _start = time.time()
    modgrid = grids.MODISGrid()
    ndvi_header = jgrid.load(ndvi_root)
    qa_header = jgrid.load(qa_root)

    tilename = frac_tilename[frac_num]
    tile_h, tile_v = modis.parse_tilename(tilename)
    hdf_files = tilename_fileindex[tilename]

    # Range of possible date-fraction indices for this grid
    d_from = 0
    d_to = ndvi_header.shape[2] // ndvi_header.frac_ndates + 1

    frac_id = None
    frac_d = None
    # Find the most recent existing fraction and the most recent timestamp
    for frac_d in range(d_from, d_to)[::-1]:
        frac_id = (frac_num, frac_d)
        fname = ndvi_header.frac_fname(frac_id)
        if io.fs_exists(fname):
            break

    assert frac_id is not None
    assert frac_d is not None

    # Read the data of the most recent fraction in HDFS
    ndvi = jgrid.read_frac(ndvi_header.frac_fname(frac_id))
    qa = jgrid.read_frac(qa_header.frac_fname(frac_id))

    assert ndvi.shape == qa.shape

    # Compute the index of the last date in HDFS
    most_recent_t = frac_d * ndvi_header.frac_ndates + ndvi.shape[2]

    i_range, j_range = modgrid.get_cell_indices_in_tile(
        frac_num, tile_h, tile_v)

    # At this point, we just have to complete with the missing dates
    for t in range(most_recent_t, len(ndvi_header.timestamps_ms)):
        ts = ndvi_header.timestamps_ms[t]
        fname = hdf_files[ts]

        new_ndvi, new_qa = read_ndvi_qa(fname, i_range, j_range)

        if ndvi.shape[2] == ndvi_header.frac_ndates:
            # Write a complete fraction
            frac_id = (frac_num, frac_d)
            ndvi_header.write_frac(frac_id, ndvi)
            qa_header.write_frac(frac_id, qa)
            # Prepare variables for a new fraction
            frac_d += 1
            ndvi = new_ndvi[:, :, None]
            qa = new_qa[:, :, None]
        else:
            # TODO: If we end up completing multiple dates, we could preallocate
            # But for now, this is unlikely (we'll complete with the most
            # recent data)
            ndvi = np.concatenate([ndvi, new_ndvi[:, :, None]], axis=2)
            qa = np.concatenate([qa, new_qa[:, :, None]], axis=2)

        assert ndvi.shape == qa.shape

    # Write last incomplete fraction
    frac_id = (frac_num, frac_d)
    ndvi_header.write_frac(frac_id, ndvi)
    qa_header.write_frac(frac_id, qa)

    print 'Processed %d, appended %d dates, took %.02f [s]' % (
        frac_num, len(ndvi_header.timestamps_ms) - most_recent_t,
        time.time() - _start)

    sys.stdout.flush()
def complete_frac(frac_num, ndvi_root, qa_root, frac_tilename, tilename_fileindex):
    """
    Given a frac_num, will make sure it contains data for all dates in
    ndvi_header.timestamps_ms
    """
    _start = time.time()
    modgrid = grids.MODISGrid()
    ndvi_header = jgrid.load(ndvi_root)
    qa_header = jgrid.load(qa_root)

    tilename = frac_tilename[frac_num]
    tile_h, tile_v = modis.parse_tilename(tilename)
    hdf_files = tilename_fileindex[tilename]

    d_from = 0
    d_to = ndvi_header.shape[2] // ndvi_header.frac_ndates + 1

    frac_id = None
    frac_d = None
    # Find the most recent existing fraction and the most recent timestamp
    for frac_d in range(d_from, d_to)[::-1]:
        frac_id = (frac_num, frac_d)
        fname = ndvi_header.frac_fname(frac_id)
        if io.fs_exists(fname):
            break

    assert frac_id is not None
    assert frac_d is not None

    # Read the data of the most recent fraction in HDFS
    ndvi = jgrid.read_frac(ndvi_header.frac_fname(frac_id))
    qa = jgrid.read_frac(qa_header.frac_fname(frac_id))

    assert ndvi.shape == qa.shape

    # Compute the index of the last date in HDFS
    most_recent_t = frac_d * ndvi_header.frac_ndates + ndvi.shape[2]

    i_range, j_range = modgrid.get_cell_indices_in_tile(frac_num, tile_h, tile_v)

    # At this point, we just have to complete with the missing dates
    for t in range(most_recent_t, len(ndvi_header.timestamps_ms)):
        ts = ndvi_header.timestamps_ms[t]
        fname = hdf_files[ts]

        new_ndvi, new_qa = read_ndvi_qa(fname, i_range, j_range)

        if ndvi.shape[2] == ndvi_header.frac_ndates:
            # Write a complete fraction
            frac_id = (frac_num, frac_d)
            ndvi_header.write_frac(frac_id, ndvi)
            qa_header.write_frac(frac_id, qa)
            # Prepare variables for a new fraction
            frac_d += 1
            ndvi = new_ndvi[:, :, None]
            qa = new_qa[:, :, None]
        else:
            # TODO: If we end up completing multiple dates, we could preallocate
            # But for now, this is unlikely (we'll complete with the most
            # recent data)
            ndvi = np.concatenate([ndvi, new_ndvi[:, :, None]], axis=2)
            qa = np.concatenate([qa, new_qa[:, :, None]], axis=2)

        assert ndvi.shape == qa.shape

    # Write last incomplete fraction
    frac_id = (frac_num, frac_d)
    ndvi_header.write_frac(frac_id, ndvi)
    qa_header.write_frac(frac_id, qa)

    print 'Processed %d, appended %d dates, took %.02f [s]' % (
        frac_num, len(ndvi_header.timestamps_ms) - most_recent_t,
        time.time() - _start
    )

    sys.stdout.flush()