Beispiel #1
0
def read_frac(fname, hdfs_client=None):
    """
    Load the array stored in a fraction file.

    Args:
        fname: URI of the fraction file, either 'hdfs://...' or 'fs://...'
        hdfs_client: optional client forwarded to the rasterio fs helpers

    Returns:
        Whatever np.load returns for the file, or None if the file does not
        exist
    """
    # Local import: keeps the file's top-level imports untouched
    from io import BytesIO

    if not rasterio.fs_exists(fname, hdfs_client):
        return None

    if fname.startswith('hdfs://'):
        # The blob is raw bytes; np.load wants a binary file-like object.
        # io.BytesIO provides that on both Python 2 and Python 3, whereas
        # the StringIO module only exists on Python 2.
        blob = rasterio.fs_read(fname, hdfs_client)
        return np.load(BytesIO(blob))

    # If reading from fs://, we short-circuit fs_read and let numpy open the
    # local path directly
    return np.load(rasterio.strip_uri_proto(fname, 'fs://'))
Beispiel #2
0
 def list_available_fractions(self, hdfs_client=None):
     """
     List the ids of the fractions that exist under <grid_root>/jdata.

     Args:
         hdfs_client: optional client forwarded to the rasterio fs helpers
     Returns:
         a list of tuple (frac_num, time_chunk), one per entry whose name
         ends with 'jdata'; an empty list if the jdata directory is missing
     """
     jdata_dir = os.path.join(self.grid_root, 'jdata')
     if rasterio.fs_exists(jdata_dir, hdfs_client):
         frac_ids = []
         # Entries are fraction filenames (e.g. 14123.jdata); anything else
         # found in the directory is ignored
         for entry in rasterio.fs_list(jdata_dir, hdfs_client):
             if entry.endswith('jdata'):
                 frac_ids.append(frac_id_from_fname(entry))
         return frac_ids
     return []
Beispiel #3
0
 def exists(grid_root, hdfs_client=None):
     """
     Tell whether a grid header file is present under grid_root.

     Looks for 'header.jghdr3' directly inside grid_root and returns the
     result of rasterio.fs_exists for that path.
     """
     return rasterio.fs_exists(
         os.path.join(grid_root, 'header.jghdr3'), hdfs_client)
Beispiel #4
0
    # Read the command line: the fraction to work on is identified by the
    # pair (fraction, fraction_part)
    args = parser.parse_args()
    frac_num = args.fraction
    frac_d = args.fraction_part
    frac_id = (frac_num, frac_d)
    modis_dir = utils.get_modis_hdf_dir()
    worldgrid = args.worldgrid
    # The NDVI and QA grids live side by side under the worldgrid root
    ndvi_root = os.path.join(worldgrid, 'ndvi')
    qa_root = os.path.join(worldgrid, 'qa')

    # NOTE(review): only the NDVI grid is checked for existence here; the QA
    # grid is loaded below without an equivalent check
    assert jgrid.Header.exists(ndvi_root)

    ndvi_header = jgrid.load(ndvi_root)
    qa_header = jgrid.load(qa_root)

    # Stop early (exit status 0) when the requested fraction file is missing
    fname = ndvi_header.frac_fname(frac_id)
    if not io.fs_exists(fname):
        print 'The selected fraction does not exist in HDFS'
        exit(0)

    # Both grids must list exactly the same timestamps
    assert np.all(ndvi_header.timestamps_ms == qa_header.timestamps_ms)

    # Select dates for the requested fraction_part
    # NOTE(review): end_date_i is min(remaining dates, frac_ndates), i.e. a
    # count rather than an absolute index. Used as the slice end below, it
    # looks like it yields an empty selection whenever frac_d > 0 - confirm
    # whether start_date_i should be added to it.
    start_date_i = ndvi_header.frac_ndates * frac_d
    end_date_i = np.amin([len(ndvi_header.timestamps_ms) - start_date_i, ndvi_header.frac_ndates])
    selected_dates = ndvi_header.timestamps_ms[start_date_i:end_date_i]

    modgrid = grids.MODISGrid()

    # Build a dict of frac_num:tilename
    tiles = config.MODIS_TERRA_TILES
    frac_tilename = {}
def complete_frac(frac_num, ndvi_root, qa_root, frac_tilename,
                  tilename_fileindex):
    """
    Given a frac_num, will make sure it contains data for all dates in
    ndvi_header.timestamps_ms

    The most recent fraction part that already exists is read back, then
    every later date listed in ndvi_header.timestamps_ms is read with
    read_ndvi_qa and appended. A part is written out as soon as it holds
    ndvi_header.frac_ndates dates and a new part is started; the last
    (possibly incomplete) part is written at the end.

    Args:
        frac_num: number of the fraction to complete
        ndvi_root: root of the NDVI grid, passed to jgrid.load
        qa_root: root of the QA grid, passed to jgrid.load
        frac_tilename: mapping frac_num -> tilename
        tilename_fileindex: mapping tilename -> {timestamp: filename}

    Raises:
        IOError: if no part of this fraction exists yet, or if the NDVI/QA
            data of the most recent part cannot be read
    """
    _start = time.time()
    modgrid = grids.MODISGrid()
    ndvi_header = jgrid.load(ndvi_root)
    qa_header = jgrid.load(qa_root)

    tilename = frac_tilename[frac_num]
    tile_h, tile_v = modis.parse_tilename(tilename)
    hdf_files = tilename_fileindex[tilename]

    n_parts = ndvi_header.shape[2] // ndvi_header.frac_ndates + 1

    # Find the most recent existing fraction part, newest first. The else
    # clause only runs when the loop ends without a break, i.e. when no part
    # exists at all; previously that case silently fell through with part 0.
    for frac_d in reversed(range(n_parts)):
        frac_id = (frac_num, frac_d)
        if io.fs_exists(ndvi_header.frac_fname(frac_id)):
            break
    else:
        raise IOError(
            'No existing fraction part found for fraction %d' % frac_num)

    # Read the data of the most recent fraction in HDFS
    ndvi = jgrid.read_frac(ndvi_header.frac_fname(frac_id))
    qa = jgrid.read_frac(qa_header.frac_fname(frac_id))
    if ndvi is None or qa is None:
        raise IOError(
            'Could not read NDVI/QA data for fraction %d, part %d'
            % (frac_num, frac_d))

    assert ndvi.shape == qa.shape

    # Compute the index of the last date in HDFS
    most_recent_t = frac_d * ndvi_header.frac_ndates + ndvi.shape[2]

    i_range, j_range = modgrid.get_cell_indices_in_tile(
        frac_num, tile_h, tile_v)

    # At this point, we just have to complete with the missing dates
    for t in range(most_recent_t, len(ndvi_header.timestamps_ms)):
        ts = ndvi_header.timestamps_ms[t]
        new_ndvi, new_qa = read_ndvi_qa(hdf_files[ts], i_range, j_range)

        if ndvi.shape[2] == ndvi_header.frac_ndates:
            # The current part is full: write it, then start a new part
            # holding only the date that was just read
            frac_id = (frac_num, frac_d)
            ndvi_header.write_frac(frac_id, ndvi)
            qa_header.write_frac(frac_id, qa)
            frac_d += 1
            ndvi = new_ndvi[:, :, None]
            qa = new_qa[:, :, None]
        else:
            # TODO: If we end up completing multiple dates, we could preallocate
            # But for now, this is unlikely (we'll complete with the most
            # recent data)
            ndvi = np.concatenate([ndvi, new_ndvi[:, :, None]], axis=2)
            qa = np.concatenate([qa, new_qa[:, :, None]], axis=2)

        assert ndvi.shape == qa.shape

    # Write last incomplete fraction
    frac_id = (frac_num, frac_d)
    ndvi_header.write_frac(frac_id, ndvi)
    qa_header.write_frac(frac_id, qa)

    # Single-argument parenthesised form: same output with the Python 2
    # print statement, and valid syntax on Python 3
    print('Processed %d, appended %d dates, took %.02f [s]' % (
        frac_num, len(ndvi_header.timestamps_ms) - most_recent_t,
        time.time() - _start))

    sys.stdout.flush()
def complete_frac(frac_num, ndvi_root, qa_root, frac_tilename, tilename_fileindex):
    """
    Given a frac_num, will make sure it contains data for all dates in
    ndvi_header.timestamps_ms

    Reads back the most recent fraction part that exists, appends every
    later date from ndvi_header.timestamps_ms (read with read_ndvi_qa), and
    writes the parts out: a part is flushed once it holds
    ndvi_header.frac_ndates dates, and the trailing part is written last.

    Args:
        frac_num: number of the fraction to complete
        ndvi_root: root of the NDVI grid, passed to jgrid.load
        qa_root: root of the QA grid, passed to jgrid.load
        frac_tilename: mapping frac_num -> tilename
        tilename_fileindex: mapping tilename -> {timestamp: filename}

    Raises:
        IOError: if no part of this fraction exists yet, or if the NDVI/QA
            data of the most recent part cannot be read
    """
    _start = time.time()
    modgrid = grids.MODISGrid()
    ndvi_header = jgrid.load(ndvi_root)
    qa_header = jgrid.load(qa_root)

    tilename = frac_tilename[frac_num]
    tile_h, tile_v = modis.parse_tilename(tilename)
    hdf_files = tilename_fileindex[tilename]

    d_to = ndvi_header.shape[2] // ndvi_header.frac_ndates + 1

    # Find the most recent existing fraction and the most recent timestamp.
    # Without the else clause a fraction with no stored part would fall
    # through as part 0 and only fail later with an obscure error.
    for frac_d in reversed(range(d_to)):
        frac_id = (frac_num, frac_d)
        if io.fs_exists(ndvi_header.frac_fname(frac_id)):
            break
    else:
        raise IOError('No existing fraction part found for fraction %d' % frac_num)

    # Read the data of the most recent fraction in HDFS
    ndvi = jgrid.read_frac(ndvi_header.frac_fname(frac_id))
    qa = jgrid.read_frac(qa_header.frac_fname(frac_id))
    if ndvi is None or qa is None:
        raise IOError(
            'Could not read NDVI/QA data for fraction %d, part %d' % (frac_num, frac_d)
        )

    assert ndvi.shape == qa.shape

    # Compute the index of the last date in HDFS
    most_recent_t = frac_d * ndvi_header.frac_ndates + ndvi.shape[2]

    i_range, j_range = modgrid.get_cell_indices_in_tile(frac_num, tile_h, tile_v)

    # At this point, we just have to complete with the missing dates
    for t in range(most_recent_t, len(ndvi_header.timestamps_ms)):
        ts = ndvi_header.timestamps_ms[t]
        new_ndvi, new_qa = read_ndvi_qa(hdf_files[ts], i_range, j_range)

        if ndvi.shape[2] == ndvi_header.frac_ndates:
            # Write a complete fraction
            frac_id = (frac_num, frac_d)
            ndvi_header.write_frac(frac_id, ndvi)
            qa_header.write_frac(frac_id, qa)
            # Prepare variables for a new fraction that starts with the
            # date that was just read
            frac_d += 1
            ndvi = new_ndvi[:, :, None]
            qa = new_qa[:, :, None]
        else:
            # TODO: If we end up completing multiple dates, we could preallocate
            # But for now, this is unlikely (we'll complete with the most
            # recent data)
            ndvi = np.concatenate([ndvi, new_ndvi[:, :, None]], axis=2)
            qa = np.concatenate([qa, new_qa[:, :, None]], axis=2)

        assert ndvi.shape == qa.shape

    # Write last incomplete fraction
    frac_id = (frac_num, frac_d)
    ndvi_header.write_frac(frac_id, ndvi)
    qa_header.write_frac(frac_id, qa)

    # Parenthesised single-argument print: identical output under the
    # Python 2 print statement, and also valid on Python 3
    print('Processed %d, appended %d dates, took %.02f [s]' % (
        frac_num, len(ndvi_header.timestamps_ms) - most_recent_t,
        time.time() - _start
    ))

    sys.stdout.flush()