def read_blocks(self, urls, block_idx, dst, band=1):
    """Read one block of ``band`` from every url into ``dst`` and time it.

    The actual file handling is delegated to ``self._proc.process``, which
    receives ``enumerate(urls)``, a per-file callback and ``t_now`` as timer.
    The callback is presumably invoked as ``extract_block(f, idx, t0=...)``
    with an opened dataset -- verify against the processor implementation.

    :param urls: iterable of sources; iterated once to size ``stats`` and once
                 more (enumerated) for processing
    :param block_idx: block index, unpacked into ``block_window``/``block_size``
    :param dst: destination array, written one slice ``dst[idx, :, :]`` per url
    :param band: band number forwarded to every dataset call
    :return: ``(dst, info)`` where ``info`` carries ``stats`` (one entry per
             url, ``None`` if the callback never ran for it), ``params``,
             ``t0`` and ``t_total``
    """
    started = t_now()
    stats = [None for _ in urls]

    def extract_block(f, idx, t0=0):
        out_view = dst[idx, :, :]
        window = f.block_window(band, *block_idx)
        t_ready = t_now()

        f.read(band, window=window, out=out_view)
        t_done = t_now()

        nbytes = f.block_size(band, *block_idx)
        stats[idx] = SimpleNamespace(
            t_open=t_ready - t0,
            t_total=t_done - t0,
            t0=t0,
            chunk_size=nbytes,
        )

    self._proc.process(enumerate(urls), extract_block, timer=t_now)
    elapsed = t_now() - started

    params = SimpleNamespace(
        nthreads=self._nthreads,
        band=band,
        block_shape=dst.shape[1:],
        dtype=dst.dtype.name,
        block=block_idx,
    )
    report = SimpleNamespace(stats=stats, params=params, t0=started, t_total=elapsed)
    return dst, report
async def fetch_one(req, session, userdata):
    """Fetch ``req`` through ``session`` and pass the body to ``on_data``.

    Three timestamps are reported via the ``time`` keyword: before the request
    is issued, once the response object is available, and after the body has
    been read in full.

    :param req: request object providing ``full_url`` and ``headers``
    :param session: object whose ``get(...)`` is an async context manager
                    (presumably an aiohttp-style session -- confirm)
    :param userdata: opaque value handed back to ``on_data`` unchanged
    """
    t_start = t_now()
    async with session.get(req.full_url, headers=req.headers) as response:
        t_response = t_now()
        body = await response.read()
        t_done = t_now()
        timing = (t_start, t_response, t_done)
        on_data(body, userdata, time=timing)
def stage2(stream, connection_pool):
    """Collect the responses for requests that were already sent.

    Every item of ``stream`` is a ``(userdata, req, conn, t0)`` tuple. For a
    2xx response the body is read, the connection goes back to the pool and
    ``on_data`` is called with ``time=(t0, t1, t2)``. For any other status the
    connection goes back to the pool and ``on_error`` (if set) gets the
    response. An ``IOError`` while waiting for the response is reported to
    ``on_error`` (if set) and the item is skipped.

    :param stream: iterable of ``(userdata, req, conn, t0)``
    :param connection_pool: object with ``put(conn)`` used to recycle connections
    """
    for userdata, req, conn, t_sent in stream:
        try:
            response = conn.getresponse()
        except IOError as err:
            # TODO: print(e)
            # NOTE(review): on this path ``conn`` is not handed back to
            # ``connection_pool`` -- confirm whether dropping it is intended.
            if on_error:
                on_error(err)
            continue

        t_response = t_now()

        if not (200 <= response.code < 300):
            # Return connection to be re-used as early as possible
            connection_pool.put(conn)
            if on_error:
                on_error(response)
            continue

        payload = response.read()
        t_done = t_now()
        # Return connection to be re-used as early as possible
        connection_pool.put(conn)
        on_data(payload, userdata, time=(t_sent, t_response, t_done))
def _warp_one(self, warpfile, rs):
    """Warp band 1 of ``warpfile`` onto this object's grid, with retries.

    The file is opened with rasterio and read through a ``WarpedVRT``
    configured from ``self.crs``, ``self.transform``, ``self.rows`` and
    ``self.cols``. A ``RasterioIOError`` triggers a retry after a 4 second
    pause, up to 10 attempts in total.

    :param warpfile: path/URL handed to ``rasterio.open``
    :param rs: resampling method forwarded to ``WarpedVRT``
    :return: the array read from the VRT, or ``None`` (implicitly) when every
             attempt failed
    """
    t_start = t_now()

    # Counts down 10..1 so the diagnostic print shows the same numbers as the
    # attempt counter it replaces.
    for attempts_left in range(10, 0, -1):
        try:
            with rasterio.open(warpfile) as src:
                # Equality (not identity) comparison kept on purpose so the
                # default-CRS fallback triggers exactly as before.
                if src.crs == None:
                    src.crs = CRS.from_epsg(4326)
                # create the virtual raster based on the standard rasterio
                # attributes from the sample tiff and shapefile feature.
                vrt_options = dict(
                    resampling=rs,
                    crs=self.crs,
                    transform=self.transform,
                    height=self.rows,
                    width=self.cols,
                )
                with WarpedVRT(src, **vrt_options) as vrt:
                    data = vrt.read(1)
                    print("data shape =", data.shape)
                    self.log.info("_warp_one Completed {}".format(warpfile))
                    elapsed = t_now() - t_start
                    self.log.info("WARP - TIME - {} - {}".format(elapsed, warpfile))
                    return data
        except rasterio.errors.RasterioIOError:
            print("Unexpected error:", sys.exc_info()[0])
            print('oops', attempts_left)
            time.sleep(4)
def read_blocks(self, urls, block_idx, dst, band=1):
    """Read one block of ``band`` from every url into ``dst`` and time it.

    Processing is delegated to ``self._proc.process`` with a per-file
    callback; the callback is presumably invoked as
    ``extract_block(f, idx, t0=...)`` with an opened dataset -- verify against
    the processor implementation. If querying the block size raises
    ``rasterio.errors.RasterBlockError`` a message goes to stderr and the
    chunk size is recorded as 0.

    :param urls: iterable of sources; iterated once to size ``stats`` and once
                 more (enumerated) for processing
    :param block_idx: block index, unpacked into ``block_window``/``block_size``
    :param dst: destination array, written one slice ``dst[idx, :, :]`` per url
    :param band: band number forwarded to every dataset call
    :return: ``(dst, info)`` where ``info`` carries ``stats``, ``params``,
             ``t0`` and ``t_total``
    """
    started = t_now()
    stats = [None for _ in urls]

    def extract_block(f, idx, t0=0):
        out_view = dst[idx, :, :]
        window = f.block_window(band, *block_idx)
        t_ready = t_now()

        f.read(band, window=window, out=out_view)
        t_done = t_now()

        try:
            nbytes = f.block_size(band, *block_idx)
        except rasterio.errors.RasterBlockError:
            print('Failed to read block size for {}'.format(f.name),
                  file=sys.stderr)
            nbytes = 0  # probably GDAL specific 0 sized tile

        stats[idx] = SimpleNamespace(
            t_open=t_ready - t0,
            t_total=t_done - t0,
            t0=t0,
            chunk_size=nbytes,
        )

    self._proc.process(enumerate(urls), extract_block, timer=t_now)
    elapsed = t_now() - started

    params = SimpleNamespace(
        nthreads=self._nthreads,
        band=band,
        block_shape=dst.shape[1:],
        dtype=dst.dtype.name,
        block=block_idx,
    )
    report = SimpleNamespace(stats=stats, params=params, t0=started, t_total=elapsed)
    return dst, report
def output_rasters_cloud(self, arr, outname):
    """Write ``arr`` as a single-band float64 GeoTIFF and pass it to the S3 step.

    Only ``path_mode == 'aws'`` is implemented. For ``'google'`` or any other
    value a message is printed and the process exits with status 0.

    In AWS mode the raster is written under ``self.temp_folder`` using the
    last ``/``-separated component of ``outname``; ``config_dict['out_root']``
    is split into bucket name (first component) and key prefix (the rest), and
    ``self.s3_delete_local`` is called with the local file and target key
    (per the original comments it uploads and later removes the local copy --
    confirm in ``s3_delete_local``).

    :param arr: array written as band 1
    :param outname: output name; also appended to the bucket prefix
    """
    path_mode = self.config_dict['path_mode']

    if path_mode == 'google':
        print('google path mode not yet implemented')
        sys.exit(0)
    if path_mode != 'aws':
        print(
            'PATH MODE in config is not set properly for the cloud implementation of output_Rasters'
        )
        sys.exit(0)

    # Local staging file: keep only the file-name part of ``outname``.
    local_outpath = os.path.join(self.temp_folder, outname.split('/')[-1])
    self.log.debug('local_outpath {}'.format(local_outpath))

    t_start = t_now()

    # write to a temp folder
    profile = dict(
        driver='GTiff',
        height=self.rows,
        width=self.cols,
        count=1,
        dtype='float64',
        crs=self.crs,
        transform=self.transform,
    )
    with rasterio.open(local_outpath, 'w', **profile) as wrast:
        wrast.write(arr, indexes=1)

    # Buckets are not directories but you can treat them like they are:
    # first path component is the bucket, everything after it the key prefix.
    bucket_name, *bucket_prefix_list = self.config_dict['out_root'].split('/')
    print(bucket_prefix_list)
    bucket_prefix = '/'.join(bucket_prefix_list)
    print("bucket prefix =", bucket_prefix)
    bucket_filepath = os.path.join(bucket_prefix, outname)

    # uploads to aws bucket with filepath
    self.s3_delete_local(
        local_file=local_outpath,
        bucket=bucket_name,
        bucket_filepath=bucket_filepath,
    )

    elapsed = t_now() - t_start
    self.log.info("OUTPUT - TIME - {} - {}".format(elapsed, bucket_filepath))
def extract_block(f, idx, t0=0):
    """Read one block from dataset ``f`` into slot ``idx`` and record timing.

    Uses ``dst``, ``band``, ``block_idx`` and ``stats`` from the enclosing
    scope.

    :param f: dataset object offering ``block_window``, ``read`` and ``block_size``
    :param idx: position of this dataset in ``dst`` and ``stats``
    :param t0: reference timestamp the recorded durations are measured from
    """
    out_view = dst[idx, :, :]
    window = f.block_window(band, *block_idx)
    t_ready = t_now()

    f.read(band, window=window, out=out_view)
    t_done = t_now()

    nbytes = f.block_size(band, *block_idx)
    stats[idx] = SimpleNamespace(
        t_open=t_ready - t0,
        t_total=t_done - t0,
        t0=t0,
        chunk_size=nbytes,
    )
def mk_cbk_ui(width='100%'):
    """ Build a progress widget and the callback that drives it, to pass to
    `dc.load(progress_cbk=..)`

    The widget is a row with two labels above a progress bar. The callback
    updates the bar's maximum and value, shows "n of ntotal" in the right
    label and ``n / elapsed`` (formatted as "FPS") in the left one, where
    elapsed is measured from the moment this function created the widget.

    :param width: Width of the UI, for example: '80%' '200px' '30em'
    :return: ``(ui, cbk)`` -- the widget and a ``cbk(n, ntotal)`` callable
    """
    from ipywidgets import VBox, HBox, Label, Layout, IntProgress
    from timeit import default_timer as t_now

    progress = IntProgress(min=0, max=100, value=0,
                           layout=Layout(width='100%'))
    count_label = Label("")
    rate_label = Label("")

    header = HBox([rate_label, count_label],
                  layout=Layout(justify_content="space-between"))
    bar_row = HBox([progress])
    ui = VBox([header, bar_row], layout=Layout(width=width))

    started = t_now()

    def cbk(n, ntotal):
        elapsed = t_now() - started

        # Maximum first, then the value.
        progress.max = ntotal
        progress.value = n

        count_label.value = "{:d} of {:d}".format(n, ntotal)
        rate_label.value = "FPS: {:.1f}".format(n/elapsed)

    return ui, cbk
def cbk(n, ntotal):
    """Refresh the progress widgets for ``n`` of ``ntotal`` items done.

    Uses ``t0``, ``pbar``, ``lbl_left`` and ``lbl_right`` from the enclosing
    scope; the rate shown is ``n`` divided by the time elapsed since ``t0``.

    :param n: number of items completed so far
    :param ntotal: total number of items
    """
    elapsed = t_now() - t0

    # Maximum first, then the value.
    pbar.max = ntotal
    pbar.value = n

    progress_text = "{:d} of {:d}".format(n, ntotal)
    lbl_right.value = progress_text

    rate_text = "FPS: {:.1f}".format(n/elapsed)
    lbl_left.value = rate_text
def extract_block(f, idx, t0=0):
    """Read one block from dataset ``f`` into slot ``idx`` and record timing.

    Uses ``dst``, ``band``, ``block_idx`` and ``stats`` from the enclosing
    scope. When the block size query raises
    ``rasterio.errors.RasterBlockError`` a message is printed to stderr and
    the chunk size is recorded as 0.

    :param f: dataset object offering ``block_window``, ``read``,
              ``block_size`` and ``name``
    :param idx: position of this dataset in ``dst`` and ``stats``
    :param t0: reference timestamp the recorded durations are measured from
    """
    out_view = dst[idx, :, :]
    window = f.block_window(band, *block_idx)
    t_ready = t_now()

    f.read(band, window=window, out=out_view)
    t_done = t_now()

    try:
        nbytes = f.block_size(band, *block_idx)
    except rasterio.errors.RasterBlockError:
        print('Failed to read block size for {}'.format(f.name),
              file=sys.stderr)
        nbytes = 0  # probably GDAL specific 0 sized tile

    stats[idx] = SimpleNamespace(
        t_open=t_ready - t0,
        t_total=t_done - t0,
        t0=t0,
        chunk_size=nbytes,
    )
def tif_read_block(url, block_idx, hdr_max_sz=4096, s3=None, dtype=None):
    """Fetch and decompress a single tile of a TIFF by byte range.

    The header is read with ``tif_read_header``; the tile's offset and byte
    count are taken from ``hdr.info.TileOffsets`` / ``TileByteCounts``, the
    bytes are fetched with ``get_byte_range`` and inflated with zlib.

    :param url: location of the TIFF, forwarded to the header/byte-range helpers
    :param block_idx: tile index as an ``int``, or a tuple that is converted
                      with ``tif_tile_idx(hdr, *block_idx)``
    :param hdr_max_sz: size limit forwarded to ``tif_read_header``
    :param s3: client to use; one is created with ``get_s3_client()`` if ``None``
    :param dtype: when ``None`` the raw decompressed bytes are returned,
                  otherwise the bytes are viewed as a 2-d array of this dtype
    :return: ``(hdr, bytes)`` when ``dtype`` is ``None``; otherwise
             ``(hdr, array, stats)`` with timing and compressed size in ``stats``
    :raises ValueError: for compression codes other than 8 / 0x80B2 or a
                        predictor other than 1
    """
    t0 = t_now()

    if s3 is None:
        s3 = get_s3_client()

    hdr = tif_read_header(url, hdr_max_sz, s3=s3)
    t1 = t_now()

    if not isinstance(block_idx, int):
        block_idx = tif_tile_idx(hdr, *block_idx)

    if hdr.info.Compression not in (8, 0x80B2):
        raise ValueError('Only support DEFLATE compression (for now)')

    if hdr.info.Predictor not in (1, ):
        raise ValueError(
            'Do not support horizontal differencing predictor (for now)')

    offset = hdr.info.TileOffsets[block_idx]
    nbytes = hdr.info.TileByteCounts[block_idx]

    bb = get_byte_range(url, offset, offset + nbytes, s3=s3)
    bb = zlib.decompress(bb)

    if dtype is None:
        return hdr, bb

    im = np.ndarray((hdr.info.TileLength, hdr.info.TileWidth),
                    dtype=dtype,
                    buffer=bb)

    if hdr.byteorder != sys.byteorder:
        # ``bb`` is an immutable bytes object, so ``im`` is a read-only view
        # and an in-place swap would raise; swap into a fresh array instead.
        im = im.byteswap()

    t2 = t_now()
    stats = SimpleNamespace(t_open=t1 - t0,
                            t_total=t2 - t0,
                            t0=t0,
                            chunk_size=nbytes)

    return hdr, im, stats
def o_warp_one(self, warpfile, rs, crs, transform, rows, cols):
    """Warp band 1 of ``warpfile`` onto the given grid, using the npy cache.

    If ``self._is_in_cache(warpfile)`` is true the cached array is returned
    straight away. Otherwise the file is opened with rasterio and read through
    a ``WarpedVRT``; a ``RasterioIOError`` triggers a retry after a 4 second
    pause, up to 10 attempts in total. Results whose path contains ``'NDVI'``
    are stored with ``self._cache_npy`` before being returned.

    :param warpfile: path/URL handed to ``rasterio.open`` (also the cache key)
    :param rs: resampling method forwarded to ``WarpedVRT``
    :param crs: target CRS
    :param transform: target affine transform
    :param rows: target height
    :param cols: target width
    :return: the warped (or cached) array, or ``None`` (implicitly) when every
             attempt failed
    """
    t0 = t_now()

    if self._is_in_cache(warpfile):
        # The message needs a placeholder, otherwise ``format`` silently drops
        # the file name and the log line cannot be tied to an input.
        self.log.info(
            'RESEARCH RETRIEVING NPY CACHE ITEM {}'.format(warpfile))
        return self._return_cache_data(warpfile)

    # Counts down 10..1 so the diagnostic print shows the same numbers as the
    # attempt counter it replaces.
    for cnt in range(10, 0, -1):
        try:
            with rasterio.open(warpfile) as src:
                # create the virtual raster based on the standard rasterio
                # attributes from the sample tiff and shapefile feature.
                with WarpedVRT(src,
                               resampling=rs,
                               crs=crs,
                               transform=transform,
                               height=rows,
                               width=cols) as vrt:
                    data = vrt.read(1)
                    print("data shape =", data.shape)
                    self.log.info(
                        "o_warp_one Completed {}".format(warpfile))
                    t_total = t_now() - t0
                    self.log.info("WARP - TIME - {} - {}".format(
                        t_total, warpfile))

                    if 'NDVI' in warpfile:
                        # Time the cache write separately from the warp.
                        t_cache = t_now()
                        self._cache_npy(warpfile, data)
                        t_total = t_now() - t_cache
                        self.log.info(
                            "Cache_Store - TIME - {} - {}".format(
                                t_total, warpfile))

                    return data
        except rasterio.errors.RasterioIOError:
            print("Unexpected error:", sys.exc_info()[0])
            print('oops', cnt)
            time.sleep(4)
def read_blocks(self, urls, block_idx, dst, hdr_max_sz=4096):
    """Read one block from every url into ``dst`` via ``self._rdr_block``.

    ``self._rdr_block`` receives ``enumerate(urls)``, the block index, the
    destination array and a pre-sized ``stats`` list (one ``None`` per url)
    that it is expected to fill in -- confirm against its implementation.

    :param urls: iterable of sources; iterated once to size ``stats`` and once
                 more (enumerated) for reading
    :param block_idx: block index forwarded to the reader
    :param dst: destination array; ``dst.shape[1:]`` is reported as block shape
    :param hdr_max_sz: header size limit forwarded to the reader
    :return: ``(dst, info)`` where ``info`` carries ``params``, ``t0``,
             ``t_total`` and ``stats``
    """
    started = t_now()
    stats = [None for _ in urls]

    self._rdr_block(enumerate(urls),
                    block_idx,
                    dst,
                    stats,
                    hdr_max_sz=hdr_max_sz)
    finished = t_now()

    params = SimpleNamespace(
        band=1,
        block_shape=dst.shape[1:],
        nthreads=self._nthreads,
        hdr_max_sz=hdr_max_sz,
        dtype=dst.dtype.name,
        block=block_idx,
    )
    report = SimpleNamespace(
        params=params,
        t0=started,
        t_total=finished - started,
        stats=stats,
    )
    return dst, report
def stage1(ud_reqs, connection_pool):
    """Send each request on a pooled connection and yield the pending work.

    For every ``(userdata, req)`` pair a connection is taken from the pool and
    the request is issued. On success ``(userdata, req, conn, t0)`` is
    yielded, where ``t0`` is the timestamp taken just before sending. On
    ``IOError`` the error is passed to ``on_error`` (when it is not ``None``),
    the connection is put back into the pool and nothing is yielded for that
    pair.

    :param ud_reqs: iterable of ``(userdata, req)``; ``req`` provides
                    ``method``, ``selector`` and ``headers``
    :param connection_pool: object with ``get()`` / ``put(conn)``
    """
    for userdata, req in ud_reqs:
        conn = connection_pool.get()
        t_sent = t_now()

        try:
            conn.request(req.method, req.selector, headers=req.headers)
        except IOError as err:
            # TODO: print(e)
            if on_error is not None:
                on_error(err)
            connection_pool.put(conn)
        else:
            yield (userdata, req, conn, t_sent)
def __init__(self):
    """Start the counter: ``t0`` and ``t_last`` share one timestamp, ``n`` is 0."""
    self.t0 = self.t_last = t_now()
    self.n = 0
def __call__(self, *args):
    """Record one event: stamp ``t_last`` with the current time and add 1 to ``n``.

    Any positional arguments are accepted and ignored, so the instance can be
    used as a callback regardless of what the caller passes.
    """
    now = t_now()
    self.t_last = now
    self.n += 1
def run_veg_et(self):
    """Run the daily water-balance loop and write monthly/yearly rasters.

    Steps, as visible in this method:

    1. Build the start/end dates from ``start_year``/``start_day`` and
       ``end_year``/``end_day`` (``%Y-%j``) and iterate over every day in
       between, both ends included.
    2. Load the static inputs through ``self.pmanager.get_static_data`` and
       normalize them with ``self.rmanager.normalize_to_std_grid_fast``
       (``resamplemethod='nearest'``).
    3. Call ``self._run_water_bal`` once per day, feeding it the soil water
       fraction and snowpack returned for the previous day (``None`` on the
       first day).
    4. Accumulate monthly and yearly sums and write them with
       ``self.rmanager.output_rasters`` when the month/year ends, depending on
       ``self.accumulate_mode``.
    5. Save the log and config files with ``s3_save_log_file`` and
       ``s3_save_config_files``.

    NOTE(review): this block was reconstructed from whitespace-collapsed
    source. The accumulation and raster-output section is assumed to live in
    the ``else`` branch (so the first day is run but not accumulated), and the
    dashed separator is assumed to be printed once after the loop -- confirm
    both against the original indentation.
    """
    print(''' _ _ ___ ___ _ _ | | | ___ ___ | __>|_ _|| || | | ' |/ ._>/ . || _> | | |_/|_/ |__/ \___.\_. ||___> |_| <_><_> <___' ''')

    # Year + day-of-year -> datetime for both ends of the run.
    start_dt = datetime.strptime(
        "{}-{:03d}".format(self.start_year, self.start_day), '%Y-%j')
    print(start_dt)
    end_dt = datetime.strptime(
        "{}-{:03d}".format(self.end_year, self.end_day), '%Y-%j')
    print(end_dt)
    time_interval = end_dt - start_dt
    num_days = time_interval.days
    print(num_days)

    accumulate_mode = self.accumulate_mode

    # Initially set output_yearly_arr and output_monthly_arr to False; they
    # are changed later depending on what is in the accumulate_mode list.
    # todo - set these in config_dict.
    output_monthly_arr = False
    output_yearly_arr = False
    # step daily. It is false if not included by default.
    # The second assignment wins: daily mode is always on.
    output_daily_arr = False
    output_daily_arr = True

    # Open static inputs and normalize them to standard numpy arrays
    # static inputs
    self.interception = self.pmanager.get_static_data(
        self.interception_settings)
    self.whc = self.pmanager.get_static_data(self.whc_settings)
    self.field_capacity = self.pmanager.get_static_data(
        self.field_capacity_settings)
    self.saturation = self.pmanager.get_static_data(
        self.saturation_settings)
    self.watermask = self.pmanager.get_static_data(self.watermask_settings)

    # package as a list
    static_inputs = [
        self.interception, self.whc, self.field_capacity, self.saturation,
        self.watermask
    ]

    # normalizing.
    self.log.info("self.rmanager.normalize_to_std_grid_fast {}".format(
        static_inputs))
    self.interception, self.whc, self.field_capacity, self.saturation, self.watermask \
        = self.rmanager.normalize_to_std_grid_fast(inputs=static_inputs, resamplemethod='nearest')

    # set monthly and yearly cumulative arrays (use one of the numpys from the
    # static array that has been normalized):
    model_arr_shape = self.interception.shape
    # Output accumulators must be instantiated in case accumulate_mode != None
    # monthly
    et_month_cum_arr = np.zeros(model_arr_shape)
    dd_month_cum_arr = np.zeros(model_arr_shape)
    srf_month_cum_arr = np.zeros(model_arr_shape)
    etc_month_cum_arr = np.zeros(model_arr_shape)
    netet_month_cum_arr = np.zeros(model_arr_shape)
    # yearly
    rain_yearly_cum_arr = np.zeros(model_arr_shape)
    swe_yearly_cum_arr = np.zeros(model_arr_shape)
    et_yearly_cum_arr = np.zeros(model_arr_shape)
    dd_yearly_cum_arr = np.zeros(model_arr_shape)
    srf_yearly_cum_arr = np.zeros(model_arr_shape)
    etc_yearly_cum_arr = np.zeros(model_arr_shape)
    netet_yearly_cum_arr = np.zeros(model_arr_shape)

    # the soil water fraction and snowpack are none to start out.
    changing_swf = None
    changing_snwpck = None
    for i in range(num_days + 1):
        # so what day is it
        t0 = t_now()
        today = start_dt + timedelta(days=i)
        if i == 0:
            # First day: no previous-day state to pass in.
            rain, swf, snwpck, swe, DDrain, SRf, etc, etasw, netet = self._run_water_bal(
                i,
                today,
                self.interception,
                self.whc,
                self.field_capacity,
                self.saturation,
                self.rf_coeff,
                self.k_factor,
                self.ndvi_factor,
                self.water_factor,
                self.bias_corr,
                self.alfa_factor,
                self.watermask,
                outdir=self.outdir,
                yest_snwpck=None,
                yest_swf=None,
                geoproperties_file=self.geoproperties_file,
                daily_mode=output_daily_arr)
            changing_swf = swf
            changing_snwpck = snwpck
        else:
            # see if today is a day that we need to output a monthly raster
            if 'monthly' in accumulate_mode:
                d = today.day
                mo = today.month
                yr = today.year
                output_monthly_arr = self._end_of_month(d, mo, yr)

            if 'yearly' in accumulate_mode:
                # todo - deal with Water Year mode later
                # this function does calendar years
                d = today.day
                mo = today.month
                if d == 31 and mo == 12:
                    output_yearly_arr = True
                else:
                    output_yearly_arr = False

            print('output monthly is {} and output yearly is {}'.format(
                output_monthly_arr, output_yearly_arr))

            # Carry yesterday's snowpack and soil water fraction forward.
            rain, swf, snwpck, swe, DDrain, SRf, etc, etasw, netet = self._run_water_bal(
                i,
                today,
                self.interception,
                self.whc,
                self.field_capacity,
                self.saturation,
                self.rf_coeff,
                self.k_factor,
                self.ndvi_factor,
                self.water_factor,
                self.bias_corr,
                self.alfa_factor,
                self.watermask,
                outdir=self.outdir,
                yest_snwpck=changing_snwpck,
                yest_swf=changing_swf,
                geoproperties_file=self.geoproperties_file,
                daily_mode=output_daily_arr)

            # monthly
            et_month_cum_arr += etasw
            dd_month_cum_arr += DDrain
            srf_month_cum_arr += SRf
            etc_month_cum_arr += etc
            netet_month_cum_arr += netet
            # yearly
            rain_yearly_cum_arr += rain
            swe_yearly_cum_arr += swe
            et_yearly_cum_arr += etasw
            dd_yearly_cum_arr += DDrain
            srf_yearly_cum_arr += SRf
            etc_yearly_cum_arr += etc
            netet_yearly_cum_arr += netet

            if output_monthly_arr:
                # function to create monthly output rasters for each variable
                # (file names are '<year>/<var>_<year><month>.tif')
                self.rmanager.output_rasters(
                    et_month_cum_arr, self.outdir,
                    '{}/etasw_{}{:02d}.tif'.format(today.year, today.year,
                                                   today.month))
                self.rmanager.output_rasters(
                    dd_month_cum_arr, self.outdir,
                    '{}/dd_{}{:02d}.tif'.format(today.year, today.year,
                                                today.month))
                self.rmanager.output_rasters(
                    srf_month_cum_arr, self.outdir,
                    '{}/srf_{}{:02d}.tif'.format(today.year, today.year,
                                                 today.month))
                self.rmanager.output_rasters(
                    etc_month_cum_arr, self.outdir,
                    '{}/etc_{}{:02d}.tif'.format(today.year, today.year,
                                                 today.month))
                self.rmanager.output_rasters(
                    netet_month_cum_arr, self.outdir,
                    '{}/netet_{}{:02d}.tif'.format(today.year, today.year,
                                                   today.month))

                # zero-out arrays to start the next month over.
                dd_month_cum_arr = np.zeros(model_arr_shape)
                srf_month_cum_arr = np.zeros(model_arr_shape)
                et_month_cum_arr = np.zeros(model_arr_shape)
                etc_month_cum_arr = np.zeros(model_arr_shape)
                netet_month_cum_arr = np.zeros(model_arr_shape)
                output_monthly_arr = False

            if output_yearly_arr:
                # function to create yearly output rasters for each variables
                self.rmanager.output_rasters(
                    et_yearly_cum_arr, self.outdir,
                    'Annual/etasw_{}.tif'.format(today.year))
                self.rmanager.output_rasters(
                    dd_yearly_cum_arr, self.outdir,
                    'Annual/dd_{}.tif'.format(today.year))
                self.rmanager.output_rasters(
                    srf_yearly_cum_arr, self.outdir,
                    'Annual/srf_{}.tif'.format(today.year))
                self.rmanager.output_rasters(
                    etc_yearly_cum_arr, self.outdir,
                    'Annual/etc_{}.tif'.format(today.year))
                self.rmanager.output_rasters(
                    netet_yearly_cum_arr, self.outdir,
                    'Annual/netet_{}.tif'.format(today.year))
                self.rmanager.output_rasters(
                    rain_yearly_cum_arr, self.outdir,
                    'Annual/rain_{}.tif'.format(today.year))
                self.rmanager.output_rasters(
                    swe_yearly_cum_arr, self.outdir,
                    'Annual/swe_{}.tif'.format(today.year))

                # zero-out arrays to start the next year over.
                rain_yearly_cum_arr = np.zeros(model_arr_shape)
                swe_yearly_cum_arr = np.zeros(model_arr_shape)
                dd_yearly_cum_arr = np.zeros(model_arr_shape)
                srf_yearly_cum_arr = np.zeros(model_arr_shape)
                et_yearly_cum_arr = np.zeros(model_arr_shape)
                etc_yearly_cum_arr = np.zeros(model_arr_shape)
                netet_yearly_cum_arr = np.zeros(model_arr_shape)
                output_yearly_arr = False

            # Today's state becomes tomorrow's "yesterday" input.
            changing_swf = swf
            changing_snwpck = snwpck

        # Per-day wall-clock time, measured from the top of this iteration.
        t_total = t_now() - t0
        self.log.info("DAY - TIME - {} - {}".format(t_total, today))

    print('-------------------------------')
    print('THE END')

    # Persist the run log and the configuration next to the outputs.
    s3_output_path = self.outdir
    print('SAVE LOG')
    s3_save_log_file(s3_output_path)
    veget_config_path = self.config_dict['veget_config_path']
    s3_save_config_files(veget_config_path, s3_output_path)
def __call__(self, n=1):
    """Record ``n`` events: stamp ``t_last`` with the current time and add ``n`` to ``self.n``.

    :param n: amount added to the running count (defaults to 1)
    """
    now = t_now()
    self.t_last = now
    self.n += n