def test_kub_glencourse_flo2d_calibrate(self):
    out_base_dir = tempfile.mkdtemp(prefix='glencourse_')

    # rain = np.genfromtxt('/home/curw/Desktop/glen/rain.csv', delimiter=',', names=True, dtype=None,
    #                      converters={0: lambda s: dt.datetime.strptime(s.decode("utf-8"), '%Y-%m-%d %H:%M')})
    rain = np.genfromtxt('/home/curw/Desktop/glen/rain.csv', delimiter=',', names=True, dtype=None)

    # build {station_name: [lon, lat]} from the coordinates file (name, lat, lon columns)
    coord = np.genfromtxt('/home/curw/Desktop/glen/coordinates.csv', names=True, delimiter=',', dtype=None)
    stations = {}
    for c in coord:
        n = c[0].decode('utf-8')
        stations[n] = [c[2], c[1]]

    points = np.genfromtxt(
        '/home/curw/git/models/curw/rainfall/wrf/resources/extraction/local/klb_glecourse_points_150m.txt',
        delimiter=',', names=['id', 'lon', 'lat'], dtype=[int, float, float])

    thess_poly = spatial_utils.get_voronoi_polygons(stations,
                                                    '/home/curw/git/models/curw/rainfall/wrf/resources/extraction'
                                                    '/shp/klb_glencourse/klb_glencourse.shp',
                                                    add_total_area=False,
                                                    output_shape_file=os.path.join(out_base_dir, 'out.shp'))

    # id of the Thiessen polygon (i.e. station name) containing each grid point
    region = [spatial_utils.is_inside_geo_df(thess_poly, points['lon'][i], points['lat'][i])
              for i in range(len(points))]

    with open(os.path.join(out_base_dir, 'raincell.dat'), 'w') as out:
        for r in rain:
            for i, p in enumerate(points):
                out.write('%d %g\n' % (p[0], r[region[i]]))
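# A minimal, self-contained sketch (not from the original codebase) of the lookup
# pattern used in the test above: is_inside_geo_df yields the id of the Thiessen
# polygon containing each grid point, and that id then indexes the structured
# rainfall array by its station column. Station names and values are illustrative.
def demo_station_column_lookup():
    import io

    import numpy as np

    csv = io.StringIO('time,StationA,StationB\n'
                      '2018-01-01 00:00,1.5,0.0\n'
                      '2018-01-01 01:00,2.5,3.0\n')
    rain = np.genfromtxt(csv, delimiter=',', names=True, dtype=None, encoding='utf-8')
    region = ['StationA', 'StationB', 'StationA']  # polygon id of each grid point
    for r in rain:
        for point_id, station in enumerate(region, start=1):
            print('%d %g' % (point_id, r[station]))  # e.g. "1 1.5", "2 0", "3 1.5"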
def calc_station_fraction(self, stations, precision_decimal_points=3):
    """
    The given station lat/lon points must reside inside the KUB shape; otherwise the results may be incorrect.
    :param stations: dict of station_name: [lon, lat] pairs
    :param precision_decimal_points: int
    :return: dict of station_id: area percentage
    """
    if stations is None:
        raise ValueError("'stations' cannot be null.")
    station_list = stations.keys()
    if len(station_list) <= 0:
        raise ValueError("'stations' cannot be empty.")

    # fewer than 3 stations: fall back to an even split
    station_fractions = {}
    if len(station_list) < 3:
        for station in station_list:
            station_fractions[station] = np.round(self.percentage_factor / len(station_list),
                                                  precision_decimal_points)
        return station_fractions

    total_area = 0
    # calculate the voronoi/thiessen polygons w.r.t. the given station points.
    voronoi_polygons = get_voronoi_polygons(points_dict=stations, shape_file=self.shape_file,
                                            add_total_area=True)
    for row in voronoi_polygons[['id', 'area']].itertuples(index=False, name=None):
        poly_id = row[0]
        area = np.round(row[1], precision_decimal_points)
        station_fractions[poly_id] = area
        # the total calculated by get_voronoi_polygons might not equal the sum of the rest;
        # thus the total is re-calculated here.
        if poly_id != '__total_area__':
            total_area += area
    total_area = np.round(total_area, precision_decimal_points)

    for station in station_list:
        if station in station_fractions:
            station_fractions[station] = np.round(
                (station_fractions[station] * self.percentage_factor) / total_area, precision_decimal_points)
        else:
            station_fractions[station] = np.round(0.0, precision_decimal_points)

    return station_fractions
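# A hedged usage sketch for calc_station_fraction. It assumes the enclosing class
# (hypothetically named KUBObservationMean here) exposes `shape_file` and
# `percentage_factor` (e.g. 100 for percentages) as attributes; the station
# coordinates below are illustrative only.
#
# mean_calc = KUBObservationMean()
# fractions = mean_calc.calc_station_fraction(
#     stations={'Hanwella': [80.0817, 6.9092],
#               'Glencourse': [80.2025, 6.9768],
#               'Holombuwa': [80.2656, 7.1856]})
# # e.g. {'Hanwella': 34.2, 'Glencourse': 41.5, 'Holombuwa': 24.3}, summing to ~100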
def create_rainfall_for_mike21_obs(d0_rf_file, adapter, obs_stations, output_dir, start_ts, duration_days=None,
                                   kelani_lower_basin_shp=None):
    if kelani_lower_basin_shp is None:
        kelani_lower_basin_shp = res_mgr.get_resource_path('extraction/shp/klb-wgs84/klb-wgs84.shp')
    if duration_days is None:
        duration_days = (2, 3)

    obs_start = dt.datetime.strptime(start_ts, '%Y-%m-%d_%H:%M') - dt.timedelta(days=duration_days[0])
    obs_end = dt.datetime.strptime(start_ts, '%Y-%m-%d_%H:%M')
    # forecast_end = dt.datetime.strptime(start_ts, '%Y-%m-%d_%H:%M') + dt.timedelta(days=duration_days[1])

    obs = _get_observed_precip(obs_stations, obs_start, obs_end, duration_days, adapter)
    thess_poly = spatial_utils.get_voronoi_polygons(obs_stations, kelani_lower_basin_shp, add_total_area=False)

    # area-weighted (Thiessen) mean of the observed station series
    observed = None
    for i, _id in enumerate(thess_poly['id']):
        if observed is not None:
            observed = observed + obs[_id].astype(float) * thess_poly['area'][i]
        else:
            observed = obs[_id].astype(float) * thess_poly['area'][i]
    observed = observed / sum(thess_poly['area'])

    # infer the temporal resolution (in minutes) from the first two timestamps of the forecast file
    d0 = np.genfromtxt(d0_rf_file, dtype=str)
    t0 = dt.datetime.strptime(' '.join(d0[0][:-1]), '%Y-%m-%d %H:%M:%S')
    t1 = dt.datetime.strptime(' '.join(d0[1][:-1]), '%Y-%m-%d %H:%M:%S')
    res_min = int((t1 - t0).total_seconds() / 60)
    # prev_output = np.append(prev_output, d0, axis=0)

    out_file = os.path.join(utils.create_dir_if_not_exists(output_dir), 'rf_mike21_obs.txt')
    with open(out_file, 'w') as out_f:
        for index in observed.index:
            out_f.write('%s:00\t%.4f\n' % (index, observed.precip[index]))

        forecast_start_idx = int(
            np.where((d0[:, 0] == obs_end.strftime('%Y-%m-%d')) & (d0[:, 1] == obs_end.strftime('%H:%M:%S')))[0])

        # note: no need to convert to utc as rf_mike21.txt has times in LK
        for i in range(forecast_start_idx + 1, int(24 * 60 * duration_days[1] / res_min)):
            if i < len(d0):
                out_f.write('%s %s\t%s\n' % (d0[i][0], d0[i][1], d0[i][2]))
            else:
                out_f.write('%s\t0.0\n' % (obs_end + dt.timedelta(hours=i - forecast_start_idx - 1)).strftime(
                    '%Y-%m-%d %H:%M:%S'))
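# A minimal, self-contained sketch (not from the original codebase) of the
# area-weighted Thiessen mean computed above: each station series is scaled by its
# polygon area and the sum is normalised by the total area. Series and areas below
# are illustrative only.
def demo_thiessen_weighted_mean():
    import pandas as pd

    obs = {'A': pd.Series([1.0, 2.0]), 'B': pd.Series([3.0, 0.0])}
    areas = {'A': 30.0, 'B': 70.0}
    observed = sum(obs[s] * areas[s] for s in obs) / sum(areas.values())
    print(observed.tolist())  # [2.4, 0.6] == 0.3 * [1, 2] + 0.7 * [3, 0]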
def extract_kelani_basin_rainfall_flo2d_with_obs(nc_f, adapter, obs_stations, output_dir, start_ts_lk,
                                                 duration_days=None, output_prefix='RAINCELL',
                                                 kelani_lower_basin_points=None, kelani_lower_basin_shp=None):
    """
    check the test_extract_kelani_basin_rainfall_flo2d_obs test case
    :param nc_f: file path of the wrf output
    :param adapter:
    :param obs_stations: dict of stations. {station_name: [lon, lat, variable name, nearest wrf point station name]}
    :param output_dir:
    :param start_ts_lk: start time of the forecast/ end time of the observations
    :param duration_days: (optional) a tuple (observation days, forecast days), default (2, 3)
    :param output_prefix: (optional) output file name of the RAINCELL file. ex: output_prefix='RAINCELL-150m' --> RAINCELL-150m.DAT
    :param kelani_lower_basin_points: (optional)
    :param kelani_lower_basin_shp: (optional)
    :return:
    """
    if duration_days is None:
        duration_days = (2, 3)
    if kelani_lower_basin_points is None:
        kelani_lower_basin_points = res_mgr.get_resource_path('extraction/local/kelani_basin_points_250m.txt')
    if kelani_lower_basin_shp is None:
        kelani_lower_basin_shp = res_mgr.get_resource_path('extraction/shp/klb-wgs84/klb-wgs84.shp')

    points = np.genfromtxt(kelani_lower_basin_points, delimiter=',')
    kel_lon_min = np.min(points, 0)[1]
    kel_lat_min = np.min(points, 0)[2]
    kel_lon_max = np.max(points, 0)[1]
    kel_lat_max = np.max(points, 0)[2]

    diff, kel_lats, kel_lons, times = ext_utils.extract_area_rf_series(nc_f, kel_lat_min, kel_lat_max, kel_lon_min,
                                                                       kel_lon_max)

    def get_bins(arr):
        # midpoints between consecutive cell centres, used as bin edges for np.digitize
        return (arr[1:] + arr[:-1]) / 2

    lat_bins = get_bins(kel_lats)
    lon_bins = get_bins(kel_lons)

    t0 = dt.datetime.strptime(times[0], '%Y-%m-%d_%H:%M:%S')
    t1 = dt.datetime.strptime(times[1], '%Y-%m-%d_%H:%M:%S')

    utils.create_dir_if_not_exists(output_dir)

    obs_start = dt.datetime.strptime(start_ts_lk, '%Y-%m-%d_%H:%M') - dt.timedelta(days=duration_days[0])
    obs_end = dt.datetime.strptime(start_ts_lk, '%Y-%m-%d_%H:%M')
    forecast_end = dt.datetime.strptime(start_ts_lk, '%Y-%m-%d_%H:%M') + dt.timedelta(days=duration_days[1])

    obs = _get_observed_precip(obs_stations, obs_start, obs_end, duration_days, adapter)
    thess_poly = spatial_utils.get_voronoi_polygons(obs_stations, kelani_lower_basin_shp, add_total_area=False)

    output_file_path = os.path.join(output_dir, output_prefix + '.DAT')

    # map each point to the id of the Thiessen polygon containing it
    point_thess_idx = []
    for point in points:
        point_thess_idx.append(spatial_utils.is_inside_geo_df(thess_poly, lon=point[1], lat=point[2]))

    with open(output_file_path, 'w') as output_file:
        res_mins = int((t1 - t0).total_seconds() / 60)
        data_hours = int(sum(duration_days) * 24 * 60 / res_mins)
        start_ts = obs_start.strftime('%Y-%m-%d %H:%M:%S')
        end_ts = forecast_end.strftime('%Y-%m-%d %H:%M:%S')
        output_file.write("%d %d %s %s\n" % (res_mins, data_hours, start_ts, end_ts))

        # observation period: use the Thiessen-weighted station values
        for t in range(int(24 * 60 * duration_days[0] / res_mins) + 1):
            for i, point in enumerate(points):
                rf = float(obs[point_thess_idx[i]].values[t]) if point_thess_idx[i] is not None else 0
                output_file.write('%d %.1f\n' % (point[0], rf))

        forecast_start_idx = int(
            np.where(times == utils.datetime_lk_to_utc(obs_end, shift_mins=30).strftime('%Y-%m-%d_%H:%M:%S'))[0])

        # forecast period: use the wrf cell nearest to each point
        for t in range(int(24 * 60 * duration_days[1] / res_mins) - 1):
            for point in points:
                rf_x = np.digitize(point[1], lon_bins)
                rf_y = np.digitize(point[2], lat_bins)
                if t + forecast_start_idx + 1 < len(times):
                    output_file.write('%d %.1f\n' % (point[0], diff[t + forecast_start_idx + 1, rf_y, rf_x]))
                else:
                    output_file.write('%d %.1f\n' % (point[0], 0))
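# A minimal, self-contained sketch (not from the original codebase) of the
# np.digitize lookup used in the forecast loop above: midpoints between cell
# centres act as bin edges, so an arbitrary coordinate maps onto the index of the
# nearest grid cell. The cell centres below are illustrative only.
def demo_nearest_cell_lookup():
    import numpy as np

    centres = np.array([79.80, 79.85, 79.90, 79.95])  # e.g. kel_lons
    bins = (centres[1:] + centres[:-1]) / 2           # [79.825, 79.875, 79.925]
    print(np.digitize(79.87, bins))                   # 1 -> nearest centre is 79.85
    print(np.digitize(79.94, bins))                   # 3 -> nearest centre is 79.95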
def create_kub_timeseries(adapter, stations, duration, opts):
    print("""
    *********************************************************
    *   Create KUB Data                                     *
    *********************************************************
    """)
    # Duration args destruction
    start_date_time = duration.get('start_date_time', None)
    end_date_time = duration.get('end_date_time', None)
    # Opts args destruction
    force_insert = opts.get('force_insert', False)

    variables = ['Precipitation']
    units = ['mm']
    metaData = {
        'station': 'Hanwella',
        'variable': 'Precipitation',
        'unit': 'mm',
        'type': 'Observed',
        'source': 'WeatherStation',
        'name': 'WUnderground',
    }

    for i in range(len(variables)):
        print('variable:', variables[i], ' unit:', units[i])
        meta = copy.deepcopy(metaData)
        meta['variable'] = variables[i]
        meta['unit'] = units[i]

        points = {}
        points_timeseries = {}
        # Get the KUB basin shape file for checking whether a weather station resides within it
        shp = ResourceManager.get_resource_path('shp/kelani-upper-basin/kelani-upper-basin.shp')
        shape_polygon = get_basin_shape(shp)

        for station in stations:
            print('\n**************** STATION **************')
            print('station:', station['name'])

            # Check whether the station exists
            existing_station = adapter.get_station({'name': station['name']})
            if existing_station is None:
                logging.warning('Station %s does not exist. Continuing with the others.', station['name'])
                continue

            # Check whether the station resides within the basin
            # NOTE: No need to check whether it is inside the basin
            # if not Point(existing_station['longitude'], existing_station['latitude']).within(shape_polygon):
            #     logging.warning('Station %s is not inside the KUB. Continuing with the others.', station['name'])
            #     continue
            if 'basin' in station and station['basin'].lower() != 'kub':
                logging.warning('Station %s is not inside the KUB. Continuing with the others.', station['name'])
                continue

            meta['station'] = station['name']
            if 'run_name' in station:
                meta['name'] = station['run_name']

            # -- Get the processed timeseries for this station
            event_id = adapter.get_event_id(meta)
            if event_id is None:
                logging.warning('Event id for %s does not exist. Continuing with the others.', station['name'])
                continue
            logging.debug('%s : eventId is %s. Search with %s', station['name'], event_id, meta)
            query_opts = {
                'from': start_date_time.strftime("%Y-%m-%d %H:%M:%S"),
                'to': end_date_time.strftime("%Y-%m-%d %H:%M:%S"),
                'mode': Data.processed_data,
            }
            station_timeseries = adapter.retrieve_timeseries([event_id], query_opts)
            if len(station_timeseries) and len(station_timeseries[0]['timeseries']) > 0:
                station_timeseries = station_timeseries[0]['timeseries']
            else:
                print('INFO: Timeseries does not have any data on:',
                      end_date_time.strftime("%Y-%m-%d"), query_opts, station_timeseries)
                continue

            # -- Check whether the timeseries is worth counting in
            is_available = False
            if variables[i] in station['variables']:
                station_variable_index = station['variables'].index(variables[i])
                min_values = station['min_values']
                max_values = station['max_values']
                validationObj = {
                    'max_value': max_values[station_variable_index],
                    'min_value': min_values[station_variable_index],
                }
                is_available = timeseries_availability(station_timeseries, validationObj)

            # -- If the timeseries is valid, store it for further use
            if is_available:
                points[station['name']] = [existing_station['longitude'], existing_station['latitude']]
                points_timeseries[station['name']] = station_timeseries
        # -- END For Loop - Getting data from stations

        if len(points) < 1:
            logging.warning("No station data found for the given period of time. Abort...")
            print("No station data found for the given period of time. Abort...")
            continue

        if is_unique_points(points):
            logging.info('Available stations %s', points)
            print('Available stations:', points)
        else:
            logging.warning("Available points should be unique: %s. Abort...", points)
            print("Available points should be unique: %s. Abort..." % points)
            continue

        thiessen_dict = {}
        total_area = 0.0
        # If there's one station, take it as it is
        if len(points) == 1:
            thiessen_dict[list(points.keys())[0]] = 1
            total_area = 1
        # If there are two stations, take the average of both
        elif len(points) == 2:
            thiessen_dict[list(points.keys())[0]] = 0.5
            thiessen_dict[list(points.keys())[1]] = 0.5
            total_area = 1
        # If there are more than two stations, create the Thiessen polygons
        else:
            logging.debug("Create thiessen polygon for KUB using points: %s", points)
            out = tempfile.mkdtemp(prefix='voronoi_')
            result = get_voronoi_polygons(points, shp, ['OBJECTID', 1],
                                          output_shape_file=os.path.join(out, 'out.shp'))
            print(result)
            for row in result.iterrows():
                if row[1][0] != '__total_area__':
                    thiessen_dict[row[1][0]] = row[1][3]
                else:
                    total_area = row[1][3]

        if total_area == 0.0:
            logging.warning('Total Area can not be 0.0')
            return

        upper_thiessen_values = OrderedDict()
        for t_station_name in thiessen_dict.keys():
            thiessen_factor = thiessen_dict[t_station_name] / total_area
            for tt in points_timeseries.get(t_station_name, []):
                key = tt[0].timestamp()
                # If the key is not in the dictionary, create a new key
                if key not in upper_thiessen_values:
                    # If the precipitation is not a positive value, do not create a value for that timestamp
                    if float(tt[1]) > -0.0001:
                        upper_thiessen_values[key] = float(tt[1]) * thiessen_factor
                else:
                    # Add to the thiessen value at that timestamp
                    upper_thiessen_values[key] += float(tt[1]) * (
                        thiessen_factor if float(tt[1]) > -0.0001 else 0)

        # Iterate through each timestamp
        kub_timeseries = []
        for avg in upper_thiessen_values:
            d = datetime.fromtimestamp(avg)
            kub_timeseries.append([d.strftime('%Y-%m-%d %H:%M:%S'), "%.3f" % upper_thiessen_values[avg]])

        # -- Create a station for KUB Obs
        is_kub_station = adapter.get_station({'name': 'KUB Obs'})
        if is_kub_station is None:
            kub_station = (Station.CUrW, 'curw_kub_obs', 'KUB Obs', 7.111666667, 80.14983333, 0,
                           "Kelani Upper Basin Observation")
            adapter.create_station(kub_station)

        # -- Store the KUB timeseries
        metaKUB = copy.deepcopy(metaData)
        metaKUB['station'] = 'KUB Obs'
        metaKUB['variable'] = variables[i]
        metaKUB['unit'] = units[i]
        metaKUB['name'] = 'KUB Obs Mean'
        kub_event_id = adapter.get_event_id(metaKUB)
        if kub_event_id is None:
            kub_event_id = adapter.create_event_id(metaKUB)
            print('HASH SHA256 created: ', kub_event_id)
        else:
            print('HASH SHA256 exists: ', kub_event_id)

        query_opts = {
            'from': start_date_time.strftime("%Y-%m-%d %H:%M:%S"),
            'to': end_date_time.strftime("%Y-%m-%d %H:%M:%S"),
            'mode': Data.processed_data,
        }
        existingTimeseries = adapter.retrieve_timeseries(metaKUB, query_opts)
        if len(existingTimeseries) and len(existingTimeseries[0]['timeseries']) > 0 and not force_insert:
            print('Timeseries already exists. Use force insert to insert data.\n')
            continue

        rowCount = adapter.insert_timeseries(kub_event_id, kub_timeseries,
                                             upsert=force_insert, mode=Data.processed_data)
        print('%s rows inserted.\n' % rowCount)