Code example #1
File: extractor.py Project: hasithadkr7/wrf_docker
    def test_kub_glencourse_flo2d_calibrate(self):
        out_base_dir = tempfile.mkdtemp(prefix='glencourse_')

        # rain = np.genfromtxt('/home/curw/Desktop/glen/rain.csv', delimiter=',', names=True, dtype=None,
        #                      converters={0: lambda s: dt.datetime.strptime(s.decode("utf-8"), '%Y-%m-%d %H:%M')})

        rain = np.genfromtxt('/home/curw/Desktop/glen/rain.csv', delimiter=',', names=True, dtype=None)

        coord = np.genfromtxt('/home/curw/Desktop/glen/coordinates.csv', names=True, delimiter=',', dtype=None)
        stations = {}
        for i, c in enumerate(coord):
            n = c[0].decode('utf-8')
            # coordinates.csv columns appear to be (name, lat, lon); store as [lon, lat]
            stations[n] = [c[2], c[1]]

        points = np.genfromtxt(
            '/home/curw/git/models/curw/rainfall/wrf/resources/extraction/local/klb_glecourse_points_150m.txt',
            delimiter=',', names=['id', 'lon', 'lat'], dtype=[int, float, float])

        thess_poly = spatial_utils.get_voronoi_polygons(stations,
                                                        '/home/curw/git/models/curw/rainfall/wrf/resources/extraction'
                                                        '/shp/klb_glencourse/klb_glencourse.shp',
                                                        add_total_area=False,
                                                        output_shape_file=os.path.join(out_base_dir, 'out.shp')
                                                        )

        # for each grid point, the id of the Thiessen polygon (station) that contains it
        region = [spatial_utils.is_inside_geo_df(thess_poly, points['lon'][i], points['lat'][i]) for i in
                  range(len(points))]

        # write one rainfall value per grid point per time step, indexing the
        # structured rain row by the name of the containing station
        with open(os.path.join(out_base_dir, 'raincell.dat'), 'w') as out:
            for r in rain:
                for i, p in enumerate(points):
                    out.write('%d %g\n' % (p[0], r[region[i]]))
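
For reference, the point-to-polygon mapping that spatial_utils.is_inside_geo_df performs above can be sketched with shapely. Everything here (polygon coordinates, station names, the which_cell helper) is hypothetical, not the project's API:

from shapely.geometry import Point, Polygon

# Hypothetical Thiessen cells keyed by station name (coordinates are made up).
cells = {
    'station_a': Polygon([(79.9, 6.9), (80.1, 6.9), (80.1, 7.1), (79.9, 7.1)]),
    'station_b': Polygon([(80.1, 6.9), (80.3, 6.9), (80.3, 7.1), (80.1, 7.1)]),
}

def which_cell(lon, lat):
    # return the id of the first cell containing the point, or None if outside all
    p = Point(lon, lat)
    for cell_id, poly in cells.items():
        if poly.contains(p):
            return cell_id
    return None

print(which_cell(80.0, 7.0))  # -> 'station_a'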
Code example #2
File: kub_mean.py Project: CUrW-SL/data_integration
    def calc_station_fraction(self, stations, precision_decimal_points=3):
        """
        The given station points must reside inside the KUB shape; otherwise the results may be incorrect.
        :param stations: dict of station_name: [lon, lat] pairs
        :param precision_decimal_points: int
        :return: dict of station_id: area percentage
        """

        if stations is None:
            raise ValueError("'stations' cannot be null.")

        station_list = stations.keys()
        if len(station_list) <= 0:
            raise ValueError("'stations' cannot be empty.")

        station_fractions = {}
        if len(station_list) < 3:
            for station in station_list:
                station_fractions[station] = np.round(
                    self.percentage_factor / len(station_list),
                    precision_decimal_points)
            return station_fractions

        total_area = 0

        # calculate the Voronoi/Thiessen polygons w.r.t. the given station points.
        voronoi_polygons = get_voronoi_polygons(points_dict=stations,
                                                shape_file=self.shape_file,
                                                add_total_area=True)

        for row in voronoi_polygons[['id', 'area']].itertuples(index=False,
                                                               name=None):
            station_id = row[0]
            area = np.round(row[1], precision_decimal_points)
            station_fractions[station_id] = area
            # the total computed by get_voronoi_polygons may not equal the sum of
            # the parts, so recompute the total here
            if station_id != '__total_area__':
                total_area += area
        total_area = np.round(total_area, precision_decimal_points)

        for station in station_list:
            if station in station_fractions:
                station_fractions[station] = np.round(
                    (station_fractions[station] * self.percentage_factor) /
                    total_area, precision_decimal_points)
            else:
                station_fractions[station] = np.round(
                    0.0, precision_decimal_points)

        return station_fractions
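
The fewer-than-three-stations branch above splits percentage_factor evenly, since a Voronoi tessellation needs at least three generator points. A minimal sketch of that arithmetic, assuming percentage_factor is 100 and illustrative station coordinates:

import numpy as np

percentage_factor = 100  # assumed value of self.percentage_factor
stations = {'hanwella': [80.08, 6.90], 'glencourse': [80.20, 6.97]}  # illustrative

# with fewer than three stations the factor is split evenly
fractions = {name: np.round(percentage_factor / len(stations), 3)
             for name in stations}
print(fractions)  # {'hanwella': 50.0, 'glencourse': 50.0}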
Code example #3
def create_rainfall_for_mike21_obs(d0_rf_file, adapter, obs_stations, output_dir, start_ts, duration_days=None,
                                   kelani_lower_basin_shp=None):
    if kelani_lower_basin_shp is None:
        kelani_lower_basin_shp = res_mgr.get_resource_path('extraction/shp/klb-wgs84/klb-wgs84.shp')

    if duration_days is None:
        duration_days = (2, 3)

    obs_start = dt.datetime.strptime(start_ts, '%Y-%m-%d_%H:%M') - dt.timedelta(days=duration_days[0])
    obs_end = dt.datetime.strptime(start_ts, '%Y-%m-%d_%H:%M')
    # forecast_end = dt.datetime.strptime(start_ts, '%Y-%m-%d_%H:%M') + dt.timedelta(days=duration_days[1])

    obs = _get_observed_precip(obs_stations, obs_start, obs_end, duration_days, adapter)

    thess_poly = spatial_utils.get_voronoi_polygons(obs_stations, kelani_lower_basin_shp, add_total_area=False)

    # area-weighted (Thiessen) mean of the observed station series
    observed = None
    for i, _id in enumerate(thess_poly['id']):
        if observed is not None:
            observed = observed + obs[_id].astype(float) * thess_poly['area'][i]
        else:
            observed = obs[_id].astype(float) * thess_poly['area'][i]
    observed = observed / sum(thess_poly['area'])

    d0 = np.genfromtxt(d0_rf_file, dtype=str)
    # infer the time-step resolution (in minutes) from the first two timestamps
    t0 = dt.datetime.strptime(' '.join(d0[0][:-1]), '%Y-%m-%d %H:%M:%S')
    t1 = dt.datetime.strptime(' '.join(d0[1][:-1]), '%Y-%m-%d %H:%M:%S')

    res_min = int((t1 - t0).total_seconds() / 60)

    # prev_output = np.append(prev_output, d0, axis=0)
    out_file = os.path.join(utils.create_dir_if_not_exists(output_dir), 'rf_mike21_obs.txt')

    with open(out_file, 'w') as out_f:
        for index in observed.index:
            out_f.write('%s:00\t%.4f\n' % (index, observed.precip[index]))

        forecast_start_idx = int(
            np.where((d0[:, 0] == obs_end.strftime('%Y-%m-%d')) & (d0[:, 1] == obs_end.strftime('%H:%M:%S')))[0])
        # note: no need to convert to utc as rf_mike21.txt has times in LK

        for i in range(forecast_start_idx + 1, int(24 * 60 * duration_days[1] / res_min)):
            if i < len(d0):
                out_f.write('%s %s\t%s\n' % (d0[i][0], d0[i][1], d0[i][2]))
            else:
                out_f.write('%s\t0.0\n' % (obs_end + dt.timedelta(hours=i - forecast_start_idx - 1)).strftime(
                    '%Y-%m-%d %H:%M:%S'))
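
The observed series built above is an area-weighted (Thiessen) mean of the per-station precipitation. A self-contained sketch of the same weighting with pandas; the station names, values, and areas are made up:

import pandas as pd

# hypothetical per-station precip series sharing one index, plus Thiessen areas
obs = {
    'a': pd.Series([1.0, 2.0], index=['2018-05-01 00:00', '2018-05-01 01:00']),
    'b': pd.Series([3.0, 0.0], index=['2018-05-01 00:00', '2018-05-01 01:00']),
}
areas = {'a': 2.0, 'b': 1.0}

# area-weighted mean: sum(series_i * area_i) / sum(area_i)
weighted = sum(obs[k] * areas[k] for k in obs) / sum(areas.values())
print(weighted)  # 00:00 -> (1*2 + 3*1)/3 = 1.667, 01:00 -> 4/3 = 1.333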
Code example #4
def extract_kelani_basin_rainfall_flo2d_with_obs(nc_f, adapter, obs_stations, output_dir, start_ts_lk,
                                                 duration_days=None, output_prefix='RAINCELL',
                                                 kelani_lower_basin_points=None, kelani_lower_basin_shp=None):
    """
    check test_extract_kelani_basin_rainfall_flo2d_obs test case
    :param nc_f: file path of the wrf output
    :param adapter:
    :param obs_stations: dict of stations. {station_name: [lon, lat, name variable, nearest wrf point station name]}
    :param output_dir:
    :param start_ts_lk: start time of the forecast/ end time of the observations
    :param duration_days: (optional) a tuple (observation days, forecast days) default (2,3)
    :param output_prefix: (optional) output file name of the RAINCELL file. ex: output_prefix=RAINCELL-150m --> RAINCELL-150m.DAT
    :param kelani_lower_basin_points: (optional)
    :param kelani_lower_basin_shp: (optional)
    :return:
    """
    if duration_days is None:
        duration_days = (2, 3)

    if kelani_lower_basin_points is None:
        kelani_lower_basin_points = res_mgr.get_resource_path('extraction/local/kelani_basin_points_250m.txt')

    if kelani_lower_basin_shp is None:
        kelani_lower_basin_shp = res_mgr.get_resource_path('extraction/shp/klb-wgs84/klb-wgs84.shp')

    points = np.genfromtxt(kelani_lower_basin_points, delimiter=',')

    kel_lon_min = np.min(points, 0)[1]
    kel_lat_min = np.min(points, 0)[2]
    kel_lon_max = np.max(points, 0)[1]
    kel_lat_max = np.max(points, 0)[2]

    diff, kel_lats, kel_lons, times = ext_utils.extract_area_rf_series(nc_f, kel_lat_min, kel_lat_max, kel_lon_min,
                                                                       kel_lon_max)

    def get_bins(arr):
        # midpoints between neighbouring cell centres (note: this drops the last
        # midpoint, so np.digitize can never map a point to the final cell)
        sz = len(arr)
        return (arr[1:sz - 1] + arr[0:sz - 2]) / 2

    lat_bins = get_bins(kel_lats)
    lon_bins = get_bins(kel_lons)

    t0 = dt.datetime.strptime(times[0], '%Y-%m-%d_%H:%M:%S')
    t1 = dt.datetime.strptime(times[1], '%Y-%m-%d_%H:%M:%S')

    utils.create_dir_if_not_exists(output_dir)

    obs_start = dt.datetime.strptime(start_ts_lk, '%Y-%m-%d_%H:%M') - dt.timedelta(days=duration_days[0])
    obs_end = dt.datetime.strptime(start_ts_lk, '%Y-%m-%d_%H:%M')
    forecast_end = dt.datetime.strptime(start_ts_lk, '%Y-%m-%d_%H:%M') + dt.timedelta(days=duration_days[1])

    obs = _get_observed_precip(obs_stations, obs_start, obs_end, duration_days, adapter)

    thess_poly = spatial_utils.get_voronoi_polygons(obs_stations, kelani_lower_basin_shp, add_total_area=False)

    output_file_path = os.path.join(output_dir, output_prefix + '.DAT')

    # map each point to the id of the Thiessen polygon that contains it
    point_thess_idx = []
    for point in points:
        point_thess_idx.append(spatial_utils.is_inside_geo_df(thess_poly, lon=point[1], lat=point[2]))

    with open(output_file_path, 'w') as output_file:
        res_mins = int((t1 - t0).total_seconds() / 60)
        # number of time steps covering the full observation + forecast period
        data_hours = int(sum(duration_days) * 24 * 60 / res_mins)
        start_ts_lk = obs_start.strftime('%Y-%m-%d %H:%M:%S')
        end_ts = forecast_end.strftime('%Y-%m-%d %H:%M:%S')

        output_file.write("%d %d %s %s\n" % (res_mins, data_hours, start_ts_lk, end_ts))

        for t in range(int(24 * 60 * duration_days[0] / res_mins) + 1):
            for i, point in enumerate(points):
                rf = float(obs[point_thess_idx[i]].values[t]) if point_thess_idx[i] is not None else 0
                output_file.write('%d %.1f\n' % (point[0], rf))

        forecast_start_idx = int(
            np.where(times == utils.datetime_lk_to_utc(obs_end, shift_mins=30).strftime('%Y-%m-%d_%H:%M:%S'))[0])
        for t in range(int(24 * 60 * duration_days[1] / res_mins) - 1):
            for point in points:
                rf_x = np.digitize(point[1], lon_bins)
                rf_y = np.digitize(point[2], lat_bins)
                if t + forecast_start_idx + 1 < len(times):
                    output_file.write('%d %.1f\n' % (point[0], diff[t + forecast_start_idx + 1, rf_y, rf_x]))
                else:
                    output_file.write('%d %.1f\n' % (point[0], 0))
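
The forecast half of the loop locates each basin point on the WRF grid with np.digitize against midpoint bins. A minimal sketch of that lookup; the grid coordinates are illustrative, and note this sketch keeps all midpoints, whereas get_bins above drops the last one:

import numpy as np

lons = np.array([80.0, 80.1, 80.2, 80.3])  # hypothetical WRF cell centres
bins = (lons[1:] + lons[:-1]) / 2           # midpoints: [80.05, 80.15, 80.25]

# a point at 80.17 falls in the cell centred at 80.2, i.e. index 2
print(np.digitize(80.17, bins))  # -> 2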
Code example #5
def create_kub_timeseries(adapter, stations, duration, opts):
    print("""
    *********************************************************
    *   Create KUB Data                                     *
    *********************************************************
    """)
    # Duration args destructuring
    start_date_time = duration.get('start_date_time', None)
    end_date_time = duration.get('end_date_time', None)
    # Opts args destructuring
    force_insert = opts.get('force_insert', False)

    variables = ['Precipitation']
    units = ['mm']
    metaData = {
        'station': 'Hanwella',
        'variable': 'Precipitation',
        'unit': 'mm',
        'type': 'Observed',
        'source': 'WeatherStation',
        'name': 'WUnderground',
    }

    for i in range(0, len(variables)):
        print('variable:', variables[i], ' unit:', units[i])
        meta = copy.deepcopy(metaData)
        meta['variable'] = variables[i]
        meta['unit'] = units[i]

        points = {}
        points_timeseries = {}

        # Get the KUB basin shape file to check whether each station resides within it
        shp = ResourceManager.get_resource_path(
            'shp/kelani-upper-basin/kelani-upper-basin.shp')
        shape_polygon = get_basin_shape(shp)

        for station in stations:
            print('\n**************** STATION **************')
            print('station:', station['name'])
            #  Check whether station exists
            is_station_exists = adapter.get_station({'name': station['name']})
            if is_station_exists is None:
                logging.warning(
                    'Station %s does not exist. Continuing with others.',
                    station['name'])
                continue

            # Check whether the station resides within the basin
            # NOTE: no need to check geometrically whether it is inside the basin
            # if not Point(is_station_exists['longitude'], is_station_exists['latitude']).within(shape_polygon):
            #     logging.warning('Station %s is not inside KUB. Continue with others', station['name'])
            #     continue
            if 'basin' in station and station['basin'].lower() != 'kub':
                logging.warning(
                    'Station %s is not inside KUB. Continuing with others',
                    station['name'])
                continue

            meta['station'] = station['name']
            if 'run_name' in station:
                meta['name'] = station['run_name']

            # -- Get Processed Timeseries for this station
            event_id = adapter.get_event_id(meta)
            if event_id is None:
                logging.warning(
                    'Event Id for %s does not exist. Continuing with others',
                    station['name'])
                continue
            logging.debug('%s : eventId is %s. Search with %s',
                          station['name'], event_id, meta)

            opts = {
                'from': start_date_time.strftime("%Y-%m-%d %H:%M:%S"),
                'to': end_date_time.strftime("%Y-%m-%d %H:%M:%S"),
                'mode': Data.processed_data,
            }
            station_timeseries = adapter.retrieve_timeseries([event_id], opts)
            if len(station_timeseries) and len(
                    station_timeseries[0]['timeseries']) > 0:
                station_timeseries = station_timeseries[0]['timeseries']
            else:
                print('INFO: Timeseries has no data on:',
                      end_date_time.strftime("%Y-%m-%d"), opts,
                      station_timeseries)
                continue

            # -- Check whether the timeseries is worth counting in
            is_available = False
            if variables[i] in station['variables']:
                station_variable_index = station['variables'].index(
                    variables[i])
                min_values = station['min_values']
                max_values = station['max_values']
                validationObj = {
                    'max_value': max_values[station_variable_index],
                    'min_value': min_values[station_variable_index],
                }
                is_available = timeseries_availability(station_timeseries,
                                                       validationObj)

            # -- If a valid timeseries, store for further use
            if is_available:
                points[station['name']] = [
                    is_station_exists['longitude'],
                    is_station_exists['latitude']
                ]
                points_timeseries[station['name']] = station_timeseries
        # -- END For Loop - Getting data from stations

        if len(points) < 1:
            logging.warning(
                "No station data found for the given period of time. Abort...")
            print("No station data found for the given period of time. Abort...")
            continue
        else:
            if is_unique_points(points):
                logging.info('Available stations %s', points)
                print('Available stations:', points)
            else:
                logging.warning(
                    "Available points should be unique: %s. Abort...", points)
                print("Available points should be unique: %s. Abort..." %
                      points)
                continue

        thiessen_dict = {}
        total_area = 0.0
        # If there's one station, use it as-is
        if len(points) == 1:
            thiessen_dict[list(points.keys())[0]] = 1
            total_area = 1
        # If there are two stations, average both
        elif len(points) == 2:
            thiessen_dict[list(points.keys())[0]] = 0.5
            thiessen_dict[list(points.keys())[1]] = 0.5
            total_area = 1
        # If there are more than two stations, create Thiessen polygons
        else:
            logging.debug("Create thiessen polygon for KUB using points: %s",
                          points)
            out = tempfile.mkdtemp(prefix='voronoi_')
            result = get_voronoi_polygons(points,
                                          shp, ['OBJECTID', 1],
                                          output_shape_file=os.path.join(
                                              out, 'out.shp'))
            print(result)
            for row in result.iterrows():
                if row[1][0] != '__total_area__':
                    thiessen_dict[row[1][0]] = row[1][3]
                else:
                    total_area = row[1][3]

        if total_area == 0.0:
            logging.warning('Total area cannot be 0.0')
            return
        upper_thiessen_values = OrderedDict()
        for t_station_name in thiessen_dict.keys():
            thiessen_factor = thiessen_dict[t_station_name] / total_area
            for tt in points_timeseries.get(t_station_name, []):
                key = tt[0].timestamp()
                # If the key is not in the dictionary yet, create it
                if key not in upper_thiessen_values:
                    # Only create a value when precipitation is non-negative (within tolerance)
                    if float(tt[1]) > -0.0001:
                        upper_thiessen_values[key] = float(
                            tt[1]) * thiessen_factor
                else:
                    # Add to the Thiessen-weighted value at that timestamp,
                    # ignoring negative (invalid) readings
                    upper_thiessen_values[key] += float(tt[1]) * (
                        thiessen_factor if float(tt[1]) > -0.0001 else 0)

        # Iterate through each timestamp and build the KUB timeseries rows
        kub_timeseries = []
        for ts in upper_thiessen_values:
            d = datetime.fromtimestamp(ts)
            kub_timeseries.append([
                d.strftime('%Y-%m-%d %H:%M:%S'),
                "%.3f" % upper_thiessen_values[ts]
            ])

        # -- Create Station for KUB Obs
        is_kub_station = adapter.get_station({'name': 'KUB Obs'})
        if is_kub_station is None:
            kub_station = \
                (Station.CUrW, 'curw_kub_obs', 'KUB Obs', 7.111666667, 80.14983333, 0, "Kelani Upper Basin Observation")
            adapter.create_station(kub_station)

        # -- Store KUB Timeseries
        metaKUB = copy.deepcopy(metaData)
        metaKUB['station'] = 'KUB Obs'
        metaKUB['variable'] = variables[i]
        metaKUB['unit'] = units[i]
        metaKUB['name'] = 'KUB Obs Mean'
        kub_event_id = adapter.get_event_id(metaKUB)
        if kub_event_id is None:
            kub_event_id = adapter.create_event_id(metaKUB)
            print('HASH SHA256 created: ', kub_event_id)
        else:
            print('HASH SHA256 exists: ', kub_event_id)
            opts = {
                'from': start_date_time.strftime("%Y-%m-%d %H:%M:%S"),
                'to': end_date_time.strftime("%Y-%m-%d %H:%M:%S"),
                'mode': Data.processed_data,
            }
            existingTimeseries = adapter.retrieve_timeseries(metaKUB, opts)
            if len(existingTimeseries) and len(
                    existingTimeseries[0]
                ['timeseries']) > 0 and not force_insert:
                print(
                    'Timeseries already exists. Use force insert to insert data.\n'
                )
                continue

        rowCount = \
            adapter.insert_timeseries(kub_event_id, kub_timeseries, upsert=force_insert, mode=Data.processed_data)
        print('%s rows inserted.\n' % rowCount)
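
The accumulation loop above sums each station's reading scaled by its Thiessen factor per timestamp. A stripped-down sketch of that step with plain dicts; station names, areas, and values are illustrative:

from collections import OrderedDict

thiessen_dict = {'a': 0.6, 'b': 0.4}  # hypothetical polygon areas
total_area = sum(thiessen_dict.values())

points_timeseries = {  # illustrative (timestamp, value) readings per station
    'a': [('2018-05-01 00:00', 10.0)],
    'b': [('2018-05-01 00:00', 20.0)],
}

mean = OrderedDict()
for name, factor in thiessen_dict.items():
    for ts, value in points_timeseries[name]:
        # accumulate each reading scaled by its station's Thiessen fraction
        mean[ts] = mean.get(ts, 0.0) + value * (factor / total_area)

print(mean)  # OrderedDict([('2018-05-01 00:00', 14.0)])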