        aval_triggers = ', '.join("{!s} ({!r})".format(key, val) for (key, val) in aval_triggers)

        s = u'{0};{1};{2};{3};{4};{5};{6};{7}\n'.format(
            m.occurrences, danger_levels, problems, aval_triggers,
            cause_names, aval_types, m.main_message_no, m.main_message_en)
        l.write(s)

    l.close()


if __name__ == "__main__":

    year = '2017-18'
    regions = gm.get_forecast_regions(year=year)
    date_from, date_to = gm.get_dates_from_season(year=year)

    # file names
    file_name_for_warnings_pickle = '{0}{1}'.format(
        env.local_storage, 'runmainmessage warnings.pickle')
    file_name_for_main_messages_pickle = '{0}{1}'.format(
        env.local_storage, 'runmainmessage main messages.pickle')
    file_name_for_main_messages_csv = '{0}{1}'.format(
        env.output_folder, 'Alle hovedbudskap {}.csv'.format(year))

    ##### pickle the warnings and dataset with main messages
    # pickle_warnings(regions, date_from, date_to, file_name_for_warnings_pickle)
    main_messages = select_messages_with_more(file_name_for_warnings_pickle)
    mp.pickle_anything(main_messages, file_name_for_main_messages_pickle)

    main_messages = mp.unpickle_anything(file_name_for_main_messages_pickle)
def get_all_observations(year, output='Nest', geohazard_tids=None, lang_key=1, max_file_age=23):
    """Specialized method for getting all observations for one season (1. sept to 31. august).
    For the current season (at the time of writing, 2018-19), if a request has been made within the
    last 23 hrs, data is retrieved from a locally stored pickle; if not, a new request is made to the
    regObs api. Previous seasons are not requested if a pickle is found in local storage.

    :param year:            [string] Eg. season '2017-18' (sept-sept) or one single year '2018'
    :param output:          [string] 'Nest' or 'List'
    :param geohazard_tids:  [int or list of ints] Default None gives all. Note, the pickle stores all,
                            but this option returns a selection.
    :param lang_key:        [int] 1 is norwegian, 2 is english
    :param max_file_age:    [int] how many hours old the file may be before new data is retrieved

    :return: [list] observations; nested forms if output='Nest', flattened forms if output='List'
    """

    from_date, to_date = gm.get_dates_from_season(year=year)

    file_name_list = '{0}all_observations_list_{1}_lk{2}.pickle'.format(env.local_storage, year, lang_key)
    file_name_nest = '{0}all_observations_nest_{1}_lk{2}.pickle'.format(env.local_storage, year, lang_key)
    get_new = True
    date_limit = dt.datetime.now() - dt.timedelta(hours=max_file_age)

    # if we are well out of the current season (30 days), there is little chance the data set has changed.
    current_season = gm.get_season_from_date(dt.date.today() - dt.timedelta(30))

    if geohazard_tids:
        if not isinstance(geohazard_tids, list):
            geohazard_tids = [geohazard_tids]

    if os.path.exists(file_name_list):
        # if the file covers a season long gone, don't make a new request.
        if year == current_season:
            file_age = dt.datetime.fromtimestamp(os.path.getmtime(file_name_list))
            # If the file is newer than the given time limit, don't make a new request.
            if file_age > date_limit:
                # If the file is larger than a nearly empty file, don't make a new request.
                if os.path.getsize(file_name_list) > 100:  # 100 bytes limit
                    get_new = False
        else:
            get_new = False

    if get_new:
        # When getting new data, get all geo hazards
        nested_observations = go.get_data_as_class(
            from_date=from_date, to_date=to_date, output='Nest',
            geohazard_tids=None, lang_key=lang_key)
        mp.pickle_anything(nested_observations, file_name_nest)

        listed_observations = []
        for d in nested_observations:
            for o in d.Observations:
                if _observation_is_not_empty(o):
                    listed_observations.append(o)
            for p in d.Pictures:
                # p['RegistrationName'] = 'Bilde'
                listed_observations.append(p)
        mp.pickle_anything(listed_observations, file_name_list)

    if output == 'Nest':
        all_nested_observations = mp.unpickle_anything(file_name_nest)
        nested_observations = []

        if geohazard_tids:
            for o in all_nested_observations:
                if o.GeoHazardTID in geohazard_tids:
                    nested_observations.append(o)
        else:
            nested_observations = all_nested_observations

        return nested_observations

    elif output == 'List':
        all_listed_observations = mp.unpickle_anything(file_name_list)
        listed_observations = []

        if geohazard_tids:
            for o in all_listed_observations:
                if o.GeoHazardTID in geohazard_tids:
                    listed_observations.append(o)
        else:
            listed_observations = all_listed_observations

        return listed_observations

    else:
        ml.log_and_print('[warning] getvarsompickles.py -> get_all_observations: Unknown output option')
        return []
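# A minimal usage sketch (hypothetical helper, not part of the module): fetch one season first
# as nested observation forms, then as a flat list restricted to one geohazard. The value 10
# for geohazard_tids is an assumption for snow, following regObs conventions.
def _example_get_all_observations():
    nested = get_all_observations('2018-19', output='Nest')
    snow_list = get_all_observations('2018-19', output='List', geohazard_tids=10)
    print('Nested forms: {0}, snow observations: {1}'.format(len(nested), len(snow_list)))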
def _get_weather_obs(year, date=None, days=None, max_file_age=23):
    """
    Download data from the weather APIs.

    :param year:         String representation of a season. None if a specific date should be fetched.
    :param date:         datetime.date. None if a whole season should be fetched.
    :param days:         Number of days before `date` to fetch (only used together with `date`).
    :param max_file_age: Time to live for the cache, in hours.
    """
    aw = _get_raw_varsom(year, date, days, max_file_age=max_file_age)
    file_name = f'{se.local_storage}weather_v{CSV_VERSION}_{year}.pickle'
    file_date_limit = dt.datetime.now() - dt.timedelta(hours=max_file_age)
    current_season = gm.get_season_from_date(dt.date.today() - dt.timedelta(30))
    get_new = True

    if date:
        from_date = date - dt.timedelta(days=days)
        to_date = date + dt.timedelta(days=1)
        get_new = True
    else:
        from_date, to_date = gm.get_dates_from_season(year)
        to_date = to_date + dt.timedelta(days=1)
        try:
            # Don't fetch new data if the old data is cached. If an older season file doesn't exist,
            # we get out via the exception.
            if dt.datetime.fromtimestamp(os.path.getmtime(file_name)) > file_date_limit \
                    or year != current_season:
                get_new = False
        except FileNotFoundError:
            pass

    if get_new:
        futures_tuples = []
        weather_api_native = {}
        with futures.ThreadPoolExecutor(300) as executor:
            while from_date < to_date:
                if from_date.month in [7, 8, 9, 10]:
                    from_date = from_date.replace(day=1, month=from_date.month + 1)
                    continue
                url = 'http://h-web03.nve.no/APSapi/TimeSeriesReader.svc/MountainWeather/-/{0}/no/true'.format(
                    from_date.isoformat())
                # Bind the current url as a default argument so the lambda does not pick up a later value.
                future = executor.submit(lambda url=url: requests.get(url))
                futures_tuples.append((from_date, 0, future))
                from_date += dt.timedelta(days=1)

            while len(futures_tuples):
                from_date, retries, future = futures_tuples.pop()
                response = future.result()
                if response.status_code != requests.codes.ok:
                    if retries < 5:
                        url = 'http://h-web03.nve.no/APSapi/TimeSeriesReader.svc/MountainWeather/-/{0}/no/true'.format(
                            from_date.isoformat())
                        future = executor.submit(lambda url=url: requests.get(url))
                        futures_tuples.insert(0, (from_date, retries + 1, future))
                    else:
                        print(f"Failed to fetch weather for {from_date.isoformat()}, skipping",
                              file=sys.stderr)
                    continue

                json = response.json()
                for obs in json:
                    region = int(float(obs['RegionId']))
                    if region not in REGIONS:
                        continue
                    if obs['Attribute'] not in weather_api_native:
                        weather_api_native[obs['Attribute']] = {}
                    try:
                        weather_api_native[obs['Attribute']][(from_date.isoformat(), region)] = float(obs['Value'])
                    except ValueError:
                        weather_api_native[obs['Attribute']][(from_date.isoformat(), region)] = obs['Value']

        if not date:
            with open(file_name, 'wb') as handle:
                pickle.dump(weather_api_native, handle, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        try:
            with open(file_name, 'rb') as handle:
                weather_api_native = pickle.load(handle)
        except:
            os.remove(file_name)
            return _get_weather_obs(year, date=date, days=days, max_file_age=max_file_age)

    weather_api = {}
    for key, (orig_key, mapper) in WEATHER_API.items():
        weather_api[key] = {}
        if orig_key in weather_api_native:
            for date_region, value in weather_api_native[orig_key].items():
                weather_api[key][date_region] = mapper(value)

    weather_varsom = {}
    for forecast in aw:
        for key, (orig_key, mapper) in WEATHER_VARSOM.items():
            if key not in weather_varsom:
                weather_varsom[key] = {}
            date_region = (forecast.date_valid.isoformat(), forecast.region_id)
            weather_varsom[key][date_region] = mapper(getattr(forecast.mountain_weather, orig_key))

    # We prioritize APS before the Varsom data.
    # Varsom is manually adjusted by a professional,
    # but by the time we download it, the data will be
    # 18-24 h old.
    return merge(weather_varsom, weather_api)
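# `merge` is defined elsewhere in this module. A hedged sketch of the behaviour the comment
# above relies on (a nested-dict merge where the second argument wins on conflict, so the APS
# values override the Varsom values). Illustrative only; the real helper may differ.
def _merge_sketch(a, b):
    """Merge two {column: {(date, region): value}} dicts, with b taking priority on conflicts."""
    out = {key: dict(val) for key, val in a.items()}
    for key, val in b.items():
        out.setdefault(key, {}).update(val)
    return out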
def _get_regobs_obs(year, requested_types, date=None, days=None, max_file_age=23):
    regions = gm.get_forecast_regions(year=year, get_b_regions=True)
    observations = {}
    observations_neigh = {}
    varsomdata_obses = {}

    if len(requested_types) == 0:
        return observations

    file_name = f'{se.local_storage}regobs_v{CSV_VERSION}_{year}.pickle'
    file_date_limit = dt.datetime.now() - dt.timedelta(hours=max_file_age)
    current_season = gm.get_season_from_date(dt.date.today() - dt.timedelta(30))
    number_of_records = 50
    get_new = True

    try:
        # Don't fetch new data if the old data is cached. If an older season file doesn't exist,
        # we get out via the exception.
        if dt.datetime.fromtimestamp(os.path.getmtime(file_name)) > file_date_limit \
                or year != current_season:
            get_new = False
        if date:
            get_new = True
    except FileNotFoundError:
        pass

    if date:
        from_date = date - dt.timedelta(days=days)
        to_date = date
    else:
        from_date, to_date = gm.get_dates_from_season(year=year)

    if "AvalancheIndex" in requested_types:
        avalanche_index = True
    else:
        avalanche_index = False

    req_set = set(requested_types) & set(REG_ENG_V4.keys())

    # Make sure all requested elements from RegObs actually have the information we need specified
    if not min(map(lambda x: set(list(x.keys())).issuperset(req_set),
                   [REGOBS_CLASSES, REGOBS_SCALARS, REG_ENG])):
        raise RegObsRegTypeError()

    url = "https://api.regobs.no/v4/Search"
    query = {
        "LangKey": 1,
        "FromDate": from_date.isoformat(),
        "ToDate": to_date.isoformat(),
        "SelectedRegistrationTypes": None,
        "SelectedRegions": regions,
        "NumberOfRecords": number_of_records,
        "Offset": 0
    }

    results = []

    def send_req(queries):
        query = queries.pop()
        try:
            req = requests.post(url=url, json=query)
            return (req, query)
        except:
            return (None, query)

    if get_new:
        future_tuples = []

        total_matches = requests.post(url=url + "/Count", json=query).json()["TotalMatches"]
        with futures.ThreadPoolExecutor(140) as executor:
            queries = []
            while query["Offset"] < total_matches:
                queries.append(query.copy())
                query["Offset"] += number_of_records
            for _ in range(0, len(queries)):
                future = executor.submit(send_req, queries)
                future_tuples.append((0, future))

            while len(future_tuples):
                retries, future = future_tuples.pop()
                try:
                    response, query = future.result()
                    raw_obses = response.json()
                except:
                    if retries < 5:
                        future = executor.submit(send_req, [query])
                        future_tuples.insert(0, (retries + 1, future))
                    else:
                        offset = json.loads(response.request.body)["Offset"]
                        print(f"Failed to fetch regobs, offset {offset}, skipping", file=sys.stderr)
                    continue

                results = results + raw_obses

        if not date:
            with open(file_name, 'wb') as handle:
                pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        try:
            with open(file_name, 'rb') as handle:
                results = pickle.load(handle)
        except:
            os.remove(file_name)
            return _get_regobs_obs(year, requested_types, date=date, days=days, max_file_age=max_file_age)

    for raw_obs in results:
        date = dt.datetime.fromisoformat(raw_obs["DtObsTime"]).date()
        key = (date.isoformat(), raw_obs["ObsLocation"]["ForecastRegionTID"])
        if key not in observations:
            observations[key] = {}
            observations_neigh[key] = {}
            varsomdata_obses[key] = []

        for obs_type in req_set:
            if REG_ENG_V4[obs_type] not in raw_obs or not raw_obs[REG_ENG_V4[obs_type]]:
                continue
            reg = raw_obs[REG_ENG_V4[obs_type]]

            # Ignore snow profiles of the old format
            if obs_type == "Snøprofil" and "StratProfile" not in reg:
                continue

            obs = {"competence": raw_obs["Observer"]["CompetenceLevelTID"]}
            try:
                for attr, categories in REGOBS_CLASSES[obs_type].items():
                    for cat_id, cat_name in categories.items():
                        if isinstance(reg, list):
                            obs[cat_name] = 1 if cat_id in map(lambda x: x[attr], reg) else 0
                        else:
                            obs[cat_name] = 1 if cat_id == reg[attr] else 0
            except KeyError:
                pass
            try:
                for regobs_attr, conv in REGOBS_SCALARS[obs_type].values():
                    obs[regobs_attr] = 0
                    if isinstance(reg, list) and len(reg) > 0:
                        obs[regobs_attr] = reg[0][regobs_attr]
                    elif not isinstance(reg, list):
                        obs[regobs_attr] = reg[regobs_attr]
                    if obs[regobs_attr] is None:
                        obs[regobs_attr] = 0
            except KeyError:
                pass

            if obs_type not in observations[key]:
                observations[key][obs_type] = []
            observations[key][obs_type].append(obs)

        varsomdata_obses[key] += go.Observation(raw_obs).Observations

    # We want the most competent observations first
    for key, date_region in observations.items():
        for reg_key, reg_type in date_region.items():
            reg_type.sort(key=lambda x: x['competence'], reverse=True)
            observations_neigh[key][reg_key] = reg_type.copy()

    rng = np.random.default_rng(1984)
    for (date, region), date_region in observations.items():
        for reg_key in date_region.keys():
            reg_neigh = []
            for neighbour in rng.permutation(REGION_NEIGH[region]):
                try:
                    reg_neigh += observations[(date, neighbour)][reg_key]
                except KeyError:
                    pass
            reg_neigh.sort(key=lambda x: x['competence'], reverse=True)
            observations_neigh[(date, region)][reg_key] += reg_neigh

    df_dict = {}
    for key, observation in observations_neigh.items():
        # Use the 2 most competent observations, and list both categories as well as scalars
        for obs_idx in range(0, 2):
            # One type of observation (test, danger signs etc.) at a time
            for regobs_type in req_set:
                obses = observation[regobs_type] if regobs_type in observation else []

                # Go through each requested class attribute from the specified observation type
                for attr, cat in REGOBS_CLASSES[regobs_type].items():
                    # We handle categories using 1-hot, so we step through each category
                    for cat_name in cat.values():
                        attr_name = f"regobs_{REG_ENG[regobs_type]}_{_camel_to_snake(attr)}_{cat_name}_{obs_idx}"
                        if attr_name not in df_dict:
                            df_dict[attr_name] = {}
                        df_dict[attr_name][key] = obses[obs_idx][cat_name] if len(obses) > obs_idx else 0

                # Go through all requested scalars
                for attr, (regobs_attr, conv) in REGOBS_SCALARS[regobs_type].items():
                    attr_name = f"regobs_{REG_ENG[regobs_type]}_{_camel_to_snake(attr)}_{obs_idx}"
                    if attr_name not in df_dict:
                        df_dict[attr_name] = {}
                    try:
                        df_dict[attr_name][key] = conv(obses[obs_idx][regobs_attr]) if len(obses) > obs_idx else 0
                    except TypeError:
                        df_dict[attr_name][key] = 0

        if "accuracy" not in df_dict:
            df_dict["accuracy"] = {}
        df_dict['accuracy'][key] = sum(
            map(lambda x: {0: 0, 1: 1, 2: -1, 3: -1}[x['ForecastCorrectTID']],
                observation['Skredfarevurdering'])
        ) if 'Skredfarevurdering' in observation else 0

        if avalanche_index:
            if "regobs_avalancheidx" not in df_dict:
                df_dict["regobs_avalancheidx"] = {}
            avalanche_indices = list(map(lambda x: x.index, gm.get_avalanche_index(varsomdata_obses[key])))
            if avalanche_indices:
                df_dict['regobs_avalancheidx'][key] = max(avalanche_indices)
            else:
                df_dict['regobs_avalancheidx'][key] = 0

    return df_dict
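# The function above returns a dict of {column_name: {(date_iso, region_id): value}}. A hedged
# usage sketch of turning it into a table (hypothetical helper; pandas is an assumption here and
# is not imported by this module, and the season string and requested type are example values):
def _example_regobs_frame():
    import pandas as pd  # assumption: pandas available in the environment
    df_dict = _get_regobs_obs('2019-20', ['Skredfarevurdering'])
    frame = pd.DataFrame(df_dict)  # rows keyed by (date, region), one column per generated feature
    print(frame.shape)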
def _get_regobs_obs(regions, year, requested_types, max_file_age=23):
    observations = {}

    if len(requested_types) == 0:
        return observations

    file_name = f'{se.local_storage}regobs_{year}.pickle'
    file_date_limit = dt.datetime.now() - dt.timedelta(hours=max_file_age)
    current_season = gm.get_season_from_date(dt.date.today() - dt.timedelta(30))
    number_of_records = 50
    get_new = True

    try:
        # Don't fetch new data if the old data is cached. If an older season file doesn't exist,
        # we get out via the exception.
        if dt.datetime.fromtimestamp(os.path.getmtime(file_name)) > file_date_limit \
                or year != current_season:
            get_new = False
    except FileNotFoundError:
        pass

    from_date, to_date = gm.get_dates_from_season(year=year)

    req_set = set(requested_types)

    # Make sure all requested elements from RegObs actually have the information we need specified
    if not min(map(lambda x: set(list(x.keys())).issuperset(req_set),
                   [REGOBS_CLASSES, REGOBS_SCALARS, REG_ENG])):
        raise RegObsRegTypeError()

    url = "https://api.nve.no/hydrology/regobs/webapi_v3.2.0/Search/Avalanche"
    query = {
        "LangKey": 1,
        "FromDate": from_date.isoformat(),
        "ToDate": to_date.isoformat(),
        "SelectedRegistrationTypes": [],
        "SelectedRegions": regions,
        "NumberOfRecords": number_of_records,
        "Offset": 0
    }

    response = []
    if get_new:
        while True:
            try:
                raw_obses = requests.post(url=url, json=query).json()
            except requests.exceptions.ConnectionError:
                time.sleep(1)
                continue

            response = response + raw_obses["Results"]
            query["Offset"] += number_of_records
            if raw_obses["ResultsInPage"] < number_of_records:
                with open(file_name, 'wb') as handle:
                    pickle.dump(response, handle, protocol=pickle.HIGHEST_PROTOCOL)
                break
    else:
        try:
            with open(file_name, 'rb') as handle:
                response = pickle.load(handle)
        except:
            os.remove(file_name)
            return _get_regobs_obs(regions, year, requested_types, max_file_age)

    for raw_obs in response:
        for reg in raw_obs["Registrations"]:
            obs_type = reg["RegistrationName"]
            if obs_type not in requested_types:
                continue

            # Ignore snow profiles of the old format
            if obs_type == "Snøprofil" and "StratProfile" not in reg["FullObject"]:
                continue

            obs = {"competence": raw_obs["CompetenceLevelTid"]}
            try:
                for attr, categories in REGOBS_CLASSES[obs_type].items():
                    value = reg["FullObject"][attr]
                    for cat_id, cat_name in categories.items():
                        obs[cat_name] = 1 if cat_id == value else 0
            except KeyError:
                pass
            try:
                for regobs_attr, conv in REGOBS_SCALARS[obs_type].values():
                    obs[regobs_attr] = reg["FullObject"][regobs_attr]
            except KeyError:
                pass

            date = dt.datetime.fromisoformat(raw_obs["DtObsTime"]).date()
            key = (raw_obs["ForecastRegionTid"], date)
            if key not in observations:
                observations[key] = {}
            if obs_type not in observations[key]:
                observations[key][obs_type] = []
            observations[key][obs_type].append(obs)

    # We want the most competent observations first
    for date_region in observations.values():
        for reg_type in date_region.values():
            reg_type.sort(key=lambda x: x['competence'], reverse=True)

    return observations
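# The older function above returns a nested dict keyed by (region_id, date), mapping registration
# names to lists of flattened observations, most competent observer first. A hedged inspection
# sketch (hypothetical helper, illustrative only):
def _example_print_old_regobs_structure(observations):
    for (region_id, date), reg_types in observations.items():
        for reg_name, obs_list in reg_types.items():
            competences = [o['competence'] for o in obs_list]
            print(region_id, date, reg_name, competences)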