Example #1
        aval_triggers = ', '.join("{!s} ({!r})".format(key, val)
                                  for (key, val) in aval_triggers)

        s = u'{0};{1};{2};{3};{4};{5};{6};{7}\n'.format(
            m.occurrences, danger_levels, problems, aval_triggers, cause_names,
            aval_types, m.main_message_no, m.main_message_en)

        l.write(s)
    l.close()


if __name__ == "__main__":

    year = '2017-18'
    regions = gm.get_forecast_regions(year=year)
    date_from, date_to = gm.get_dates_from_season(year=year)

    # file names
    file_name_for_warnings_pickle = '{0}{1}'.format(
        env.local_storage, 'runmainmessage warnings.pickle')
    file_name_for_main_messages_pickle = '{0}{1}'.format(
        env.local_storage, 'runmainmessage main messages.pickle')
    file_name_for_main_messages_csv = '{0}{1}'.format(
        env.output_folder, 'Alle hovedbudskap {}.csv'.format(year))

    ##### pickle the warnings and dataset with main messages
    # pickle_warnings(regions, date_from, date_to, file_name_for_warnings_pickle)
    main_messages = select_messages_with_more(file_name_for_warnings_pickle)
    mp.pickle_anything(main_messages, file_name_for_main_messages_pickle)
    main_messages = mp.unpickle_anything(file_name_for_main_messages_pickle)
Example #2
def get_all_observations(year,
                         output='Nest',
                         geohazard_tids=None,
                         lang_key=1,
                         max_file_age=23):
    """Specialized method for getting all observations for one season (1. sept to 31. august).
    For the current season (at the time of writing, 2018-19), if request has been made the last 23hrs,
    data is retrieved from a locally stored pickle, if not, new request is made to the regObs api. Previous
    seasons are not requested if a pickle is found in local storage.

    :param year:                [string] Eg. season '2017-18' (sept-sept) or one single year '2018'
    :param output:              [string] 'Nest' or 'List'
    :param geohazard_tids:      [int or list of ints] Default None gives all. Note, pickle stores all, but this option returns a select
    :param lang_key             [int] 1 is norwegian, 2 is english
    :param max_file_age:        [int] hrs how old the file is before new is retrieved

    :return:
    """

    from_date, to_date = gm.get_dates_from_season(year=year)
    file_name_list = '{0}all_observations_list_{1}_lk{2}.pickle'.format(
        env.local_storage, year, lang_key)
    file_name_nest = '{0}all_observations_nest_{1}_lk{2}.pickle'.format(
        env.local_storage, year, lang_key)
    get_new = True
    date_limit = dt.datetime.now() - dt.timedelta(hours=max_file_age)

    # If we are well out of the current season (30 days), there is little chance the data set has changed.
    current_season = gm.get_season_from_date(dt.date.today() -
                                             dt.timedelta(30))

    if geohazard_tids:
        if not isinstance(geohazard_tids, list):
            geohazard_tids = [geohazard_tids]

    if os.path.exists(file_name_list):
        # If the file contains a season long gone, don't fetch new data.
        if year == current_season:
            file_age = dt.datetime.fromtimestamp(
                os.path.getmtime(file_name_list))
            # If the file is newer than the given time limit, don't fetch new data.
            if file_age > date_limit:
                # If the file is larger than a nearly empty file, don't fetch new data.
                if os.path.getsize(file_name_list) > 100:  # 100 bytes limit
                    get_new = False
        else:
            get_new = False

    if get_new:
        # When fetching new data, get all geohazards.
        nested_observations = go.get_data_as_class(from_date=from_date,
                                                   to_date=to_date,
                                                   output='Nest',
                                                   geohazard_tids=None,
                                                   lang_key=lang_key)

        mp.pickle_anything(nested_observations, file_name_nest)

        listed_observations = []
        for d in nested_observations:
            for o in d.Observations:
                if _observation_is_not_empty(o):
                    listed_observations.append(o)
            for p in d.Pictures:
                # p['RegistrationName'] = 'Bilde'
                listed_observations.append(p)

        mp.pickle_anything(listed_observations, file_name_list)

    if output == 'Nest':
        all_nested_observations = mp.unpickle_anything(file_name_nest)
        nested_observations = []

        if geohazard_tids:
            for o in all_nested_observations:
                if o.GeoHazardTID in geohazard_tids:
                    nested_observations.append(o)

        else:
            nested_observations = all_nested_observations

        return nested_observations

    elif output == 'List':
        all_listed_observations = mp.unpickle_anything(file_name_list)
        listed_observations = []

        if geohazard_tids:
            for o in all_listed_observations:
                if o.GeoHazardTID in geohazard_tids:
                    listed_observations.append(o)

        else:
            listed_observations = all_listed_observations

        return listed_observations

    else:
        ml.log_and_print(
            '[warning] getvarsompickles.py -> get_all_observations: Unknown output option'
        )
        return []
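
A minimal usage sketch of get_all_observations, assuming the module's dependencies (gm, env, go, mp, ml) are importable as above; the season string and the geohazard TID are illustrative only (10 is assumed here to be the snow TID in regObs):

# Usage sketch (assumed values; not part of the original source).
# The first call within 23 hrs queries the regObs API and pickles the result;
# later calls for the same season reuse the local pickle.
snow_observations = get_all_observations('2018-19', output='List', geohazard_tids=10)
nested_observations = get_all_observations('2018-19', output='Nest')
print(len(snow_observations), 'listed snow observations')
print(len(nested_observations), 'nested registrations (all geohazards)')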
Example #3
def _get_weather_obs(year, date=None, days=None, max_file_age=23):
    """
    Download data from the weather API:s
    :param year: String representation of season. None if a specific date should be fetched.
    :param max_file_age: Time to live for cache in hours.
    :param date: datetime.date. None if a whole season should be fetched.
    """
    aw = _get_raw_varsom(year, date, days, max_file_age=max_file_age)
    file_name = f'{se.local_storage}weather_v{CSV_VERSION}_{year}.pickle'
    file_date_limit = dt.datetime.now() - dt.timedelta(hours=max_file_age)
    current_season = gm.get_season_from_date(dt.date.today() -
                                             dt.timedelta(30))
    get_new = True

    if date:
        from_date = date - dt.timedelta(days=days)
        to_date = date + dt.timedelta(days=1)
        get_new = True
    else:
        from_date, to_date = gm.get_dates_from_season(year)
        to_date = to_date + dt.timedelta(days=1)

        try:
            # Don't fetch new data if a fresh cache exists. If the file for an older season doesn't exist, we fall through via the exception.
            if dt.datetime.fromtimestamp(os.path.getmtime(
                    file_name)) > file_date_limit or year != current_season:
                get_new = False
        except FileNotFoundError:
            pass

    if get_new:
        futures_tuples = []
        weather_api_native = {}
        with futures.ThreadPoolExecutor(300) as executor:
            while from_date < to_date:
                if from_date.month in [7, 8, 9, 10]:
                    from_date = from_date.replace(day=1,
                                                  month=from_date.month + 1)
                    continue
                url = 'http://h-web03.nve.no/APSapi/TimeSeriesReader.svc/MountainWeather/-/{0}/no/true'.format(
                    from_date.isoformat())
                # Pass url as an argument to avoid the late-binding closure bug.
                future = executor.submit(requests.get, url)
                futures_tuples.append((from_date, 0, future))
                from_date += dt.timedelta(days=1)

            while len(futures_tuples):
                from_date, retries, future = futures_tuples.pop()
                response = future.result()
                if response.status_code != requests.codes.ok:
                    if retries < 5:
                        url = 'http://h-web03.nve.no/APSapi/TimeSeriesReader.svc/MountainWeather/-/{0}/no/true'.format(
                            from_date.isoformat())
                        future = executor.submit(requests.get, url)
                        futures_tuples.insert(0,
                                              (from_date, retries + 1, future))
                    else:
                        print(
                            f"Failed to fetch weather for {from_date.isoformat()}, skipping",
                            file=sys.stderr)
                    continue

                json = response.json()
                for obs in json:
                    region = int(float(obs['RegionId']))
                    if region not in REGIONS:
                        continue
                    if obs['Attribute'] not in weather_api_native:
                        weather_api_native[obs['Attribute']] = {}
                    try:
                        weather_api_native[obs['Attribute']][(
                            from_date.isoformat(),
                            region)] = float(obs['Value'])
                    except ValueError:
                        weather_api_native[obs['Attribute']][(
                            from_date.isoformat(), region)] = obs['Value']

        if not date:
            with open(file_name, 'wb') as handle:
                pickle.dump(weather_api_native,
                            handle,
                            protocol=pickle.HIGHEST_PROTOCOL)
    else:
        try:
            with open(file_name, 'rb') as handle:
                weather_api_native = pickle.load(handle)
        except Exception:
            # Corrupt cache file; delete it and fetch fresh data.
            os.remove(file_name)
            return _get_weather_obs(year, date, days, max_file_age)

    weather_api = {}
    for key, (orig_key, mapper) in WEATHER_API.items():
        weather_api[key] = {}
        if orig_key in weather_api_native:
            for date_region, value in weather_api_native[orig_key].items():
                weather_api[key][date_region] = mapper(value)

    weather_varsom = {}
    for forecast in aw:
        for key, (orig_key, mapper) in WEATHER_VARSOM.items():
            if key not in weather_varsom:
                weather_varsom[key] = {}
            date_region = (forecast.date_valid.isoformat(), forecast.region_id)
            weather_varsom[key][date_region] = mapper(
                getattr(forecast.mountain_weather, orig_key))

    # We prioritize APS data over the Varsom data.
    # Varsom is manually adjusted by a professional,
    # but by the time we download it, the data will be
    # 18-24 h old.
    return merge(weather_varsom, weather_api)
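
The merge helper is not defined in this example. A minimal sketch consistent with the comment above, assuming both arguments have the shape {feature: {(date, region): value}} and that APS values should override Varsom values for the same key:

def merge(weather_varsom, weather_api):
    """Sketch only: combine two {feature: {(date, region): value}} dicts,
    letting the APS values (second argument) win when both sources have the same key."""
    merged = {feature: dict(values) for feature, values in weather_varsom.items()}
    for feature, values in weather_api.items():
        merged.setdefault(feature, {}).update(values)
    return merged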
Example #4
def _get_regobs_obs(year,
                    requested_types,
                    date=None,
                    days=None,
                    max_file_age=23):
    regions = gm.get_forecast_regions(year=year, get_b_regions=True)
    observations = {}
    observations_neigh = {}
    varsomdata_obses = {}

    if len(requested_types) == 0:
        return observations

    file_name = f'{se.local_storage}regobs_v{CSV_VERSION}_{year}.pickle'
    file_date_limit = dt.datetime.now() - dt.timedelta(hours=max_file_age)
    current_season = gm.get_season_from_date(dt.date.today() -
                                             dt.timedelta(30))
    number_of_records = 50
    get_new = True

    try:
        # Don't fetch new data if a fresh cache exists. If the file for an older season doesn't exist, we fall through via the exception.
        if dt.datetime.fromtimestamp(os.path.getmtime(
                file_name)) > file_date_limit or year != current_season:
            get_new = False
        if date:
            get_new = True
    except FileNotFoundError:
        pass

    if date:
        from_date = date - dt.timedelta(days=days)
        to_date = date
    else:
        from_date, to_date = gm.get_dates_from_season(year=year)

    if "AvalancheIndex" in requested_types:
        avalanche_index = True
    else:
        avalanche_index = False

    req_set = set(requested_types) & set(REG_ENG_V4.keys())

    # Make sure all requested elements from RegObs actually have the information we need specified
    if not all(
            set(x.keys()).issuperset(req_set)
            for x in [REGOBS_CLASSES, REGOBS_SCALARS, REG_ENG]):
        raise RegObsRegTypeError()

    url = "https://api.regobs.no/v4/Search"
    query = {
        "LangKey": 1,
        "FromDate": from_date.isoformat(),
        "ToDate": to_date.isoformat(),
        "SelectedRegistrationTypes": None,
        "SelectedRegions": regions,
        "NumberOfRecords": number_of_records,
        "Offset": 0
    }

    results = []

    def send_req(queries):
        query = queries.pop()
        try:
            req = requests.post(url=url, json=query)
            return (req, query)
        except Exception:
            # Network error; the caller decides whether to retry.
            return (None, query)

    if get_new:
        future_tuples = []

        total_matches = requests.post(url=url + "/Count",
                                      json=query).json()["TotalMatches"]

        with futures.ThreadPoolExecutor(140) as executor:
            queries = []
            while query["Offset"] < total_matches:
                queries.append(query.copy())
                query["Offset"] += number_of_records

            for _ in range(0, len(queries)):
                future = executor.submit(send_req, queries)
                future_tuples.append((0, future))

            while len(future_tuples):
                retries, future = future_tuples.pop()
                try:
                    response, query = future.result()
                    raw_obses = response.json()
                except Exception:
                    if retries < 5:
                        future = executor.submit(send_req, [query])
                        future_tuples.insert(0, (retries + 1, future))
                    else:
                        # response may be None here, so read the offset from the query instead.
                        offset = query["Offset"]
                        print(
                            f"Failed to fetch regobs, offset {offset}, skipping",
                            file=sys.stderr)
                    continue
                results = results + raw_obses

        if not date:
            with open(file_name, 'wb') as handle:
                pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        try:
            with open(file_name, 'rb') as handle:
                results = pickle.load(handle)
        except Exception:
            # Corrupt cache file; delete it and fetch fresh data.
            os.remove(file_name)
            return _get_regobs_obs(year, requested_types, date, days,
                                   max_file_age)

    for raw_obs in results:
        date = dt.datetime.fromisoformat(raw_obs["DtObsTime"]).date()
        key = (date.isoformat(), raw_obs["ObsLocation"]["ForecastRegionTID"])
        if key not in observations:
            observations[key] = {}
            observations_neigh[key] = {}
            varsomdata_obses[key] = []
        for obs_type in req_set:
            if REG_ENG_V4[obs_type] not in raw_obs or not raw_obs[
                    REG_ENG_V4[obs_type]]:
                continue
            reg = raw_obs[REG_ENG_V4[obs_type]]

            # Ignore snow profiles of the old format
            if obs_type == "Snøprofil" and "StratProfile" not in reg:
                continue

            obs = {"competence": raw_obs["Observer"]["CompetenceLevelTID"]}
            try:
                for attr, categories in REGOBS_CLASSES[obs_type].items():
                    for cat_id, cat_name in categories.items():
                        if isinstance(reg, list):
                            obs[cat_name] = 1 if cat_id in map(
                                lambda x: x[attr], reg) else 0
                        else:
                            obs[cat_name] = 1 if cat_id == reg[attr] else 0
            except KeyError:
                pass
            try:
                for regobs_attr, conv in REGOBS_SCALARS[obs_type].values():
                    obs[regobs_attr] = 0
                    if isinstance(reg, list) and len(reg) > 0:
                        obs[regobs_attr] = reg[0][regobs_attr]
                    elif not isinstance(reg, list):
                        obs[regobs_attr] = reg[regobs_attr]
                    if obs[regobs_attr] is None:
                        obs[regobs_attr] = 0
            except KeyError:
                pass

            if obs_type not in observations[key]:
                observations[key][obs_type] = []
            observations[key][obs_type].append(obs)
        varsomdata_obses[key] += go.Observation(raw_obs).Observations

    # We want the most competent observations first
    for key, date_region in observations.items():
        for reg_key, reg_type in date_region.items():
            reg_type.sort(key=lambda x: x['competence'], reverse=True)
            observations_neigh[key][reg_key] = reg_type.copy()

    rng = np.random.default_rng(1984)
    for (date, region), date_region in observations.items():
        for reg_key in date_region.keys():
            reg_neigh = []
            for neighbour in rng.permutation(REGION_NEIGH[region]):
                try:
                    reg_neigh += observations[(date, neighbour)][reg_key]
                except KeyError:
                    pass
            reg_neigh.sort(key=lambda x: x['competence'], reverse=True)
            observations_neigh[(date, region)][reg_key] += reg_neigh

    df_dict = {}
    for key, observation in observations_neigh.items():
        # Use the 2 most competent observations, listing both categories and scalars
        for obs_idx in range(0, 2):
            # One type of observation (test, danger signs etc.) at a time
            for regobs_type in req_set:
                obses = observation[
                    regobs_type] if regobs_type in observation else []
                # Go through each requested class attribute from the specified observation type
                for attr, cat in REGOBS_CLASSES[regobs_type].items():
                    # We handle categories using 1-hot, so we step through each category
                    for cat_name in cat.values():
                        attr_name = f"regobs_{REG_ENG[regobs_type]}_{_camel_to_snake(attr)}_{cat_name}_{obs_idx}"
                        if attr_name not in df_dict:
                            df_dict[attr_name] = {}
                        df_dict[attr_name][key] = obses[obs_idx][
                            cat_name] if len(obses) > obs_idx else 0
                # Go through all requested scalars
                for attr, (regobs_attr,
                           conv) in REGOBS_SCALARS[regobs_type].items():
                    attr_name = f"regobs_{REG_ENG[regobs_type]}_{_camel_to_snake(attr)}_{obs_idx}"
                    if attr_name not in df_dict:
                        df_dict[attr_name] = {}
                    try:
                        df_dict[attr_name][key] = conv(
                            obses[obs_idx]
                            [regobs_attr]) if len(obses) > obs_idx else 0
                    except TypeError:
                        df_dict[attr_name][key] = 0

        if "accuracy" not in df_dict:
            df_dict["accuracy"] = {}
        df_dict['accuracy'][key] = sum(
            map(lambda x: {
                0: 0,
                1: 1,
                2: -1,
                3: -1
            }[x['ForecastCorrectTID']], observation['Skredfarevurdering'])
        ) if 'Skredfarevurdering' in observation else 0

        if avalanche_index:
            if "regobs_avalancheidx" not in df_dict:
                df_dict["regobs_avalancheidx"] = {}
            avalanche_indices = list(
                map(lambda x: x.index,
                    gm.get_avalanche_index(varsomdata_obses[key])))
            if avalanche_indices:
                df_dict['regobs_avalancheidx'][key] = max(avalanche_indices)
            else:
                df_dict['regobs_avalancheidx'][key] = 0

    return df_dict
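
The returned df_dict maps column names such as regobs_{type}_{attribute}_{category}_{obs_idx} to {(date, region): value} dicts. A hedged sketch of turning it into a feature table; pandas and the requested type names are assumptions here, not shown in the original snippet:

import pandas as pd

# Sketch only: df_dict is {column_name: {(date_iso, region_id): value}},
# so the (date, region) tuples become the row index.
features = _get_regobs_obs('2018-19', ['Skredfarevurdering', 'Snøprofil'])
df = pd.DataFrame(features)
df.index = pd.MultiIndex.from_tuples(df.index, names=['date', 'region'])
print(df.filter(like='regobs_').head())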
Example #5
def _get_regobs_obs(regions, year, requested_types, max_file_age=23):
    observations = {}

    if len(requested_types) == 0:
        return observations

    file_name = f'{se.local_storage}regobs_{year}.pickle'
    file_date_limit = dt.datetime.now() - dt.timedelta(hours=max_file_age)
    current_season = gm.get_season_from_date(dt.date.today() -
                                             dt.timedelta(30))
    number_of_records = 50
    get_new = True

    try:
        # Don't fetch new data if a fresh cache exists. If the file for an older season doesn't exist, we fall through via the exception.
        if dt.datetime.fromtimestamp(os.path.getmtime(
                file_name)) > file_date_limit or year != current_season:
            get_new = False
    except FileNotFoundError:
        pass

    from_date, to_date = gm.get_dates_from_season(year=year)

    req_set = set(requested_types)
    # Make sure all requested elements from RegObs actually have the information we need specified
    if not all(
            set(x.keys()).issuperset(req_set)
            for x in [REGOBS_CLASSES, REGOBS_SCALARS, REG_ENG]):
        raise RegObsRegTypeError()

    url = "https://api.nve.no/hydrology/regobs/webapi_v3.2.0/Search/Avalanche"
    query = {
        "LangKey": 1,
        "FromDate": from_date.isoformat(),
        "ToDate": to_date.isoformat(),
        "SelectedRegistrationTypes": [],
        "SelectedRegions": regions,
        "NumberOfRecords": number_of_records,
        "Offset": 0
    }

    response = []
    if get_new:
        while True:
            try:
                raw_obses = requests.post(url=url, json=query).json()
            except requests.exceptions.ConnectionError:
                time.sleep(1)
                continue
            response = response + raw_obses["Results"]

            query["Offset"] += number_of_records
            if raw_obses["ResultsInPage"] < number_of_records:
                with open(file_name, 'wb') as handle:
                    pickle.dump(response,
                                handle,
                                protocol=pickle.HIGHEST_PROTOCOL)
                break
    else:
        try:
            with open(file_name, 'rb') as handle:
                response = pickle.load(handle)
        except Exception:
            # Corrupt cache file; delete it and fetch fresh data.
            os.remove(file_name)
            return _get_regobs_obs(regions, year, requested_types,
                                   max_file_age)

    for raw_obs in response:
        for reg in raw_obs["Registrations"]:
            obs_type = reg["RegistrationName"]
            if obs_type not in requested_types:
                continue
            # Ignore snow profiles of the old format
            if obs_type == "Snøprofil" and "StratProfile" not in reg[
                    "FullObject"]:
                continue

            obs = {"competence": raw_obs["CompetenceLevelTid"]}
            try:
                for attr, categories in REGOBS_CLASSES[obs_type].items():
                    value = reg["FullObject"][attr]
                    for cat_id, cat_name in categories.items():
                        obs[cat_name] = 1 if cat_id == value else 0
            except KeyError:
                pass
            try:
                for regobs_attr, conv in REGOBS_SCALARS[obs_type].values():
                    obs[regobs_attr] = reg["FullObject"][regobs_attr]
            except KeyError:
                pass

            date = dt.datetime.fromisoformat(raw_obs["DtObsTime"]).date()
            key = (raw_obs["ForecastRegionTid"], date)
            if key not in observations:
                observations[key] = {}
            if obs_type not in observations[key]:
                observations[key][obs_type] = []
            observations[key][obs_type].append(obs)

    # We want the most competent observations first
    for date_region in observations.values():
        for reg_type in date_region.values():
            reg_type.sort(key=lambda x: x['competence'], reverse=True)

    return observations
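
All of the examples above repeat the same cache decision: reuse a local pickle if it belongs to a past season, or if it is younger than max_file_age hours. A small helper capturing that logic could look like the sketch below (the helper name is ours, not from the original code):

import datetime as dt
import os


def _cached_pickle_is_usable(file_name, year, current_season, max_file_age=23):
    """Sketch only: True if a cached pickle can be reused instead of re-querying the API."""
    try:
        mtime = dt.datetime.fromtimestamp(os.path.getmtime(file_name))
    except FileNotFoundError:
        return False
    if year != current_season:
        # Past seasons are assumed not to change any more.
        return True
    # For the current season, only trust a sufficiently fresh file.
    return mtime > dt.datetime.now() - dt.timedelta(hours=max_file_age)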