# NOTE: these handlers are excerpted from a larger Flask module; the imports
# below reflect what the code appears to rely on. Helper functions such as
# handle_parameters, get_units_title, get_historical_dataframe and
# ecmwf_find_most_current_files, and constants such as the PATH_TO_* paths
# and M3_TO_FT3, are defined elsewhere in that module.
import datetime
import glob
import json
import os
from datetime import datetime as dt

import numpy as np
import pandas as pd
import xarray
from flask import jsonify, make_response, render_template

import hydrostats.data as hd  # assumed source of daily_average/monthly_average


def historic_averages_handler(request, average_type):
    """
    Controller for retrieving daily or monthly averages of the historical data
    """
    # handle the parameters from the user
    reach_id, region, units, return_format = handle_parameters(request)
    units_title, units_title_long = get_units_title(units)
    forcing = request.args.get('forcing', 'era_5')
    hist_df = get_historical_dataframe(reach_id, region, units, forcing)

    # compute the requested average over the simulation period
    hist_df.index = pd.to_datetime(hist_df.index)
    if average_type == 'daily':
        hist_df = hd.daily_average(hist_df, rolling=True)
    else:
        hist_df = hd.monthly_average(hist_df)
    hist_df.index.name = 'datetime'

    if return_format == 'csv':
        response = make_response(hist_df.to_csv())
        response.headers['content-type'] = 'text/csv'
        response.headers[
            'Content-Disposition'] = f'attachment; filename=seasonal_average_{forcing}_{reach_id}.csv'
        return response

    if return_format == "json":
        return jsonify({
            'region':
            region,
            'simulation_forcing':
            forcing,
            'forcing_fullname':
            forcing.replace('era_', 'ERA ').title(),
            'comid':
            reach_id,
            'gendate':
            datetime.datetime.utcnow().isoformat() + 'Z',
            'time_series': {
                'datetime': hist_df.index.tolist(),
                'flow': hist_df[f'streamflow_{units_title}^3/s'].tolist(),
            },
            'units': {
                'name': 'Streamflow',
                'short': f'{units_title}3/s',
                'long': f'Cubic {units_title_long} per Second'
            }
        })

    else:
        raise ValueError(f'Invalid return_format: {return_format}')
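
For context, here is a minimal sketch of how a handler like this could be wired into a Flask app. The URL rule and endpoint name below are illustrative assumptions, not the project's actual routes.

# hypothetical wiring sketch; the route path is an assumption, not the real API
from flask import Flask, request

app = Flask(__name__)

@app.route('/api/HistoricAverages/<average_type>')
def historic_averages(average_type):
    # delegates to the handler above, which returns a csv or json response
    return historic_averages_handler(request, average_type)
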
def historic_data_handler(request):
    """
    Controller for retrieving simulated historic data
    """
    # handle the parameters from the user
    reach_id, region, units, return_format = handle_parameters(request)
    units_title, units_title_long = get_units_title(units)
    forcing = request.args.get('forcing', 'era_5')
    hist_df = get_historical_dataframe(reach_id, region, units, forcing)

    # if csv, return the dataframe as csv
    if return_format == 'csv':
        response = make_response(hist_df.to_csv())
        response.headers['content-type'] = 'text/csv'
        response.headers[
            'Content-Disposition'] = f'attachment; filename=historic_streamflow_{forcing}_{reach_id}.csv'
        return response

    # if json, build and return the response as a dictionary (Flask serializes it)
    if return_format == "json":
        return {
            'region': region,
            'simulation_forcing': forcing,
            'forcing_fullname': forcing.replace('era_', 'ERA ').title(),
            'comid': reach_id,
            'gendate': datetime.datetime.utcnow().isoformat() + 'Z',
            'startdate': hist_df.index[0],
            'enddate': hist_df.index[-1],
            'time_series': {
                'datetime': hist_df.index.tolist(),
                'flow': hist_df[f'streamflow_{units_title}^3/s'].tolist(),
            },
            'units': {
                'name': 'Streamflow',
                'short': f'{units_title}^3/s',
                'long': f'Cubic {units_title_long} per Second'
            }
        }

    else:
        return jsonify({"error": "Invalid return_format."}), 422
def return_periods_handler(request):
    """
    Controller for retrieving seasonal averages
    """
    # handle the parameters from the user
    try:
        reach_id, region, units, return_format = handle_parameters(request)
    except Exception as e:
        raise e
    forcing = request.args.get('forcing', 'era_5')

    if forcing == 'era_interim':
        forcing_fullname = 'ERA Interim'
        historical_data_file = glob.glob(
            os.path.join(PATH_TO_ERA_INTERIM, region,
                         '*return_periods*.nc*'))[0]
        startdate = '1980-01-01T00:00:00Z'
        enddate = '2014-12-31T00:00:00Z'
    elif forcing == 'era_5':
        forcing_fullname = 'ERA 5'
        historical_data_file = glob.glob(
            os.path.join(PATH_TO_ERA_5, region, '*return_periods*.nc*'))[0]
        startdate = '1979-01-01T00:00:00Z'
        enddate = '2018-12-31T00:00:00Z'
    else:
        return {
            "error": "Invalid forcing specified, choose era_interim or era_5"
        }, 422

    # handle the units
    units_title, units_title_long = get_units_title(units)

    # collect the data in a dataframe and keep only the requested reach
    qout_nc = xarray.open_dataset(historical_data_file)
    qout_data = qout_nc.to_dataframe()
    qout_nc.close()
    qout_data = qout_data.drop(columns=['lon', 'lat'], errors='ignore')
    qout_data = qout_data[qout_data.index == reach_id]
    if units == 'english':
        for column in qout_data:
            qout_data[column] *= M3_TO_FT3

    # if csv, return the dataframe as csv
    if return_format == 'csv':
        response = make_response(qout_data.to_csv())
        response.headers['content-type'] = 'text/csv'
        response.headers[
            'Content-Disposition'] = f'attachment; filename=return_periods_{forcing}_{reach_id}.csv'
        return response

    # create a json of the data
    json_output = {
        'return_periods': json.loads(qout_data.to_json(orient='records'))[0],
        'region': region,
        'comid': reach_id,
        'simulation_forcing': forcing,
        'forcing_fullname': forcing_fullname,
        'gendate': datetime.datetime.utcnow().isoformat() + 'Z',
        'startdate': startdate,
        'enddate': enddate,
        'units': {
            'name': 'Streamflow',
            'short': f'{units_title}^3/s',
            'long': f'Cubic {units_title_long} per Second'
        }
    }

    # if json, return the json response
    if return_format == "json":
        return jsonify(json_output)

    # use the json to render a waterml document
    if return_format == "waterml":
        xml_response = make_response(
            render_template('return_periods.xml', **json_output))
        xml_response.headers.set('Content-Type', 'application/xml')
        return xml_response

    else:
        return jsonify({"error": "Invalid return_format."}), 422
Example #4
def forecast_records_handler(request):
    """
    Controller for retrieving forecast record data
    """
    # handle the parameters from the user
    try:
        reach_id, region, units, return_format = handle_parameters(request)
    except Exception as e:
        raise ValueError(e)
    year = dt.utcnow().year
    start_date = request.args.get(
        'start_date',
        dt(year=year, month=1, day=1).strftime('%Y%m%d'))
    end_date = request.args.get(
        'end_date',
        dt(year=year, month=12, day=31).strftime('%Y%m%d'))

    try:
        start_date = dt.strptime(start_date, '%Y%m%d')
        end_date = dt.strptime(end_date, '%Y%m%d')
    except ValueError:
        raise ValueError(
            f'Unrecognized start_date "{start_date}" or end_date "{end_date}". Use YYYYMMDD format'
        )

    # handle the units
    units_title, units_title_long = get_units_title(units)

    # open and read the forecast record netcdf
    record_path = os.path.join(PATH_TO_FORECAST_RECORDS, region,
                               f'forecast_record-{year}-{region}.nc')
    forecast_record = xarray.open_dataset(record_path)
    times = pd.to_datetime(pd.Series(forecast_record['time'].data,
                                     name='datetime'),
                           unit='s',
                           origin='unix')
    record_flows = forecast_record.sel(rivid=reach_id)['Qout']
    forecast_record.close()

    # create a dataframe and filter by date
    df = times.to_frame().join(
        pd.Series(record_flows, name=f'streamflow_{units_title}^3/s'))
    df = df[df['datetime'].between(start_date, end_date)]
    df.index = df['datetime']
    del df['datetime']
    df.index = df.index.strftime('%Y-%m-%dT%H:%M:%SZ')
    df.index.name = 'datetime'
    # drop rows with fill values (flows over 1e9 are placeholders, not real data)
    df[df[f'streamflow_{units_title}^3/s'] > 1000000000] = np.nan
    df.dropna(inplace=True)
    if units == 'english':
        df[f'streamflow_{units_title}^3/s'] *= M3_TO_FT3

    # create the http response
    if return_format == 'csv':
        response = make_response(df.to_csv())
        response.headers['content-type'] = 'text/csv'
        response.headers[
            'Content-Disposition'] = f'attachment; filename=forecast_record_{reach_id}.csv'
        return response

    elif return_format == 'json':
        return {
            'region': region,
            'comid': reach_id,
            'gendate': dt.utcnow().isoformat() + 'Z',
            'startdate': df.index[0],
            'enddate': df.index[-1],
            'units': {
                'name': 'Streamflow',
                'short': f'{units_title}^3/s',
                'long': f'Cubic {units_title_long} per Second',
            },
            'time_series': {
                'datetime': df.index.tolist(),
                'flow': df[f'streamflow_{units_title}^3/s'].tolist(),
            }
        }

    else:
        raise ValueError(f'Invalid return_format "{return_format}"')
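
A small standalone illustration of the pd.to_datetime(..., unit='s', origin='unix') call used above to decode the netCDF time axis; the values here are made up.

import pandas as pd

# seconds since the unix epoch -> timestamps (0 s = 1970-01-01, 86400 s = one day later)
times = pd.to_datetime(pd.Series([0, 86400], name='datetime'), unit='s', origin='unix')
print(times.tolist())  # [Timestamp('1970-01-01 00:00:00'), Timestamp('1970-01-02 00:00:00')]
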
Example #5
def forecast_stats_handler(request):
    """
    Controller that will retrieve forecast statistics data in different formats
    """
    # handle the parameters from the user
    reach_id, region, units, return_format = handle_parameters(request)
    forecast_folder = request.args.get('date', 'most_recent')

    # handle the units
    units_title, units_title_long = get_units_title(units)

    # find/check current output datasets
    path_to_output_files = os.path.join(PATH_TO_FORECASTS, region)
    forecast_nc_list, start_date = ecmwf_find_most_current_files(
        path_to_output_files, forecast_folder)
    forecast_nc_list = sorted(forecast_nc_list)
    if not forecast_nc_list or not start_date:
        raise ValueError(
            f'ECMWF forecast for region "{region}" and date "{start_date}" not found'
        )

    try:
        # combine 52 ensembles
        qout_datasets = []
        ensemble_index_list = []
        for forecast_nc in forecast_nc_list:
            ensemble_index_list.append(
                int(os.path.basename(forecast_nc)[:-3].split("_")[-1]))
            qout_datasets.append(
                xarray.open_dataset(forecast_nc).sel(rivid=reach_id).Qout)
        merged_ds = xarray.concat(
            qout_datasets, pd.Index(ensemble_index_list, name='ensemble'))

        # get an array of all the ensembles, delete the high res before doing averages
        merged_array = merged_ds.data
        merged_array = np.delete(merged_array,
                                 list(merged_ds.ensemble.data).index(52),
                                 axis=0)
    except Exception:
        raise ValueError('Error while reading data from the netCDF files')

    # replace any negative values introduced by the Muskingum routing with zero
    merged_array[merged_array <= 0] = 0

    # load all the series into a dataframe
    df = pd.DataFrame(
        {
            f'flow_max_{units_title}^3/s': np.amax(merged_array, axis=0),
            f'flow_75%_{units_title}^3/s': np.percentile(
                merged_array, 75, axis=0),
            f'flow_avg_{units_title}^3/s': np.mean(merged_array, axis=0),
            f'flow_25%_{units_title}^3/s': np.percentile(
                merged_array, 25, axis=0),
            f'flow_min_{units_title}^3/s': np.min(merged_array, axis=0),
            f'high_res_{units_title}^3/s': merged_ds.sel(ensemble=52).data
        },
        index=merged_ds.time.data)
    df.index = df.index.strftime('%Y-%m-%dT%H:%M:%SZ')
    df.index.name = 'datetime'

    # handle units conversion
    if units_title == 'ft':
        for column in df.columns:
            df[column] *= M3_TO_FT3

    if return_format == 'csv':
        response = make_response(df.to_csv())
        response.headers['content-type'] = 'text/csv'
        response.headers['Content-Disposition'] = \
            f'attachment; filename=forecasted_streamflow_{region}_{reach_id}.csv'
        return response

    # split the high resolution series out of the dataframe so that each part
    # can be dropna'd separately (the high res forecast has more timesteps)
    high_res_data = df[f'high_res_{units_title}^3/s'].dropna()
    del df[f'high_res_{units_title}^3/s']
    df.dropna(inplace=True)

    # create a dictionary with the metadata and series of values
    context = {
        'region': region,
        'comid': reach_id,
        'gendate': dt.utcnow().isoformat() + 'Z',
        'startdate': df.index[0],
        'enddate': df.index[-1],
        'units': {
            'name': 'Streamflow',
            'short': f'{units_title}^3/s',
            'long': f'Cubic {units_title_long} per Second',
        },
        'time_series': {
            'datetime': df.index.tolist(),
            'datetime_high_res': high_res_data.index.tolist(),
            'high_res': high_res_data.to_list(),
        }
    }
    context['time_series'].update(df.to_dict(orient='list'))

    if return_format == "json":
        return jsonify(context)

    else:
        raise ValueError('Invalid return_format')
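
The statistics step above reduces to a few numpy calls along the ensemble axis. A self-contained sketch with fabricated data, just to show the shape of the computation:

import numpy as np

# fabricated stand-in: 51 ensemble members x 10 forecast timesteps
ens = np.random.default_rng(seed=0).random((51, 10))
stats = {
    'flow_max': np.amax(ens, axis=0),           # per-timestep maximum across members
    'flow_75%': np.percentile(ens, 75, axis=0),
    'flow_avg': np.mean(ens, axis=0),
    'flow_25%': np.percentile(ens, 25, axis=0),
    'flow_min': np.min(ens, axis=0),
}
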
Example #6
def forecast_ensembles_handler(request):
    """
    Controller that will retrieve forecast ensemble data in different formats
    """
    # handle the parameters from the user
    try:
        reach_id, region, units, return_format = handle_parameters(request)
    except Exception as e:
        raise ValueError(e)
    ensemble = request.args.get('ensemble', 'all')
    forecast_folder = request.args.get('date', 'most_recent')

    # handle the units
    units_title, units_title_long = get_units_title(units)

    # find/check current output datasets
    path_to_output_files = os.path.join(PATH_TO_FORECASTS, region)
    forecast_nc_list, start_date = ecmwf_find_most_current_files(
        path_to_output_files, forecast_folder)
    forecast_nc_list = sorted(forecast_nc_list)
    if not forecast_nc_list or not start_date:
        raise ValueError(
            f'ECMWF forecast for region "{region}" and date "{start_date}" not found'
        )

    try:
        # combine 52 ensembles with xarray
        qout_datasets = []
        ensemble_index_list = []
        for forecast_nc in forecast_nc_list:
            ensemble_index_list.append(
                int(os.path.basename(forecast_nc)[:-3].split("_")[-1]))
            qout_datasets.append(
                xarray.open_dataset(forecast_nc).sel(rivid=reach_id).Qout)
        merged_ds = xarray.concat(
            qout_datasets, pd.Index(ensemble_index_list, name='ensemble'))
    except Exception:
        raise ValueError('Error while reading data from the netCDF files')

    # make a list of column names (with zero padded numbers) for the pandas DataFrame
    ensemble_column_names = []
    for i in ensemble_index_list:
        ensemble_column_names.append(f'ensemble_{i:02}_{units_title}^3/s')

    # make the data into a pandas dataframe
    df = pd.DataFrame(data=np.transpose(merged_ds.data),
                      columns=ensemble_column_names,
                      index=merged_ds.time.data)
    df.index = df.index.strftime('%Y-%m-%dT%H:%M:%SZ')
    df.index.name = 'datetime'

    # handle units conversion
    if units_title == 'ft':
        for column in df.columns:
            df[column] *= M3_TO_FT3

    # filter the dataframe down to only the ensembles requested by the user
    if ensemble != 'all':
        requested_ensembles = []
        for ens in ensemble.split(','):
            # a range like '1-10' expands to every ensemble number between the 2
            if '-' in ens:
                start, end = ens.split('-')
                for i in range(int(start), int(end) + 1):
                    requested_ensembles.append(
                        f'ensemble_{int(i):02}_{units_title}^3/s')
            else:
                requested_ensembles.append(
                    f'ensemble_{int(ens):02}_{units_title}^3/s')
        # remove the requested ensembles from the list of all column names,
        # leaving behind the columns to delete from the dataframe
        for ens in requested_ensembles:
            if ens in ensemble_column_names:
                ensemble_column_names.remove(ens)
        for ens in ensemble_column_names:
            del df[ens]

    if return_format == 'csv':
        response = make_response(df.to_csv())
        response.headers['content-type'] = 'text/csv'
        response.headers['Content-Disposition'] = \
            f'attachment; filename=forecasted_ensembles_{region}_{reach_id}.csv'
        return response

    # build the time series dict; the high resolution forecast (ensemble 52) has
    # more timesteps than the others, so its datetimes are reported separately.
    # guard the lookups in case the user's ensemble filter removed these columns
    ensemble_ts_dict = {}
    if f'ensemble_01_{units_title}^3/s' in df.columns:
        ensemble_ts_dict['datetime'] = \
            df[f'ensemble_01_{units_title}^3/s'].dropna().index.tolist()
    if f'ensemble_52_{units_title}^3/s' in df.columns:
        ensemble_ts_dict['datetime_high_res'] = \
            df[f'ensemble_52_{units_title}^3/s'].dropna().index.tolist()
    for column in df.columns:
        ensemble_ts_dict[column] = df[column].dropna().tolist()

    context = {
        'region': region,
        'comid': reach_id,
        'startdate': df.index[0],
        'enddate': df.index[-1],
        'gendate': dt.utcnow().isoformat() + 'Z',
        'time_series': ensemble_ts_dict,
        'units': {
            'name': 'Streamflow',
            'short': f'{units_title}^3/s',
            'long': f'Cubic {units_title_long} per Second'
        }
    }

    if return_format == 'json':
        return jsonify(context)

    else:
        raise ValueError('Invalid return_format')
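
The range parsing inside forecast_ensembles_handler can be pulled out into a standalone helper. This refactoring sketch is not part of the source, but shows the same '1-3,52' style selection logic on its own:

def parse_ensemble_selection(ensemble: str) -> set:
    """Expand a selection string like '1-3,52' into a set of ensemble numbers."""
    selected = set()
    for token in ensemble.split(','):
        if '-' in token:
            # a range like '1-3' includes both endpoints
            start, end = token.split('-')
            selected.update(range(int(start), int(end) + 1))
        else:
            selected.add(int(token))
    return selected

# parse_ensemble_selection('1-3,52') -> {1, 2, 3, 52}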