def put_24hr_observations(session):
    """get yesterdays observations

    Args
        session (Session): database session
    """
    # create a repo and pull all the weather stations from NOAA
    repo = Repository(session)
    stations = repo.get_all_stations(source='NOAA')

    # set up the day to retrieve (midnight of the previous day)
    yesterday = dt.datetime.now() - dt.timedelta(hours=24)
    yesterday = dt.datetime(year=yesterday.year, month=yesterday.month, day=yesterday.day)

    # apply the api request to each station
    content = stations.apply(
        lambda station: make_station_observation_request(station, yesterday.isoformat()),
        axis=1
    ).values

    # put them all in the db
    added = 0
    for station_measurements in content:
        repo.put_measurements_from_list(station_measurements)
        added += len(station_measurements)

    return added
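
A minimal usage sketch for the job above; the Context import path and settings name mirror the fill_noaa_gaps example further down and are assumptions here:

# hypothetical nightly driver; Context/settings.DATABASE follow the
# same pattern used in fill_noaa_gaps below
from riverrunner.context import Context
from riverrunner import settings

context = Context(settings.DATABASE)
session = context.Session()

added = put_24hr_observations(session)
print(f'{added} measurements added for yesterday')
session.close()
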
def upload_data_from_file(csv_file, from_csv=False):
    """ insert all records contained in file to database

    Args:
        csv_file (str): full path of CSV file containing records
        from_csv (bool): whether to insert into database using CSV or ORM (CSV scales better)

    Returns:
        bool: success/exception
    """
    r = Repository()

    if from_csv:
        success = r.put_measurements_from_csv(csv_file=csv_file)

    else:
        measurements = []
        with open(csv_file, "r") as f:
            for line in f:
                site_id, param_code, date_time, value = line.strip().split(",")
                measurement = Measurement(
                    station_id=site_id,
                    metric_id=param_code,
                    date_time=dateutil.parser.parse(date_time),
                    value=float(value)
                )
                measurements.append(measurement)
        success = r.put_measurements_from_list(measurements=measurements)

    return success
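
Each line of the input file is expected to be site_id,param_code,date_time,value with no header row. A hedged usage sketch; the file name and values below are made up:

# sample.csv (no header), one measurement per line, e.g.
#   12358790,00060,2018-05-01T00:00:00,342.0
#   12358790,00060,2018-05-01T01:00:00,339.5

# ORM path: each line is parsed into a Measurement before inserting
ok = upload_data_from_file('sample.csv', from_csv=False)

# bulk path: let the repository load the CSV directly (scales better)
ok = upload_data_from_file('sample.csv', from_csv=True)
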
Example #3
def compute_station_river_distances():
    """compute the distance from every river to every weather station"""
    repo = Repository()

    runs = repo.get_all_runs()
    stations = repo.get_all_stations()

    # for each run, find the closest USGS, NOAA, and SNOW stations
    for run in runs.iterrows():
        distances = stations.apply(
            lambda row: get_distance_between_geo_points(
                run[1].put_in_latitude, run[1].put_in_longitude,
                row.latitude, row.longitude,
                run[1].run_id, row.station_id, row.source),
            axis=1
        ).apply(pd.Series)

        distances.sort_values('distance', inplace=True)

        usgs_ = distances[distances.source == 'USGS'].iloc[0, :]
        noaa_ = distances[distances.source == 'NOAA'].iloc[0, :]
        snow_ = distances[distances.source == 'SNOW'].iloc[0, :]

        usgs = StationRiverDistance(station_id=usgs_.station,
                                    run_id=run[1].run_id,
                                    distance=round(float(usgs_.distance), 2))

        noaa = StationRiverDistance(station_id=noaa_.station,
                                    run_id=run[1].run_id,
                                    distance=round(float(noaa_.distance), 2))

        snow = StationRiverDistance(station_id=snow_.station,
                                    run_id=run[1].run_id,
                                    distance=round(float(snow_.distance), 2))

        repo.put_station_river_distances([usgs, noaa, snow])
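
get_distance_between_geo_points is not shown in this listing; the loop above only needs it to return a mapping with at least distance, station, and source keys. A minimal haversine-based stand-in under that assumption:

import math

def get_distance_between_geo_points(lat1, lon1, lat2, lon2,
                                    run_id, station_id, source):
    """hypothetical stand-in: great-circle distance in kilometers"""
    # haversine formula on a spherical Earth (radius ~6371 km)
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    d_phi = math.radians(lat2 - lat1)
    d_lambda = math.radians(lon2 - lon1)

    a = (math.sin(d_phi / 2) ** 2
         + math.cos(phi1) * math.cos(phi2) * math.sin(d_lambda / 2) ** 2)
    distance = 2 * 6371 * math.asin(math.sqrt(a))

    # keys below are the columns compute_station_river_distances reads back
    return {'run_id': run_id, 'station': station_id,
            'source': source, 'distance': distance}
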
def get_usgs_site_ids():
    """ retrieve USGS site ids from database

    Returns:
        [str]: list of site ids
    """
    r = Repository()
    sites = r.get_all_stations(source="USGS")
    site_ids = list(sites["station_id"])
    return site_ids
Example #5
def compute_predictions(session):
    """compute and cache predictions for all runs

    Args:
        session: (Session) database connection

    Returns:
        True: if observations were successfully retrieved and inserted
        False: otherwise
    """
    try:
        arima = Arima(session)
        repo = Repository(session)

        runs = repo.get_all_runs_as_list()
        for run in runs:
            try:
                predictions = arima.arima_model(run.run_id)

                to_add = [
                    Prediction(run_id=run.run_id,
                               timestamp=pd.to_datetime(d),
                               fr_lb=round(float(p), 1),
                               fr=round(float(p), 1),
                               fr_ub=round(float(p), 1)) for p, d in
                    zip(predictions.values, predictions.index.values)
                ]

                repo.clear_predictions(run.run_id)
                repo.put_predictions(to_add)
                log(f'predictions for {run.run_id}-{run.run_name} added to db')

            except SQLAlchemyError as e:
                log(f'{run.run_id}-{run.run_name} failed - {[str(a) for a in e.args]}')
                session.rollback()

            except Exception as e:
                log(f'predictions for {run.run_id}-{run.run_name} failed - {[str(a) for a in e.args]}')

        return True

    except Exception as e:
        log(f'failed to compute daily predictions - {str(e.args)}')
        return False
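
A usage sketch, assuming the same Context/settings pattern as fill_noaa_gaps and that log writes to the project's logging sink:

# hypothetical daily scheduled entry point
context = Context(settings.DATABASE)
session = context.Session()

if compute_predictions(session):
    log('daily predictions cached')
else:
    log('daily prediction run failed')
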
Example #6
    @classmethod
    def setUpClass(cls):
        """perform at test class initialization

        Note:
            * ensure only a TContext is used NEVER Context or we'll lose all
            our hard-scraped data
            * any existing data in the mock db will be deleted
            * 5 random addresses are generated because nearly all unittests
            require addresses to exist as a foreign key dependency
        """
        cls.context = TContext()
        cls.session = cls.context.Session()
        cls.connection = psycopg2.connect(**settings.PSYCOPG_DB_TEST)
        cls.repo = Repository(session=cls.session, connection=cls.connection)

        cls.context.clear_dependency_data(cls.session)
        cls.context.generate_addresses(cls.session)
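
The matching teardown is not shown in this listing; a minimal sketch, assuming the class should release the session and psycopg2 connection opened above:

    @classmethod
    def tearDownClass(cls):
        """remove mock data and close the connections opened in setUpClass"""
        cls.context.clear_dependency_data(cls.session)
        cls.session.close()
        cls.connection.close()
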
def get_noaa_predictions(run_id, session):
    """retrieve NOAA predictions for run

    Args:
        run_id (int): id of the run to look up
        session (Session): database session

    Returns:
        DataFrame: hourly NOAA forecast periods, or None if the request fails
    """
    repo = Repository(session)
    run = repo.get_run(run_id)

    lat = run.put_in_latitude
    lon = run.put_in_longitude

    r = requests.get(f'https://api.weather.gov/points/{lat},{lon}/forecast/hourly')

    if r.status_code == 200 and len(r.content) > 10:
        return pd.DataFrame(r.json()['properties']['periods'])
    else:
        return None
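
A usage sketch, assuming an open database session; the run_id is made up, and the column names come from the weather.gov hourly forecast 'periods' payload and should be verified against a live response:

forecast = get_noaa_predictions(run_id=599, session=session)

if forecast is not None:
    # keep a few representative columns from the hourly forecast
    print(forecast[['startTime', 'temperature', 'shortForecast']].head())
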
def fill_noaa_gaps(start_date, end_date, db=settings.DATABASE):
    """use as needed to fill gaps in weather measurements

    Args:
        start_date: the start day, included in API calls
        end_date: the end day, inclusive
    """
    context = Context(db)
    session = context.Session()

    repo = Repository(session)
    stations = repo.get_all_stations(source='NOAA')
    total = 0

    # loop through each day retrieving observations
    while start_date <= end_date:
        content = stations.apply(
            lambda station: make_station_observation_request(station, start_date.isoformat()),
            axis=1
        ).values

        # put them all in the db
        added = 0
        for station_measurements in content:
            try:
                repo.put_measurements_from_list(station_measurements)
            except SQLAlchemyError:
                session.rollback()
                continue
            added += len(station_measurements)

            station = station_measurements[0].station
            print(f'added {len(station_measurements)} measurements for station_id {station} - {start_date.isoformat()}')

        start_date += dt.timedelta(days=1)
        total += added

    return total
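
A hedged usage sketch for backfilling a week of missing NOAA observations; the dates below are made up:

start = dt.datetime(2018, 5, 1)
end = dt.datetime(2018, 5, 7)

total = fill_noaa_gaps(start, end, db=settings.DATABASE)
print(f'backfilled {total} measurements between {start.date()} and {end.date()}')
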
Example #9
    def __init__(self, session):
        self.repo = Repository(session)
Example #10
from riverrunner.repository import Repository
from riverrunner import settings

# IP address for the running application
HOST_IP = '192.168.80.13'

# enable application debugging features
DEBUG = False

# mapping from river's predicted status to a color code
COLOR_MAP = dict(unknown='#41434C',
                 optimal='#4254CC',
                 fair='#8F8A18',
                 not_recommended='#A63617')

repo = Repository()
runs = repo.get_all_runs_as_list()
runs = [run for run in runs if run.todays_runability != -2]
options = [r.select_option for r in runs]
options.sort(key=lambda r: r['label'])

# create a new Dash app adding custom fonts and CSS
app = dash.Dash()
font_url = 'https://fonts.googleapis.com/css?family=Montserrat|Permanent+Marker'
app.css.append_css({'external_url': font_url})


def color_scale(x):
    """prediction binning

    method bins river predictions into discrete categories for color coding
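
The snippet is cut off above. A minimal sketch of the binning idea the docstring describes, using the COLOR_MAP defined earlier; the function name and threshold values here are assumptions, not the app's actual cut points:

def color_scale_sketch(x):
    """hypothetical binning of a predicted runability score to a color code"""
    if x is None or x < 0:
        return COLOR_MAP['unknown']
    elif x >= 0.8:
        return COLOR_MAP['optimal']
    elif x >= 0.5:
        return COLOR_MAP['fair']
    else:
        return COLOR_MAP['not_recommended']
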
Example #11
    test_model: runs stationarity tests and acf/pacf tests and then
    creates an ARIMA model for one run and plots results
"""

import datetime
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import arma_order_select_ic
from riverrunner.repository import Repository

REPO = Repository()


def daily_avg(time_series):
    """Creates dataframe needed for modelling

    Takes time series with measurements on different timeframes and creates a
    dataframe with daily averages for flow rate and exogenous predictors.

    Args:
        time_series: dataframe with metrics for one run_id, assumes output
        from get_measurements function

    Returns:
        DataFrame: containing daily measurements
    """