def put_24hr_observations(session):
    """Fetch the previous day's NOAA observations and store them.

    The requested day is the calendar date 24 hours before the current
    local time, truncated to midnight; its ISO-8601 string is handed to
    every station request.

    Args:
        session (Session): database session given to the repository

    Returns:
        int: combined length of the measurement lists that were stored
    """
    repository = Repository(session)
    noaa_stations = repository.get_all_stations(source='NOAA')

    # midnight at the start of the day it was 24 hours ago
    a_day_ago = dt.datetime.now() - dt.timedelta(hours=24)
    target_day = a_day_ago.replace(hour=0, minute=0, second=0, microsecond=0)
    target_iso = target_day.isoformat()

    # one API request per station row
    responses = noaa_stations.apply(
        lambda row: make_station_observation_request(row, target_iso),
        axis=1,
    )

    # persist each station's batch and tally what went in
    stored = 0
    for batch in responses.values:
        repository.put_measurements_from_list(batch)
        stored += len(batch)
    return stored
Example #2
0
def compute_station_river_distances():
    """Store, for every run, its closest USGS, NOAA and SNOW station.

    For each run the distance from its put-in coordinates to every station
    is computed with ``get_distance_between_geo_points``. The nearest
    station of each source is then saved as a ``StationRiverDistance``
    whose distance is rounded to two decimals.
    """
    repository = Repository()

    all_runs = repository.get_all_runs()
    all_stations = repository.get_all_stations()

    for _, river_run in all_runs.iterrows():
        # one distance record per station; expanded into columns below
        # (the code reads 'distance', 'source' and 'station' from it --
        # presumably the keys returned by get_distance_between_geo_points,
        # TODO confirm)
        per_station = all_stations.apply(
            lambda station_row: get_distance_between_geo_points(
                river_run.put_in_latitude,
                river_run.put_in_longitude,
                station_row.latitude,
                station_row.longitude,
                river_run.run_id,
                station_row.station_id,
                station_row.source,
            ),
            axis=1,
        )
        ranked = per_station.apply(pd.Series).sort_values('distance')

        # after sorting, the first row of each source is the nearest one
        closest = [
            ranked[ranked.source == source_name].iloc[0, :]
            for source_name in ('USGS', 'NOAA', 'SNOW')
        ]

        repository.put_station_river_distances([
            StationRiverDistance(
                station_id=hit.station,
                run_id=river_run.run_id,
                distance=round(float(hit.distance), 2),
            )
            for hit in closest
        ])
def get_usgs_site_ids():
    """Look up the site ids of the USGS stations held in the database.

    Returns:
        list: values of the ``station_id`` column, in the order the
            repository returned them
    """
    usgs_stations = Repository().get_all_stations(source="USGS")
    return list(usgs_stations["station_id"])
def fill_noaa_gaps(start_date, end_date, db=settings.DATABASE):
    """Backfill NOAA weather measurements for a range of days.

    Use as needed to fill gaps: every NOAA station is queried once per day
    from ``start_date`` through ``end_date`` and the returned measurements
    are written to the database. A station batch that fails with a
    ``SQLAlchemyError`` is rolled back and skipped; it is not counted.

    Args:
        start_date: the start day, included in API calls; must support
            ``isoformat()``, comparison with ``end_date`` and adding a
            ``timedelta``
        end_date: the end day, inclusive
        db: database setting passed to ``Context``

    Returns:
        int: total number of measurements stored over all days
    """
    context = Context(db)
    session = context.Session()

    try:
        repo = Repository(session)
        stations = repo.get_all_stations(source='NOAA')
        total = 0

        # loop through each day retrieving observations
        current = start_date
        while current <= end_date:
            day = current.isoformat()
            content = stations.apply(
                lambda station: make_station_observation_request(station, day),
                axis=1
            ).values

            # put them all in the db
            added = 0
            for station_measurements in content:
                try:
                    repo.put_measurements_from_list(station_measurements)
                except SQLAlchemyError:
                    # best effort: undo the failed batch and move on
                    session.rollback()
                    continue

                count = len(station_measurements)
                added += count

                # an empty batch has no measurement to read the station
                # from, so there is nothing to report for it
                if count:
                    # report the station itself (not the whole measurement
                    # list) together with this station's own count
                    station = station_measurements[0].station
                    print(f'added {count} measurements for station_id {station} - {day}')

            current += dt.timedelta(days=1)
            total += added

        return total
    finally:
        # hand the session's resources back even if the backfill aborted
        session.close()