def get_twc_forecast(stid, api_key, forecast_date):
    """
    Retrieve a forecast from the api.weather.com v3 5-day daily endpoint and build a Forecast.

    :param stid: station ID; sent to the API as 'icaoCode' and stored on the Forecast
    :param api_key: API key, sent as 'apiKey'
    :param forecast_date: datetime of the day to forecast
    :return: Forecast object with daily values and a 3-hourly time series
    :raises requests.exceptions.HTTPError: if the API answers with an error status
    """
    # retrieve api json data
    api_url = 'https://api.weather.com/v3/wx/forecast/daily/5day'
    api_options = {
        'language': 'en-US',
        'format': 'json',
        'units': 'e',
        'apiKey': api_key,
        'icaoCode': stid,
    }
    response = requests.get(api_url, params=api_options)

    # Raise error for invalid HTTP response. This is checked BEFORE decoding the body so that an error
    # page that is not valid JSON surfaces as the HTTPError rather than as a JSON decoding error.
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('twc: got HTTP error when querying API')
        raise
    twc_data = response.json()

    # The data has a 'daypart' section which has a time series of day/night pairs. This is useful for wind and
    # precipitation information, but we have to make some assumptions about the datetime to use it.
    twc_df = pd.DataFrame(twc_data['daypart'][0])
    # Each valid time is repeated twice: once for the day part and once for the night part
    valid_days = [epoch_time_to_datetime(d) for d in twc_data['validTimeUtc'] for _ in range(2)]
    twc_df['DateTime'] = pd.Series(valid_days) + twc_df['dayOrNight'].apply(dn_to_timedelta)
    # The first day part may be empty; drop it when it carries no day/night flag
    if twc_df['dayOrNight'][0] is None:
        twc_df.drop(0, axis=0, inplace=True)

    # Add dew point, fix units, and rename
    twc_df['dewpoint'] = dewpoint_from_t_rh(twc_df['temperature'], twc_df['relativeHumidity'])
    twc_df['windSpeed'] = twc_df['windSpeed'].apply(mph_to_kt)
    column_names_dict = {
        'cloudCover': 'cloud',
        'qpf': 'rain',
        'wxPhraseLong': 'condition',
    }
    twc_df = twc_df.rename(columns=column_names_dict)
    twc_df.set_index('DateTime', inplace=True)

    # Resample to 3-hourly. Carefully consider the QPF.
    # NOTE(review): the `base` keyword of resample is deprecated in newer pandas releases -- confirm
    # against the pandas version this project pins before upgrading.
    offset = twc_df.index[0].hour % 3
    twc_hourly = twc_df.resample('3H', base=offset).interpolate()
    # Precipitation amounts are divided by 4 after interpolating onto the 3-hourly grid
    twc_hourly['rain'] = twc_hourly['rain'].apply(lambda x: x / 4.)
    twc_hourly['qpfSnow'] = twc_hourly['qpfSnow'].apply(lambda x: x / 4.)
    twc_hourly['windDirection'] = twc_hourly['windDirection'].round()

    # calculate daily values over hour 6 of forecast_date through hour 6 of the next day
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    daily_high = twc_hourly.loc[forecast_start:forecast_end, 'temperature'].max()
    daily_low = twc_hourly.loc[forecast_start:forecast_end, 'temperature'].min()
    daily_wind = twc_hourly.loc[forecast_start:forecast_end, 'windSpeed'].max()
    # The rain total stops one hour short of the end of the window
    daily_rain = twc_hourly.loc[forecast_start:forecast_end - timedelta(hours=1), 'rain'].sum()

    # create Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(daily_high, daily_low, daily_wind, daily_rain)
    forecast.timeseries.data = twc_hourly.reset_index()

    return forecast
def get_accuwx_forecast(config, stid, location_key, api_key, forecast_date):
    """
    Get a Forecast from the AccuWeather API or the cache file.

    The API response is cached as text in '<THETAE_ROOT>/site_data/<stid>_accuwx.txt'; the cache is reused
    whenever check_cache_file() reports it as acceptable.

    :param config: configuration mapping; reads 'THETAE_ROOT' and 'debug'
    :param stid: station ID, used for the cache file name and the Forecast
    :param location_key: AccuWeather location key appended to the API URL
    :param api_key: API key, sent as 'apikey'
    :param forecast_date: datetime of the day to forecast
    :return: Forecast object with daily high and low only (wind and rain are None)
    :raises requests.exceptions.HTTPError: if the API answers with an error status
    """
    # Check if we have a cached file and if it is recent enough
    site_directory = '%s/site_data' % config['THETAE_ROOT']
    cache_file = '%s/%s_accuwx.txt' % (site_directory, stid)
    cache_ok = check_cache_file(config, cache_file)

    # Retrieve data. Looks like only daily temperatures will be of any use right now.
    if not cache_ok:
        api_url = 'http://dataservice.accuweather.com/forecasts/v1/daily/5day/%s' % location_key
        api_options = {'apikey': api_key}
        response = requests.get(api_url, params=api_options)
        # Raise error if we have invalid HTTP response. Checked before decoding the body so that a
        # non-JSON error page is reported as the HTTPError and is never written to the cache.
        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            print('accuweather: got HTTP error when querying API')
            raise
        accuwx_data = response.json()
        # Cache the response
        with open(cache_file, 'w') as f:
            f.write(response.text)
    else:
        # Use a context manager so the cache file handle is always closed
        with open(cache_file) as f:
            accuwx_data = json.load(f)

    # Convert to pandas DataFrame, fix time, and get high and low
    accuwx_df = pd.DataFrame(accuwx_data['DailyForecasts'])
    accuwx_df['DateTime'] = np.nan
    for idx in accuwx_df.index:
        accuwx_df.loc[idx, 'DateTime'] = localized_date_to_utc(
            parse_iso(accuwx_df.loc[idx, 'Date'])).replace(hour=0)
    accuwx_df.set_index('DateTime', inplace=True)
    high = float(accuwx_df.loc[forecast_date, 'Temperature']['Maximum']['Value'])

    # Low should be for night before. We can also 'guess' that the low could be non-diurnal and halfway between the
    # max and next min.
    low = float(accuwx_df.loc[forecast_date - timedelta(days=1), 'Temperature']['Minimum']['Value'])
    alt_low = 0.5 * (high + float(accuwx_df.loc[forecast_date, 'Temperature']['Minimum']['Value']))
    if low - alt_low > 3:
        if config['debug'] > 9:
            print('accuweather: warning: setting low down from %0.0f to %0.0f' % (low, alt_low))
        low = alt_low

    # Create Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(high, low, None, None)

    return forecast
def get_yrno_forecast(stid, state, city, forecast_date):
    """
    Retrieve yr.no forecast for a city, state

    Downloads the hour-by-hour XML file, converts the local times to naive UTC datetimes, converts units
    with the module's helper functions and aggregates daily values.

    :param stid: station ID stored on the Forecast
    :param state: state name as used in the yr.no URL
    :param city: city name as used in the yr.no URL
    :param forecast_date: datetime of the day to forecast
    :return: Forecast object with daily values and the hourly time series
    :raises requests.exceptions.HTTPError: if the XML file cannot be retrieved
    """
    yrno_url = 'https://www.yr.no/place/United_States/%s/%s/forecast_hour_by_hour.xml' % (state, city)
    response = requests.get(yrno_url)
    # Raise error for invalid HTTP response
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        # The message previously carried an 'nws:' prefix, which pointed at the wrong data source
        print('yrno: got HTTP error when querying for XML file from %s' % yrno_url)
        raise

    # Get the XML tree into dictionary form
    hourly_xml = eTree.fromstring(response.text)
    hourly_dict = etree_to_dict(hourly_xml)
    hourly_list = hourly_dict['weatherdata']['forecast']['tabular']['time']
    timezone = hourly_dict['weatherdata']['location']['timezone']['@id']

    # Create a DataFrame for hourly data
    hourly = pd.DataFrame()
    hourly['DateTime'] = [v['@from'] for v in hourly_list]
    # Times are localized to the timezone named in the XML, then converted to UTC and made naive
    hourly['DateTime'] = hourly['DateTime'].apply(parse_iso).apply(lambda x: x.tz_localize(timezone))
    hourly['DateTime'] = hourly['DateTime'].apply(lambda x: x.astimezone('UTC').replace(tzinfo=None))
    # Keep 'DateTime' as a column and index by a copy of it
    hourly['datetime_index'] = hourly['DateTime']
    hourly.set_index('datetime_index', inplace=True)

    # Add in the other parameters
    hourly['temperature'] = [c_to_f(to_float(v['temperature']['@value'])) for v in hourly_list]
    hourly['windSpeed'] = [ms_to_kt(to_float(v['windSpeed']['@mps'])) for v in hourly_list]
    hourly['windDirection'] = [to_float(v['windDirection']['@deg']) for v in hourly_list]
    hourly['pressure'] = [to_float(v['pressure']['@value']) for v in hourly_list]
    hourly['condition'] = [v['symbol']['@name'] for v in hourly_list]
    hourly['rain'] = [mm_to_in(to_float(v['precipitation']['@value'])) for v in hourly_list]

    # Aggregate daily values from hourly series (hour 6 of forecast_date through hour 6 of the next day)
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    hourly_high = hourly.loc[forecast_start:forecast_end, 'temperature'].max()
    hourly_low = hourly.loc[forecast_start:forecast_end, 'temperature'].min()
    hourly_wind = hourly.loc[forecast_start:forecast_end, 'windSpeed'].max()
    # The rain total stops one hour short of the end of the window
    hourly_rain = hourly.loc[forecast_start:forecast_end - timedelta(hours=1), 'rain'].sum()

    # Create the Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(hourly_high, hourly_low, hourly_wind, hourly_rain)
    forecast.timeseries.data = hourly

    return forecast
def get_darksky_forecast(stid, lat, lon, api_key, forecast_date):
    """
    Retrieve the hourly forecast from the Dark Sky API and build a Forecast.

    :param stid: station ID stored on the Forecast
    :param lat: latitude of the point; formatted to 3 decimals in the URL
    :param lon: longitude of the point; formatted to 3 decimals in the URL
    :param api_key: API key, embedded in the request URL
    :param forecast_date: datetime of the day to forecast
    :return: Forecast object with daily values and the hourly time series
    :raises requests.exceptions.HTTPError: if the API answers with an error status
    """
    # Retrieve data
    api_url = 'https://api.darksky.net/forecast/%s/%s'
    point = '%0.3f,%0.3f' % (lat, lon)
    api_options = {'exclude': 'currently,minutely,daily,alerts,flags'}
    json_url = api_url % (api_key, point)
    response = requests.get(json_url, params=api_options)
    # Raise error for invalid HTTP response. Checked before decoding the body so that a non-JSON error
    # page is reported as the HTTPError instead of as a JSON decoding error.
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('darksky: got HTTP error when querying API')
        raise
    darksky_data = response.json()

    # Convert to pandas DataFrame and fix time, units, and columns
    darksky_df = pd.DataFrame(darksky_data['hourly']['data'])
    darksky_df['DateTime'] = np.nan
    for idx in darksky_df.index:
        darksky_df.loc[idx, 'DateTime'] = epoch_time_to_datetime(darksky_df.loc[idx, 'time'])  # already UTC
    darksky_df.set_index('DateTime', inplace=True)
    column_names_dict = {
        'cloudCover': 'cloud',
        'dewPoint': 'dewpoint',
        'precipIntensity': 'rain',
        'windBearing': 'windDirection',
        'summary': 'condition',
    }
    darksky_df = darksky_df.rename(columns=column_names_dict)
    # presumably converts a cloud fraction to percent -- verify against the API documentation
    darksky_df.loc[:, 'cloud'] = 100. * darksky_df.loc[:, 'cloud']
    darksky_df.loc[:, 'windSpeed'] = mph_to_kt(darksky_df.loc[:, 'windSpeed'])
    darksky_df.loc[:, 'windGust'] = mph_to_kt(darksky_df.loc[:, 'windGust'])

    # Calculate daily values over hour 6 of forecast_date through hour 6 of the next day
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    daily_high = darksky_df.loc[forecast_start:forecast_end, 'temperature'].max()
    daily_low = darksky_df.loc[forecast_start:forecast_end, 'temperature'].min()
    daily_wind = darksky_df.loc[forecast_start:forecast_end, 'windSpeed'].max()
    # The rain total stops one hour short of the end of the window
    daily_rain = darksky_df.loc[forecast_start:forecast_end - timedelta(hours=1), 'rain'].sum()

    # Create Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(daily_high, daily_low, daily_wind, daily_rain)
    forecast.timeseries.data = darksky_df.reset_index()

    return forecast
def get_mosx_forecast(stid, mosx_dir, forecast_date):
    """
    Load a MOS-X forecast from its JSON file and wrap it in a Forecast.

    The file is expected at '<mosx_dir>/MOSX_<STID>_<YYYYMMDD>.json', where the station ID is upper-cased
    and the date comes from forecast_date.

    :param stid: station ID
    :param mosx_dir: directory containing the MOS-X JSON files
    :param forecast_date: datetime of the day to forecast
    :return: Forecast object with daily values and, when the file has an 'hourly' section, a time series
    """
    # Locate and read the JSON file
    date_tag = datetime.strftime(forecast_date, '%Y%m%d')
    station_tag = stid.upper()
    mosx_file = '%s/MOSX_%s_%s.json' % (mosx_dir, station_tag, date_tag)
    with open(mosx_file, 'r') as handle:
        data = json.load(handle)

    # Daily values are stored under the 'daily' key
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(
        data['daily']['high'],
        data['daily']['low'],
        data['daily']['wind'],
        data['daily']['precip'],
    )

    # Without an hourly section there is nothing more to add
    if 'hourly' not in data:
        return forecast

    # Column names in the file already match the schema; only the time column needs converting
    hourly_frame = pd.DataFrame(data['hourly'])
    hourly_frame['DateTime'] = hourly_frame['DateTime'].apply(date_to_datetime)
    forecast.timeseries.data = hourly_frame

    return forecast
def gfs_mos_forecast(stid, forecast_date):
    """
    Produce a placeholder Forecast filled with random daily values and a two-row dummy time series.

    :param stid: station ID
    :param forecast_date: datetime of the day to forecast
    :return: Forecast object with random daily values and fixed test time series values
    """
    import numpy as np

    forecast = Forecast(stid, default_model_name, forecast_date)

    # Random whole-number daily values: high and low scaled to 100, wind scaled to 40
    for attribute, scale in (('high', 100.), ('low', 100.), ('wind', 40.)):
        setattr(forecast.daily, attribute, np.round(np.random.rand() * scale))
    # Rain is scaled to 3 and rounded to two decimals
    forecast.daily.rain = np.round(np.random.rand() * 3., 2)

    # Dummy time series used for testing: two rows, three hours apart
    second_time = forecast_date + timedelta(hours=3)
    forecast.timeseries.data['DateTime'] = [forecast_date, second_time]
    forecast.timeseries.data['temperature'] = [56., 55.]
    forecast.timeseries.data['dewpoint'] = [51., 51.]

    return forecast
def get_aeris_forecast(stid, lat, lon, api_id, api_secret, forecast_date):
    """
    Retrieve the hourly forecast from the Aeris API and build a Forecast.

    :param stid: station ID stored on the Forecast
    :param lat: latitude of the point; formatted to 3 decimals in the URL
    :param lon: longitude of the point; formatted to 3 decimals in the URL
    :param api_id: sent as 'client_id'
    :param api_secret: sent as 'client_secret'
    :param forecast_date: datetime of the day to forecast
    :return: Forecast object with daily values and the hourly time series
    :raises requests.exceptions.HTTPError: if the API answers with an error status
    """
    # Retrieve data
    api_url = 'https://api.aerisapi.com/forecasts/%s'
    point = '%0.3f,%0.3f' % (lat, lon)
    api_options = {
        'client_id': api_id,
        'client_secret': api_secret,
        'filter': '1hr',
        'plimit': '60',
    }
    json_url = api_url % point
    response = requests.get(json_url, params=api_options)
    # Raise error for invalid HTTP response. Checked before decoding the body so that a non-JSON error
    # page is reported as the HTTPError instead of as a JSON decoding error.
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('aeris: got HTTP error when querying API')
        raise
    aeris_data = response.json()

    # Convert to pandas DataFrame and fix time, units, and columns
    aeris_df = pd.DataFrame(aeris_data['response'][0]['periods'])
    aeris_df['DateTime'] = np.nan
    for idx in aeris_df.index:
        aeris_df.loc[idx, 'DateTime'] = localized_date_to_utc(parse_iso(aeris_df.loc[idx, 'dateTimeISO']))
    aeris_df.set_index('DateTime', inplace=True)
    column_names_dict = {
        'avgTempF': 'temperature',
        'avgDewpointF': 'dewpoint',
        'sky': 'cloud',
        'windSpeedMaxKTS': 'windSpeed',
        'windGustKTS': 'windGust',
        'windDirDEG': 'windDirection',
        'precipIN': 'rain',
        'pressureMB': 'pressure',
        'weatherPrimary': 'condition',
    }
    aeris_df = aeris_df.rename(columns=column_names_dict)

    # Calculate daily values. Aeris includes period maxima and minima, although they appear just to be hourly values.
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    # Prefer the period max/min columns; fall back to the average temperature when they are absent
    try:
        daily_high = aeris_df.loc[forecast_start:forecast_end, 'maxTempF'].max()
    except KeyError:
        daily_high = aeris_df.loc[forecast_start:forecast_end, 'temperature'].max()
    try:
        daily_low = aeris_df.loc[forecast_start:forecast_end, 'minTempF'].min()
    except KeyError:
        daily_low = aeris_df.loc[forecast_start:forecast_end, 'temperature'].min()
    # 'windSpeedMaxKTS' was renamed to 'windSpeed' above, so the renamed column is read directly
    daily_wind = aeris_df.loc[forecast_start:forecast_end, 'windSpeed'].max()
    # The rain total stops one hour short of the end of the window
    daily_rain = aeris_df.loc[forecast_start:forecast_end - timedelta(hours=1), 'rain'].sum()

    # Create Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(daily_high, daily_low, daily_wind, daily_rain)
    forecast.timeseries.data = aeris_df.reset_index()

    return forecast
def get_wunderground_forecast(stid, api_key, forecast_date):
    """
    Retrieve the hourly forecast from the Weather Underground API and build a Forecast.

    :param stid: station ID; used in the query URL and stored on the Forecast
    :param api_key: API key, embedded in the request URL
    :param forecast_date: datetime of the day to forecast
    :return: Forecast object with daily values and the hourly time series
    :raises requests.exceptions.HTTPError: if the API answers with an error status
    """
    # retrieve api json data
    api_url = 'https://api.wunderground.com/api/%s/hourly/forecast/q/%s.json'
    api_options = {'features': 'hourly,forecast'}
    json_url = api_url % (api_key, stid)
    response = requests.get(json_url, params=api_options)
    # Raise error for invalid HTTP response. Checked before decoding the body so that a non-JSON error
    # page is reported as the HTTPError instead of as a JSON decoding error.
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('wunderground: got HTTP error when querying API')
        raise
    wunderground_data = response.json()

    # Convert to DataFrame, fix time
    wunderground_df = pd.DataFrame(wunderground_data['hourly_forecast'])
    timezone_df = pd.DataFrame(wunderground_data['forecast']['simpleforecast'])
    # NOTE(review): `timezone` is computed but not used anywhere below -- confirm whether it can go
    timezone = get_timezone(timezone_df['forecastday'])
    # Convert the times before get_english_units is applied to every column (including FCTTIME)
    time_series = convert_fcttime(wunderground_df['FCTTIME'])  # already UTC

    for column in wunderground_df.columns.values:
        wunderground_df[column] = wunderground_df[column].apply(get_english_units)
    wunderground_df['mslp'] = inhg_to_mb(wunderground_df['mslp'])
    wunderground_df['wspd'] = mph_to_kt(wunderground_df['wspd'])
    wunderground_df['wdir'] = wunderground_df['wdir'].apply(get_wind_degrees)
    column_names_dict = {
        'FCTTIME': 'DateTime',
        'temp': 'temperature',
        'wspd': 'windSpeed',
        'mslp': 'pressure',
        'sky': 'cloud',
        'dewpoint': 'dewpoint',
        'qpf': 'rain',
        'wdir': 'windDirection',
        'wx': 'condition',
    }
    # The existing 'condition' column is dropped so that 'wx' can take that name
    wunderground_df.drop('condition', axis=1, inplace=True)
    wunderground_df = wunderground_df.rename(columns=column_names_dict)
    wunderground_df['DateTime'] = time_series
    wunderground_df.set_index('DateTime', inplace=True)

    # calculate daily values over hour 6 of forecast_date through hour 6 of the next day
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    daily_high = wunderground_df.loc[forecast_start:forecast_end, 'temperature'].max()
    daily_low = wunderground_df.loc[forecast_start:forecast_end, 'temperature'].min()
    daily_wind = wunderground_df.loc[forecast_start:forecast_end, 'windSpeed'].max()
    # The rain total stops one hour short of the end of the window
    daily_rain = wunderground_df.loc[forecast_start:forecast_end - timedelta(hours=1), 'rain'].sum()

    # create Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(daily_high, daily_low, daily_wind, daily_rain)
    forecast.timeseries.data = wunderground_df.reset_index()

    return forecast
def get_gefs_mos_forecast(stid, forecast_date):
    """
    Scrape the GEFS MOS ensemble page for a station and summarize it for one day.

    Every <pre> section except the last is treated as one ensemble member. For each member the X/N
    temperatures valid on forecast_date give the high and low, and the first Q24 value gives the precip.

    :param stid: station ID
    :param forecast_date: datetime of day to forecast
    :return: tuple (Forecast with the ensemble-mean high, low and precip and no wind,
        list of Daily objects, one per ensemble member)
    """
    # Download and parse the page
    url = 'http://www.nws.noaa.gov/cgi-bin/mos/getens.pl?sta=%s' % stid
    page = requests.get(url).text
    soup = BeautifulSoup(page, 'html.parser')

    # Per-member results
    ens_highs, ens_lows, ens_precips = [], [], []
    dailys = []
    target_day = forecast_date.date()

    members = soup.find_all('pre')
    # The final <pre> section is the operational run and is skipped
    for ii, member in enumerate(members[:-1]):
        raw = member.text

        # The model initialization time is read from the control run (the first member) only
        if ii == 0:
            tokens = raw.split()
            month, day, year = tokens[5].split('/')
            model_time = datetime(int(year), int(month), int(day), int(tokens[6]))

        # Forecast hours, X/N temperatures and 24-hour precip categories for this member
        hours_text = raw.split('FHR')[1].split('\n')[0][:-6]
        forecast_hours = [int(h) for h in re.findall(r'\d+', hours_text)]
        temps_text = raw.split('X/N')[1].split('\n')[0][:-6]
        forecast_temps = [int(t) for t in re.findall(r'-?\d+', temps_text)]
        precip_text = raw.split('Q24')[1].split('|')[1]
        forecast_precip = [int(p) for p in re.findall(r'\d+', precip_text)][0:5]

        # One hour is subtracted so that a 00Z valid time is counted with the preceding date
        forecast_dates_utc = np.array(
            [(model_time + timedelta(hours=fh - 1)).date() for fh in forecast_hours])
        temps = np.array([forecast_temps[f] for f in range(len(forecast_hours))])

        # Temperatures valid on the requested day give the member's high and low
        valid_dates = np.where((forecast_dates_utc == target_day))[0]
        member_high = np.max(temps[valid_dates])
        member_low = np.min(temps[valid_dates])
        # the 24 hour precip for the next day is always the first value
        member_precip = qpf_interpreter(forecast_precip[0])
        ens_highs.append(member_high)
        ens_lows.append(member_low)
        ens_precips.append(member_precip)

        # Keep each member as a Daily object, for writing to a file
        daily = Daily(stid, forecast_date)
        daily.model = 'GEFS MOS %d' % ii
        daily.set_values(member_high, member_low, None, member_precip)
        dailys.append(daily)

    # Ensemble means: temperatures rounded to whole numbers, precip to two decimals
    high_mean = np.round(np.mean(ens_highs))
    low_mean = np.round(np.mean(ens_lows))
    precip_mean = np.round(np.mean(ens_precips), 2)

    mean_forecast = Forecast(stid, default_model_name, forecast_date)
    mean_forecast.daily.set_values(high_mean, low_mean, None, precip_mean)

    return mean_forecast, dailys
def bufr_surface_parser(config, model, stid, forecast_date, bufr_file_name):
    """
    By Luke Madaus. Modified by jweyn and joejoezz.
    Parse surface data from a bufkit file.

    The header that starts with 'STN YY' lists the variable names; a regular expression is built from it
    and then matched against the whole file to extract one block of values per valid time.

    :param config: configuration mapping; reads 'debug'
    :param model: model name, stored on the Forecast and used in messages
    :param stid: station ID
    :param forecast_date: datetime of the day to forecast
    :param bufr_file_name: path of the bufkit file
    :return: Forecast object with the time series and, when the data reach the end of the forecast
        period, daily values
    """
    # define variables
    dateTime = []
    temperature = []
    dewpoint = []
    windSpeed = []
    windDirection = []
    rain = []
    pressure = []

    # newline='' keeps the '\r\n' line endings that the block pattern below matches explicitly.
    # The context manager closes the file even when parsing fails part-way through.
    with open(bufr_file_name, 'r', newline='') as infile:
        block_lines = []
        inblock = False
        for line in infile:
            if line.startswith('STN YY'):
                # We've found the line that starts the header info
                inblock = True
                block_lines.append(line)
            elif inblock:
                # Keep appending lines until we start hitting numbers
                if re.search(r'\d{6}', line):
                    inblock = False
                else:
                    block_lines.append(line)

        # Build an re search pattern based on this
        # We know the first two parts of the section are station id num and date
        re_string = r"(\d{6}|\w{4}) (\d{6})/(\d{4})"
        # Now compute the remaining number of variables
        dum_num = len(block_lines[0].split()) - 2
        re_string += r" (-?\d{1,4}.\d{2})" * dum_num
        re_string += '\r\n'
        for line in block_lines[1:]:
            dum_num = len(line.split())
            re_string += r'(-?\d{1,4}.\d{2}) ' * dum_num
            re_string = re_string[:-1]  # Get rid of the trailing space
            re_string += '\r\n'

        # Compile this re_string for more efficient re searches
        block_expr = re.compile(re_string)

        # Now get corresponding indices of the variables we need: join the header lines (minus their
        # two-character line ending) and split on spaces and slashes
        full_line = ''.join(header[:-2] + ' ' for header in block_lines)
        varlist = re.split(r'[ /]', full_line)

        # Now loop through all blocks that match the search pattern we defined above
        infile.seek(0)
        for block_match in block_expr.finditer(infile.read()):
            # Split out the match into each component number
            vals = list(block_match.groups())
            # Check for missing values
            # NOTE(review): the regex groups are strings, so this comparison with a float looks like it
            # never matches -- confirm the intended handling of -9999 before changing it.
            for v in range(len(vals)):
                if vals[v] == -9999.:
                    vals[v] = np.nan
            # Set the time
            dt = '20' + vals[varlist.index('YYMMDD')] + vals[varlist.index('HHMM')]
            validtime = datetime.strptime(dt, '%Y%m%d%H%M')

            # End loop if we are more than 60 hours past the start of the forecast date
            if validtime > forecast_date + timedelta(hours=60):
                break

            # Append values at this time step
            dateTime.append(validtime)
            pressure.append(vals[varlist.index('PMSL')])
            temperature.append(c_to_f(vals[varlist.index('T2MS')]))
            dewpoint.append(c_to_f(vals[varlist.index('TD2M')]))
            uwind = ms_to_kt(vals[varlist.index('UWND')])
            vwind = ms_to_kt(vals[varlist.index('VWND')])
            speed, direction = wind_uv_to_speed_dir(uwind, vwind)
            windSpeed.append(speed)
            windDirection.append(direction)
            if 'P01M' in varlist:
                rain.append(mm_to_in(vals[varlist.index('P01M')]))
            else:
                # This condition only applies to FV3 model: save 3 hr precipitation instead of 1 hour
                rain.append(mm_to_in(vals[varlist.index('P03M')]))

    # first element of rain should be zero (sometimes it is -9999.99)
    # NOTE(review): this stores the string '0.0', not a float -- left unchanged; confirm what
    # downstream consumers of the time series expect.
    rain[0] = '0.0'

    # Make into dataframe
    df = pd.DataFrame({
        'temperature': temperature,
        'dewpoint': dewpoint,
        'windSpeed': windSpeed,
        'windDirection': windDirection,
        'rain': rain,
        'pressure': pressure,
        'dateTime': dateTime
    }, index=dateTime)

    # Convert to forecast object
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)

    # Find forecast start location in timeseries
    try:
        # unlike the mos code, we always use the 'include'
        iloc_start_include = df.index.get_loc(forecast_start)
    except KeyError:
        print('bufkit: error getting start time index for %s; check data' % model)
        raise

    # Create forecast object and save timeseries
    forecast = Forecast(stid, model, forecast_date)
    forecast.timeseries.data = df

    # Find forecast end location in time series and save daily values if it exists
    if df.index[-1] >= forecast_end:
        iloc_end = df.index.get_loc(forecast_end)
        window = df.iloc[iloc_start_include:iloc_end]
        high = int(np.round(window['temperature'].max()))
        low = int(np.round(window['temperature'].min()))
        max_wind = int(np.round(window['windSpeed'].max()))
        # The rain total skips the first time step of the window
        total_rain = np.sum(df.iloc[iloc_start_include + 1:iloc_end]['rain'])
        forecast.daily.set_values(high, low, max_wind, total_rain)
    else:
        if config['debug'] > 9:
            print('bufkit warning: model %s does not extend to end of forecast period; omitting daily values' % model)

    return forecast
def get_ukmet_forecast(stid, ukmet_code, forecast_date):
    """
    Retrieve UKMET data.

    Daily highs and lows come from the day tabs of the static page. The hourly tables are read from the
    page as rendered by PhantomJS, and are then used to widen the daily highs/lows and to find the
    maximum wind speed.

    :param stid: station ID
    :param ukmet_code: site-specific URL code from ukmet.codes
    :param forecast_date: datetime of day to forecast
    :return: Forecast object for high, low, max wind for next 6Z--6Z. No precip. The Forecast is dated
        with the first day found on the page; wind is None when the hourly data do not cover the period.
    """
    # Retrieve the model data
    url = 'https://www.metoffice.gov.uk/public/weather/forecast/%s' % ukmet_code
    req = Request(url, headers=hdr)
    response = urlopen(req)
    page = response.read().decode('utf-8', 'ignore')
    soup = BeautifulSoup(page, 'lxml')

    # Find UTC offset and current time in HTML
    utcoffset = int(soup.find(id='country').text.split('-')[1][0:2])
    epoch = float(soup.find("td", {"id": "firstTimeStep"})['data-epoch'])
    utcnow = datetime.utcfromtimestamp(epoch)

    # Store daily variables
    days = []
    highs = []  # this can be overwritten by hourly
    lows = []  # this can be overwritten by hourly
    winds = []  # this comes from hourly

    # Pull in daily data using li tabs
    tabids = ['tabDay1', 'tabDay2', 'tabDay3']
    for ids in tabids:
        pars = soup.find(id=ids)
        days.append(datetime.strptime(pars['data-date'], '%Y-%m-%d'))
        highs.append(c_to_f(
            pars.findAll("span", {"title": "Maximum daytime temperature"})[0]['data-value-raw']))
        lows.append(c_to_f(
            pars.findAll("span", {"title": "Minimum nighttime temperature"})[0]['data-value-raw']))

    # Pull in hourly data
    # This requires PhantomJS to pull out additional HTML code
    # NOTE(review): the executable path and the '#?date=2017-09-21' fragment are hard-coded -- confirm
    # that both are still what is wanted.
    driver = webdriver.PhantomJS(executable_path='/home/disk/p/wxchallenge/bin/phantomjs')
    try:
        driver.get(url + '#?date=2017-09-21')
        source = driver.page_source
    finally:
        # Always shut the browser down so that no PhantomJS process is left running
        driver.quit()
    soup = BeautifulSoup(source, 'html.parser')

    dateTime = []
    temperature = []
    temperature_c = []
    dewpoint = []
    windSpeed = []
    windGust = []
    windDirection = []
    humidity = []  # this is temporary--converted to dew point below

    divids = ['divDayModule0', 'divDayModule1', 'divDayModule2', 'divDayModule3']
    for divs in divids:
        pars = soup.find(id=divs)
        divdate = datetime.strptime(pars['data-content-id'], '%Y-%m-%d').date()

        # Times: 'Now' maps to the page's first time step; other cells are HH:MM on this module's date,
        # shifted by the UTC offset read from the page
        hourels = pars.findAll("tr", {"class": "weatherTime"})[0].find_all('td')
        for ele in hourels:
            if ele.text == 'Now':
                dateTime.append(utcnow)
            else:
                hour_text, minute_text = ele.text.split(':')[0], ele.text.split(':')[1]
                dtmp = datetime(divdate.year, divdate.month, divdate.day, int(hour_text), int(minute_text))
                dateTime.append(dtmp + timedelta(hours=utcoffset))

        # Temperatures, kept in both Celsius (for the dew point calculation) and Fahrenheit
        tempels = pars.findAll("tr", {"class": "weatherTemp"})[0].findAll("i", {"class": "icon icon-animated"})
        for ele in tempels:
            temperature_c.append(float(ele['data-value-raw']))
            temperature.append(c_to_f(ele['data-value-raw']))

        # relative humidity for conversion to dew point
        humels = pars.findAll("tr", {"class": "weatherHumidity"})[0].text.split()
        for ele in humels:
            humidity.append(float(ele.split('%')[0]))

        # add wind
        speedels = pars.findAll("i", {"data-type": "windSpeed"})
        for ele in speedels:
            windSpeed.append(np.round(mph_to_kt(ele['data-value-raw']), 2))
        gustels = pars.findAll("span", {"class": "gust"})
        for ele in gustels:
            windGust.append(mph_to_kt(ele['data-value-raw']))
        direls = pars.findAll("span", {"class": "direction"})
        for ele in direls:
            windDirection.append(wind_dir_to_deg(ele.text))

    # Convert T and humidity to dewpt
    for ii, rh in enumerate(humidity):
        td_tmp = dewpoint_from_t_rh(temperature_c[ii], rh)
        dewpoint.append(c_to_f(td_tmp))

    # Make into dataframe
    df = pd.DataFrame({
        'temperature': temperature,
        'dewpoint': dewpoint,
        'windSpeed': windSpeed,
        'windGust': windGust,
        'windDirection': windDirection,
        'dateTime': dateTime
    }, index=dateTime)

    # Correct the highs and lows with the hourly data, find max wind speed
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    for d in range(0, len(days)):
        try:
            # unlike the mos code, we always use the 'include'
            iloc_start_include = df.index.get_loc(forecast_start)
        except BaseException:
            print('ukmet: error getting start time index in db; check data.')
            break
        try:
            iloc_end = df.index.get_loc(forecast_end)
        except BaseException:
            print('ukmet: error getting end time index in db; check data.')
            break
        window = df.iloc[iloc_start_include:iloc_end]
        raw_high = window['temperature'].max()
        raw_low = window['temperature'].min()
        winds.append(int(np.round(window['windSpeed'].max())))
        # The hourly data may only widen the range given by the daily tabs
        if raw_high > highs[d]:
            highs[d] = raw_high
        if raw_low < lows[d]:
            lows[d] = raw_low
        forecast_start = forecast_start + timedelta(days=1)
        forecast_end = forecast_end + timedelta(days=1)

    # Only the first day is returned; values for the later days are computed above but not used.
    forecast = Forecast(stid, default_model_name, days[0])
    forecast.timeseries.data = df
    # When the hourly data did not cover the first period the loop above stopped without any wind
    # value; report the wind as missing instead of failing with an IndexError.
    first_wind = winds[0] if winds else None
    forecast.daily.set_values(highs[0], lows[0], first_wind, None)

    return forecast
def get_usl_forecast(config, stid, run, forecast_date):
    """
    Retrieve a USL forecast page from microclimates.org and parse it into a Forecast.

    The run used is the one at hour `run` on the day before forecast_date. The page's table rows are
    parsed as either the daily summary row or one hourly row each.

    :param config: configuration mapping; reads 'debug'
    :param stid: station ID; used in the URL and stored on the Forecast
    :param run: model run hour (anything accepted by int())
    :param forecast_date: datetime of the day to forecast
    :return: Forecast object with daily values and an hourly time series
    :raises HTTPError: if the forecast page cannot be opened
    :raises ValueError: if no daily summary row can be parsed from the page
    """
    # Retrieve data
    api_url = 'http://www.microclimates.org/forecast/%s/%s.html'
    run_date = (forecast_date - timedelta(days=1)).replace(hour=int(run))
    get_url = api_url % (stid, datetime.strftime(run_date, '%Y%m%d_%H'))
    try:
        response = urlopen(get_url)
    except HTTPError:
        if config['debug'] > 9:
            print("usl: forecast for %s at run time %s doesn't exist" % (stid, run_date))
        raise
    usl_data = response.read().decode('utf-8')

    # Create a DataFrame with one row per hour of the forecast period, initially all missing
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    usl_df = pd.DataFrame(index=pd.date_range(forecast_start, forecast_end, freq='1H'))
    columns = ['temperature', 'dewpoint', 'humidity', 'soilTemperature', 'windDirection', 'windSpeed', 'cloud',
               'netRadiation', 'rain']
    for column in columns:
        usl_df[column] = np.nan

    # Parse the values
    info = usl_data.split('<tr>')
    date_index = 0
    daily_values = None
    for block in info:
        # Daily values, if that's the appropriate block
        if re.search('°F</td>', block):
            split_block = block.split('<td>')
            try:
                daily_values = (
                    int(re.search(r'(-?\d{1,3})', split_block[1]).groups()[0]),
                    int(re.search(r'(-?\d{1,3})', split_block[2]).groups()[0]),
                    int(re.search(r'(\d{1,3})', split_block[3]).groups()[0]),
                    float(re.search(r'(\d{1,3}.\d{2})', split_block[4]).groups()[0]),
                )
                continue
            except (AttributeError, IndexError, ValueError):
                # Not a parsable daily row after all; fall through and treat it as an hourly row
                pass

        # Hourly values: strip the table markup so that the cells become a comma-separated list
        block = re.sub('<th scope="row" class="nobg3">', '', block)
        block = re.sub('<th scope="row" class="nobg">', '', block)
        block = re.sub('</th>', ',', block)
        block = re.sub('</td>', ',', block)
        block = re.sub('</tr>', '', block)
        block = re.sub('<td>', '', block)
        block = re.sub('<td class="hr3">', '', block)
        block = re.sub('\n', '', block)
        if re.search('Time', block):
            # Header row
            continue
        values = block.split(',')[1:-1]  # Omit time and an extra space at the end
        values = [v.strip() for v in values]  # Remove white space
        for v, value in enumerate(values):  # Convert numbers to float
            try:
                values[v] = float(value)
            except (TypeError, ValueError):
                pass
            if values[v] == '':
                values[v] = np.nan
        # Rows are assigned to the hourly index in order; rows that do not fit are skipped
        try:
            usl_df.loc[usl_df.index[date_index], :] = values
            date_index += 1
        except (IndexError, ValueError):
            pass

    # Without a daily row there is nothing to put in the daily forecast; fail with a clear message
    # rather than with a NameError on the unset values.
    if daily_values is None:
        raise ValueError('usl: could not parse daily values for %s at run time %s' % (stid, run_date))
    high, low, max_wind, precip = daily_values

    # Fix a couple of things
    usl_df['DateTime'] = usl_df.index
    for index in usl_df.index:
        usl_df.loc[index, 'windDirection'] = wind_dir_to_deg(usl_df.loc[index, 'windDirection'])
    usl_df['humidity'] = usl_df['humidity'].apply(remove_last_char)
    usl_df['cloud'] = usl_df['cloud'].apply(remove_last_char)

    # Create Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(high, low, max_wind, precip)
    forecast.timeseries.data = usl_df

    return forecast
def get_owm_forecast(stid, lat, lon, api_key, forecast_date):
    """
    Retrieve the forecast from the OpenWeatherMap API and build a Forecast.

    :param stid: station ID stored on the Forecast
    :param lat: latitude, sent as 'lat'
    :param lon: longitude, sent as 'lon'
    :param api_key: API key, sent as 'APPID'
    :param forecast_date: datetime of the day to forecast
    :return: Forecast object with daily values and the time series
    :raises requests.exceptions.HTTPError: if the API answers with an error status
    """
    # Retrieve data
    api_url = 'http://api.openweathermap.org/data/2.5/forecast'
    api_options = {
        'APPID': api_key,
        'lat': lat,
        'lon': lon,
        'units': 'imperial',
    }
    response = requests.get(api_url, params=api_options)
    # Raise error for invalid HTTP response. Checked before decoding the body so that a non-JSON error
    # page is reported as the HTTPError instead of as a JSON decoding error.
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('openweathermap: got HTTP error when querying API')
        raise
    owm_data = response.json()

    # Convert to pandas DataFrame and fix time
    owm_df = pd.DataFrame(owm_data['list'])
    owm_df['DateTime'] = np.nan
    for idx in owm_df.index:
        owm_df.loc[idx, 'DateTime'] = date_to_datetime(owm_df.loc[idx, 'dt_txt'])
    owm_df.set_index('DateTime', inplace=True)

    # OWM has a column 'main' which contains some parameters at all times. Get all of those.
    for parameter in owm_df.loc[owm_df.index[0], 'main'].keys():
        owm_df[parameter] = owm_df['main'].apply(get_parameter, args=(parameter,))

    # Get some other special parameters
    # Make sure the 'rain' parameter exists (if no rain in forecast, the column is missing)
    if 'rain' not in owm_df:
        owm_df = owm_df.assign(**{'rain': 0.0})
    else:
        owm_df.loc[:, 'rain'] = mm_to_in(owm_df['rain'].apply(get_parameter, args=('3h',)))
    owm_df['condition'] = owm_df['weather'].apply(get_parameter, args=('description',), is_list=True)
    owm_df['windSpeed'] = mph_to_kt(owm_df['wind'].apply(get_parameter, args=('speed',)))
    owm_df['windDirection'] = owm_df['wind'].apply(get_parameter, args=('deg',))
    owm_df['cloud'] = owm_df['clouds'].apply(get_parameter, args=('all',))
    owm_df['dewpoint'] = np.nan
    for idx in owm_df.index:
        owm_df.loc[idx, 'dewpoint'] = dewpoint_from_t_rh(owm_df.loc[idx, 'temp'], owm_df.loc[idx, 'humidity'])

    # Rename remaining columns for default schema
    column_names_dict = {
        'temp': 'temperature',
    }
    owm_df = owm_df.rename(columns=column_names_dict)

    # Calculate daily values. OWM includes period maxima and minima. Note that rain in OWM is cumulative for the LAST
    # 3 hours.
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    # Prefer the period max/min columns; fall back to the temperature when they are absent
    try:
        daily_high = owm_df.loc[forecast_start:forecast_end, 'temp_max'].max()
    except KeyError:
        daily_high = owm_df.loc[forecast_start:forecast_end, 'temperature'].max()
    try:
        daily_low = owm_df.loc[forecast_start:forecast_end, 'temp_min'].min()
    except KeyError:
        daily_low = owm_df.loc[forecast_start:forecast_end, 'temperature'].min()
    daily_wind = owm_df.loc[forecast_start:forecast_end, 'windSpeed'].max()
    # Rain is summed starting 3 hours into the window (each value covers the preceding 3 hours); the
    # nanmax against 0.0 keeps the total from being NaN
    daily_rain = np.nanmax([owm_df.loc[forecast_start + timedelta(hours=3):forecast_end, 'rain'].sum(), 0.0])

    # Create Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(daily_high, daily_low, daily_wind, daily_rain)
    forecast.timeseries.data = owm_df.reset_index()

    return forecast
def get_mos_forecast(stid, mos_model, init_date, forecast_date):
    """
    Retrieve MOS data. No unit conversions, yay!

    :param stid: station ID
    :param mos_model: model name ('GFS' or 'NAM')
    :param init_date: datetime of model initialization
    :param forecast_date: datetime of day to forecast
    :return: Forecast object for forecast_date
    :raises requests.exceptions.HTTPError: if the server answers with an error status
    :raises ValueError: if the returned CSV contains no rows
    """
    # Create forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)

    # Retrieve the model data
    base_url = 'http://mesonet.agron.iastate.edu/mos/csv.php?station=%s&runtime=%s&model=%s'
    formatted_date = init_date.strftime('%Y-%m-%d%%20%H:00')
    url = base_url % (stid, formatted_date, mos_model)
    response = requests.get(url, stream=True)
    # Raise error for invalid HTTP response, like the other retrieval functions, instead of trying to parse an
    # error page as CSV
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('mos: got HTTP error when querying API')
        raise
    # The raw stream is not transport-decoded by default; enable decoding so a compressed reply still parses
    response.raw.decode_content = True

    # Create pandas DataFrame
    df = pd.read_csv(response.raw, index_col=False)
    # Raise exception if DataFrame is empty
    if len(df.index) == 0:
        raise ValueError('mos: error: empty DataFrame; data missing.')
    date_index = pd.Index(pd.to_datetime(df['ftime'])).tz_localize(None)
    df['datetime'] = date_index
    # Remove duplicate rows
    df = df.drop_duplicates()

    # Fix rain
    df['q06'] = df['q06'].apply(qpf_interpreter)

    # Format the DataFrame for the default schema
    # Dictionary for renaming columns
    names_dict = {
        'datetime': 'DateTime',
        'tmp': 'temperature',
        'dpt': 'dewpoint',
        'wsp': 'windSpeed',
        'wdr': 'windDirection',
        'q06': 'rain'
    }
    # Keep only the columns that belong to the schema, in their original order
    kept_columns = [col for col in df.columns if col in names_dict]

    # Set the timeseries
    forecast.timeseries.data = df[kept_columns].rename(columns=names_dict)

    # Now do the daily forecast part
    df = df.set_index('datetime')
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)

    # Some parameters need to include the forecast start; others, like total rain and 6-hour maxes, don't
    try:
        iloc_start_include = df.index.get_loc(forecast_start)
        iloc_start_exclude = iloc_start_include + 1
    except Exception:
        print('mos.py: error getting start time index in db; check data.')
        raise
    try:
        # + 1 so that the row at forecast_end is included by the positional slices below
        iloc_end = df.index.get_loc(forecast_end) + 1
    except Exception:
        print('mos.py: error getting end time index in db; check data.')
        raise

    including_start = df.iloc[iloc_start_include:iloc_end]
    excluding_start = df.iloc[iloc_start_exclude:iloc_end]
    raw_high = including_start['tmp'].max()
    raw_low = including_start['tmp'].min()
    nx_high = excluding_start['n_x'].max()
    nx_low = excluding_start['n_x'].min()

    # Set the daily. nanmax/nanmin ignore a NaN from either the 'tmp' or the 'n_x' column.
    forecast.daily.set_values(np.nanmax([raw_high, nx_high]),
                              np.nanmin([raw_low, nx_low]),
                              including_start['wsp'].max(),
                              excluding_start['q06'].sum())

    return forecast
def get_climacell_forecast(stid, lat, lon, api_key, forecast_date):
    """
    Query the ClimaCell hourly forecast API for a point and build a Forecast object from it.

    :param stid: station ID handed to the Forecast object
    :param lat: latitude sent to the API
    :param lon: longitude sent to the API
    :param api_key: ClimaCell API key
    :param forecast_date: datetime of the day to forecast; daily values cover hour 6 of this day through hour 6 of
        the next day
    :return: Forecast object with daily values set and the hourly timeseries attached
    :raises requests.exceptions.HTTPError: if the API answers with an error status
    """
    # Query the API
    query = {
        'apikey': api_key,
        'lat': lat,
        'lon': lon,
        'unit_system': 'us',
        'fields': 'precipitation,temp,dewpoint,wind_speed:knots,wind_gust:knots,baro_pressure:hPa,'
                  'wind_direction:degrees,cloud_cover:%,weather_code'
    }
    response = requests.get('https://api.climacell.co/v3/weather/forecast/hourly', params=query)

    # Bail out on an HTTP error before touching the body
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('climacell: got HTTP error when querying API')
        raise

    # Build the table: discard the coordinates, then unwrap the 'value' entry of every remaining cell
    frame = pd.DataFrame(response.json())
    frame = frame.drop(['lat', 'lon'], axis=1)
    frame = frame.apply(lambda column: column.apply(lambda cell: cell['value']))

    # Map the API field names onto the default schema
    schema_names = {
        'observation_time': 'DateTime',
        'temp': 'temperature',
        'cloud_cover': 'cloud',
        'precipitation': 'rain',
        'baro_pressure': 'pressure',
        'wind_speed': 'windSpeed',
        'wind_gust': 'windGust',
        'wind_direction': 'windDirection',
        'weather_code': 'condition'
    }
    frame = frame.rename(columns=schema_names)

    # Index by time, passing every timestamp through localized_date_to_utc
    frame['DateTime'] = frame['DateTime'].apply(lambda stamp: localized_date_to_utc(pd.Timestamp(stamp)))
    frame = frame.set_index('DateTime')

    # Aggregate over the forecast window
    window_start = forecast_date.replace(hour=6)
    window_end = window_start + timedelta(days=1)
    window = frame.loc[window_start:window_end]
    high = window['temperature'].max()
    low = window['temperature'].min()
    wind = window['windSpeed'].max()
    # The rain total stops one hour before the end of the window
    rain = frame.loc[window_start:window_end - timedelta(hours=1), 'rain'].sum()

    # Assemble the result
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(high, low, wind, rain)
    forecast.timeseries.data = frame.reset_index()

    return forecast
def get_nws_forecast(config, stid, lat, lon, forecast_date):
    """
    Retrieve current NWS forecast for a point location.

    Hourly values are read from the digitalDWML XML product and aggregated into daily values. The daily forecast
    from api.weather.gov is then used, when available, to refine the high, low and wind; if that request fails, the
    hourly-only Forecast is returned.

    :param config: configuration mapping; config['debug'] > 0 enables warning messages
    :param stid: str: station ID
    :param lat: float: latitude
    :param lon: float: longitude
    :param forecast_date: datetime: day to forecast; daily values cover hour 6 of this day through hour 6 of the
        next day
    :return: Forecast object with daily values set and the hourly timeseries attached
    :raises requests.exceptions.HTTPError: if the hourly XML request answers with an error status
    """
    hourly_url = 'http://forecast.weather.gov/MapClick.php?lat=%f&lon=%f&FcstType=digitalDWML'
    request_url = hourly_url % (lat, lon)
    response = requests.get(request_url)
    # Raise error for invalid HTTP response
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print('nws: got HTTP error when querying for XML file from %s' % request_url)
        raise

    hourly_xml = eTree.fromstring(response.text)
    hourly_dict = etree_to_dict(hourly_xml)

    # Create a DataFrame for hourly data
    hourly = pd.DataFrame()
    hourly['DateTime'] = hourly_dict['dwml']['data']['time-layout']['start-valid-time']
    # De-localize the starting time so we can do an explicit datetime comparison. Done in two passes through the
    # column, as before, so the second pass sees the values as pandas stores them.
    hourly['DateTime'] = [localized_date_to_utc(parse_iso(start)) for start in hourly['DateTime']]
    hourly['DateTime'] = [start.to_pydatetime().replace(tzinfo=None) for start in hourly['DateTime']]
    # Keep 'DateTime' as a regular column and index on a copy of it
    hourly['datetime_index'] = hourly['DateTime']
    hourly.set_index('datetime_index', inplace=True)
    parameters = hourly_dict['dwml']['data']['parameters']

    # Get the temperatures
    for element in parameters['temperature']:
        if element['@type'] == 'hourly':
            hourly['temperature'] = xml_to_values(element['value'])
        elif element['@type'] == 'dew point':
            hourly['dewPoint'] = xml_to_values(element['value'])
    # Get the winds
    for element in parameters['wind-speed']:
        if element['@type'] == 'sustained':
            hourly['windSpeed'] = mph_to_kt(xml_to_values(element['value']))
        elif element['@type'] == 'gust':
            hourly['windGust'] = mph_to_kt(xml_to_values(element['value']))
    # Get other parameters
    hourly['cloud'] = xml_to_values(parameters['cloud-amount']['value'])
    hourly['windDirection'] = xml_to_values(parameters['direction']['value'])
    hourly['rain'] = xml_to_values(parameters['hourly-qpf']['value'])
    # The condition text is optional: a parsing failure must not abort the forecast, but it should not swallow
    # KeyboardInterrupt/SystemExit either.
    try:
        hourly['condition'] = xml_to_condition(parameters['weather']['weather-conditions'])
    except Exception as e:
        if config['debug'] > 0:
            print("nws: warning: no weather conditions used for %s ('%s')" % (stid, str(e)))

    # Aggregate daily values from hourly series
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    hourly_high = hourly.loc[forecast_start:forecast_end, 'temperature'].max()
    hourly_low = hourly.loc[forecast_start:forecast_end, 'temperature'].min()
    hourly_wind = hourly.loc[forecast_start:forecast_end, 'windSpeed'].max()
    hourly_rain = hourly.loc[forecast_start:forecast_end - timedelta(hours=1), 'rain'].sum()

    # Create the Forecast object
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.daily.set_values(hourly_high, hourly_low, hourly_wind, hourly_rain)
    forecast.timeseries.data = hourly

    # Now do the daily data from the Forecast API
    api_url = 'https://api.weather.gov/points'
    point = '%0.3f,%0.3f' % (lat, lon)
    # Retrieve daily forecast
    daily_url = '%s/%s/forecast' % (api_url, point)
    response = requests.get(daily_url)
    # Test for an error HTTP response. If there is an error response, omit the daily part. Exception (rather than
    # BaseException) lets KeyboardInterrupt/SystemExit propagate instead of being reported as a missing forecast.
    try:
        response.raise_for_status()
        daily_data = response.json()
    except Exception as e:
        if config['debug'] > 0:
            print("nws: warning: no daily values used for %s ('%s')" % (stid, str(e)))
        return forecast

    # Daily values: convert to DataFrame
    daily = pd.DataFrame.from_dict(daily_data['properties']['periods'])
    # Change the wind to its max value
    daily['windSpeed'] = daily['windSpeed'].apply(wind_speed_interpreter)
    # De-localize the starting time so we can do an explicit datetime comparison
    daily['startTime'] = [parse_iso(start) for start in daily['startTime']]
    daily['startTime'] = [start.replace(tzinfo=None) for start in daily['startTime']]
    daily.set_index('startTime', inplace=True)

    # The high is read from the period starting 6 hours after forecast_date and the low from the period starting
    # 6 hours before it; a missing period yields NaN, which the nanmax/nanmin below ignore.
    try:
        daily_high = daily.loc[forecast_date + timedelta(hours=6), 'temperature']
    except KeyError:
        daily_high = np.nan
    try:
        daily_low = daily.loc[forecast_date - timedelta(hours=6), 'temperature']
    except KeyError:
        daily_low = np.nan
    daily_wind = mph_to_kt(np.max(daily.loc[forecast_start:forecast_end]['windSpeed']))

    # Update the Forecast object
    forecast.daily.set_values(np.nanmax([hourly_high, daily_high]),
                              np.nanmin([hourly_low, daily_low]),
                              np.nanmax([hourly_wind, daily_wind]),
                              hourly_rain)

    return forecast
def get_ukmet_forecast(config, stid, lat, lon, api_id, api_secret, forecast_date):
    """
    Retrieve the UK Met Office point forecast and build a Forecast object.

    Hourly data provide the timeseries and the baseline daily values; the daily product, when available, may raise
    the high or lower the low. Both API responses are cached under config['THETAE_ROOT']/site_data and re-used while
    check_cache_file reports them as recent enough.

    :param config: configuration mapping; must provide 'THETAE_ROOT'
    :param stid: station ID, used for the cache file names and the Forecast object
    :param lat: latitude sent to the API
    :param lon: longitude sent to the API
    :param api_id: API client ID (sent as the x-ibm-client-id header)
    :param api_secret: API client secret (sent as the x-ibm-client-secret header)
    :param forecast_date: datetime of the day to forecast; hourly aggregates cover hour 6 of this day through hour 6
        of the next day
    :return: Forecast object with daily values set and the hourly timeseries attached
    :raises requests.exceptions.HTTPError: if the hourly request answers with an error status
    """
    json_url = 'https://api-metoffice.apiconnect.ibmcloud.com/metoffice/production/v0/forecasts/point'
    headers = {
        'x-ibm-client-id': api_id,
        'x-ibm-client-secret': api_secret,
        'accept': "application/json"
    }
    api_options = {
        'excludeParameterMetaData': 'false',
        'includeLocationName': 'false',
        'latitude': lat,
        'longitude': lon,
    }
    site_directory = '%s/site_data' % config['THETAE_ROOT']

    # Get hourly forecast data
    # Check if we have a cached hourly file and if it is recent enough
    cache_file = '%s/%s_ukmet_hourly.txt' % (site_directory, stid)
    cache_ok = check_cache_file(config, cache_file, interval=4)

    if not cache_ok:
        response = requests.get('%s/hourly' % json_url, params=api_options, headers=headers)
        # Raise error for invalid HTTP response. Check the status before decoding the body so that a non-JSON error
        # page does not hide the HTTP failure.
        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            print('ukmet: got HTTP error when querying API for hourly data')
            raise
        ukmet_data_hourly = response.json()
        # Cache the response
        with open(cache_file, 'w') as f:
            f.write(response.text)
    else:
        with open(cache_file) as f:
            ukmet_data_hourly = json.load(f)

    # The model run date is available as ['features'][0]['properties']['modelRunDate']; it is currently not used
    # but might be of interest later
    ukmet_df = pd.DataFrame(ukmet_data_hourly['features'][0]['properties']['timeSeries'])
    ukmet_df['DateTime'] = ukmet_df['time'].apply(pd.to_datetime).apply(lambda x: x.replace(tzinfo=None))
    ukmet_df.set_index('DateTime', inplace=True)

    # rename columns
    column_names_dict = {
        'screenTemperature': 'temperature',
        'screenDewPointTemperature': 'dewpoint',
        'windSpeed10m': 'windSpeed',
        'windGustSpeed10m': 'windGust',
        'windDirectionFrom10m': 'windDirection',
        'precipitationRate': 'rain',  # Assume constant in hour. parameter totalPrecipAmount no longer exists.
        'mslp': 'pressure',
    }
    ukmet_df = ukmet_df.rename(columns=column_names_dict)

    # drop columns that we are not using
    ukmet_df.drop([
        'feelsLikeTemperature', 'probOfPrecipitation', 'screenRelativeHumidity', 'significantWeatherCode',
        'totalSnowAmount', 'uvIndex', 'visibility'
    ], inplace=True, axis=1)

    # correct units
    ukmet_df['pressure'] /= 100.
    ukmet_df['temperature'] = c_to_f(ukmet_df['temperature'])
    ukmet_df['dewpoint'] = c_to_f(ukmet_df['dewpoint'])
    ukmet_df['windSpeed'] = ms_to_kt(ukmet_df['windSpeed'])
    ukmet_df['windGust'] = ms_to_kt(ukmet_df['windGust'])
    ukmet_df['rain'] = mm_to_in(ukmet_df['rain'])

    # Create Forecast object, save timeseries
    forecast = Forecast(stid, default_model_name, forecast_date)
    forecast.timeseries.data = ukmet_df.reset_index()

    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)

    # Now use the daily API to find daily values
    # Check if we have a cached daily file and if it is recent enough
    cache_file = '%s/%s_ukmet_daily.txt' % (site_directory, stid)
    cache_ok = check_cache_file(config, cache_file, interval=4)

    have_daily_values = True
    ukmet_data_daily = None
    if not cache_ok:
        response = requests.get('%s/daily' % json_url, params=api_options, headers=headers)
        # An HTTP error here is not fatal: fall back to the hourly values
        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            print('ukmet warning: got HTTP error when querying API for daily data; using hourly values')
            have_daily_values = False
        else:
            ukmet_data_daily = response.json()
            # Cache the response. Only successful responses are cached; writing an error body here would make the
            # next run load it from the cache and fail.
            with open(cache_file, 'w') as f:
                f.write(response.text)
    else:
        with open(cache_file) as f:
            ukmet_data_daily = json.load(f)

    # extract daily values for the forecast date
    if have_daily_values:
        ukmet_df_daily = pd.DataFrame(ukmet_data_daily['features'][0]['properties']['timeSeries'])
        ukmet_df_daily.set_index('time', inplace=True)
        # Make the index timezone-naive UTC, like the hourly index above, so that the naive forecast_date can be
        # looked up in it
        ukmet_df_daily.index = pd.to_datetime(ukmet_df_daily.index, utc=True).tz_convert(None)
        daily_forecast = ukmet_df_daily.loc[forecast_date]
        daytime_max = c_to_f(daily_forecast['dayMaxScreenTemperature'])
        nighttime_min = c_to_f(daily_forecast['nightMinScreenTemperature'])
    else:
        # Sentinels that can never win the comparisons below
        daytime_max = -1000.
        nighttime_min = 1000.

    # compare hourly temperature to daily--update if needed
    daily_high = ukmet_df.loc[forecast_start:forecast_end, 'temperature'].max()
    if daytime_max > daily_high:
        daily_high = daytime_max

    daily_low = ukmet_df.loc[forecast_start:forecast_end, 'temperature'].min()
    if nighttime_min < daily_low:
        daily_low = nighttime_min

    daily_wind = ukmet_df.loc[forecast_start:forecast_end, 'windSpeed'].max()
    daily_rain = ukmet_df.loc[forecast_start:forecast_end - timedelta(hours=1), 'rain'].sum()

    forecast.daily.set_values(daily_high, daily_low, daily_wind, daily_rain)

    return forecast