def initialize_site_forecasts(api, site):
    """
    Create a forecast for each variable measured at the site.

    Parameters
    ----------
    api : solarforecastarbiter.io.api.APISession
        An active Reference user session.
    site : datamodel.Site
        The site object for which to create Forecasts.
    """
    try:
        extra_params = common.decode_extra_parameters(site)
    except ValueError:
        # Fixed message: this function creates forecasts, not observations.
        logger.warning('Cannot create reference forecasts at PVDAQ site '
                       f'{site.name}, missing required parameters.')
        return
    site_api_id = int(extra_params['network_api_id'])
    with open(DEFAULT_SITEFILE) as fp:
        obs_metadata = json.load(fp)['observations']
    # Collect the variable of every observation defined for this site so a
    # forecast can be created for each measured variable.
    obs_vars = [
        obs['variable'] for obs in obs_metadata
        if json.loads(obs['extra_parameters'])['network_api_id'] == site_api_id
    ]
    common.create_forecasts(api, site, obs_vars,
                            default_forecasts.TEMPLATE_FORECASTS)
def update_observation_data(api, sites, observations, start, end):
    """Post new observation data to all MIDC observations from start to end.

    api : solarforecastarbiter.io.api.APISession
        An active Reference user session.
    sites: list
        List of all reference sites as Objects
    start : datetime
        The beginning of the period to request data for.
    end : datetime
        The end of the period to request data for.
    """
    for site in common.filter_by_networks(sites, ['NREL MIDC']):
        try:
            params = common.decode_extra_parameters(site)
        except ValueError:
            # Site metadata is unusable; skip it silently.
            continue
        try:
            obs_df = iotools.read_midc_raw_data_from_nrel(
                params['network_api_id'], start, end)
        except IndexError:
            # The MIDC api returns a text file on 404 that is parsed as a
            # 1-column csv and causes an index error.
            logger.warning(f'Could not retrieve data for site {site.name}'
                           f' between {start} and {end}.')
            continue
        # Post the fetched frame to every observation attached to this site.
        for obs in (o for o in observations if o.site == site):
            common.post_observation_data(api, obs, obs_df, start, end)
def initialize_site_observations(api, site):
    """Creates an observation at the site for each variable in the MIDC
    site's file.

    Parameters
    ----------
    api : io.api.APISession
        API Session object, authenticated for the Reference user.
    site : datamodel.Site
        The site object for which to create the Observations.
    """
    try:
        extra_params = common.decode_extra_parameters(site)
    except ValueError:
        logger.warning('Cannot create reference observations at MIDC site '
                       f'{site.name}, missing required parameters.')
        return
    var_map = midc_config.midc_var_map[extra_params['network_api_id']]
    for sfa_var, midc_var in var_map.items():
        # Record the original MIDC column label so the data update process
        # can locate the correct column in the raw file.
        obs_extra_params = dict(extra_params, network_data_label=midc_var)
        common.create_observation(api, site, sfa_var,
                                  extra_params=obs_extra_params)
def initialize_site_observations(api, site):
    """Creates an observation at the site for each variable in the PVDAQ
    site's file.

    Parameters
    ----------
    api : io.api.APISession
        API Session object, authenticated for the Reference user.
    site : datamodel.Site
        The site object for which to create the Observations.
    """
    try:
        extra_params = common.decode_extra_parameters(site)
    except ValueError:
        logger.warning('Cannot create reference observations at PVDAQ site '
                       f'{site.name}, missing required parameters.')
        return
    site_api_id = int(extra_params['network_api_id'])
    with open(DEFAULT_SITEFILE) as fp:
        all_obs_metadata = json.load(fp)['observations']
    for obs_dict in all_obs_metadata:
        meta_api_id = json.loads(obs_dict['extra_parameters'])['network_api_id']
        if meta_api_id != site_api_id:
            continue
        # Attach the live site object before building the Observation.
        obs_dict['site'] = site
        common.check_and_post_observation(
            api, Observation.from_dict(obs_dict))
def initialize_site_observations(api, site):
    """Creates an observation at the site for each variable in the matched
    DOE_ARM_VARIABLE_MAP.

    Parameters
    ----------
    api : solarforecastarbiter.io.api.APISession
        An active Reference user session.
    site : datamodel.Site
        The site object for which to create Observations.
    """
    try:
        site_extra_params = common.decode_extra_parameters(site)
    except ValueError:
        logger.error(f'Failed to initialize observations for {site.name} '
                     'extra parameters could not be loaded.')
        return
    arm_vars = _determine_site_vars(site_extra_params['network_api_id'])
    for arm_var in arm_vars:
        # Translate the ARM variable name to its SFA equivalent.
        sfa_var = DOE_ARM_VARIABLE_MAP[arm_var]
        logger.info(f'Creating {sfa_var} at {site.name}')
        try:
            common.create_observation(api, site, sfa_var)
        except HTTPError as e:
            logger.error(f'Could not create Observation for "{sfa_var}" '
                         f'at DOE ARM site {site.name}')
            logger.debug(f'Error: {e.response.text}')
def get_filename(site, year):
    """Return the URL of the CRN data file for a site and year."""
    network_api_id = common.decode_extra_parameters(site)['network_api_id']
    return CRN_URL + f'{year}/CRNS0101-05-{year}-{network_api_id}.txt'
def fetch(api, site, start, end, realtime=False):
    """Retrieve observation data for a surfrad site between start and end.

    Parameters
    ----------
    api : io.APISession
        An APISession with a valid JWT for accessing the Reference Data
        user.
    site : datamodel.Site
        Site object with the appropriate metadata.
    start : datetime
        The beginning of the period to request data for.
    end : datetime
        The end of the period to request data for.
    realtime : bool
        Whether or not to look for realtime data. Note that this data is
        raw, unverified data from the instruments.

    Returns
    -------
    data : pandas.DataFrame
        All of the requested data concatenated into a single DataFrame.
    """
    if realtime:
        url_format = REALTIME_URL
    else:
        url_format = ARCHIVE_URL
    # load extra parameters for api arguments.
    extra_params = common.decode_extra_parameters(site)
    abbreviation = extra_params['network_api_abbreviation']
    single_day_dfs = []
    for day in pd.date_range(start, end):
        filename = url_format.format(abrv=abbreviation,
                                     year=day.year,
                                     year_2d=day.strftime('%y'),
                                     jday=day.strftime('%j'))
        logger.info(f'Requesting data for SURFRAD site {site.name}'
                    f' on {day.strftime("%Y%m%d")}.')
        try:
            # Only get dataframe from the returned tuple
            surfrad_day = iotools.read_surfrad(filename)[0]
        except URLError:
            logger.warning(f'Could not retrieve SURFRAD data for site '
                           f'{site.name} on {day.strftime("%Y%m%d")}.')
            # Log the URL that failed so the request can be reproduced;
            # the previous message interpolated nothing.
            logger.debug(f'Failed SURFRAD URL: {filename}.')
            continue
        else:
            single_day_dfs.append(surfrad_day)
    try:
        all_period_data = pd.concat(single_day_dfs)
    except ValueError:
        # pd.concat raises ValueError when handed an empty list, i.e.
        # when no day in the range could be retrieved.
        logger.warning(f'No data available for site {site.name} '
                       f'from {start} to {end}.')
        return pd.DataFrame()
    all_period_data = all_period_data.rename(
        columns={'temp_air': 'air_temperature'})
    return all_period_data
def fetch(api, site, start, end, *, eia_api_key):
    """Retrieve observation data for a EIA site between start and end.

    Parameters
    ----------
    api : solarforecastarbiter.io.APISession
        Unused but conforms to common.update_site_observations call
    site : solarforecastarbiter.datamodel.Site
        Site object with the appropriate metadata.
    start : datetime
        The beginning of the period to request data for.
    end : datetime
        The end of the period to request data for.
    eia_api_key : str
        API key for api.eia.gov

    Returns
    -------
    data : pandas.DataFrame
        All of the requested data as a single DataFrame.

    Notes
    -----
    Currently only fetches observations for net load [MW]
    (`f"EBA.{eia_site_id}.D.H"`), but EIA contains other variables that
    may be incorporated later (e.g. solar generation:
    `f"EBA.{eia_site_id}.NG.SUN.H"`).
    """
    try:
        params = common.decode_extra_parameters(site)
    except ValueError:
        # Without extra parameters the EIA series id cannot be built.
        return pd.DataFrame()
    # Hourly net load (demand) series for this balancing authority.
    series_id = "EBA.{}.D.H".format(params['network_api_id'])
    obs_df = eia.get_eia_data(series_id, eia_api_key, start, end)
    if obs_df.empty:
        logger.warning(f'Data for site {site.name} contained no '
                       f'entries from {start} to {end}.')
        return pd.DataFrame()
    return obs_df.rename(columns={"value": "net_load"})
def request_data(site, year, month):
    """Makes a request for each file type until successful or we
    run out of filetypes.

    Parameters
    ----------
    site : datamodel.Site
        The site to request data for. Its extra parameters supply the
        two character station abbreviation and the observation interval
        length used to look up candidate file types in FILE_TYPE_MAP.
    year: int
        The year of the data to request.
    month: int
        The month of the data to request.

    Returns
    -------
    DataFrame or None
        A month of SRML data, or None (implicitly) when every file type
        fails to download or parses as empty.
    """
    extra_params = common.decode_extra_parameters(site)
    station_code = extra_params['network_api_abbreviation']
    interval_length = extra_params['observation_interval_length']
    file_types = FILE_TYPE_MAP[interval_length]
    for file_type in file_types:
        # The list file_types are listed with processed data
        # file types first. On a successful retrieval we return
        # the month of data, otherwise we log info and continue
        # until we've exhausted the list.
        try:
            srml_month = iotools.read_srml_month_from_solardat(
                station_code, year, month, file_type)
        except error.URLError:
            logger.warning(f'Could not retrieve {file_type} for SRML data '
                           f'for site {site.name} on {year}/{month} .')
            logger.debug(f'Site abbreviation: {station_code}')
            continue
        except pd.errors.EmptyDataError:
            logger.warning(f'SRML returned an empty file for station '
                           f'{site.name} on {year}/{month}.')
            continue
        else:
            return srml_month
def fetch(api, site, start, end, *, nrel_pvdaq_api_key):
    """Retrieve observation data for a PVDAQ site between start and end.

    Parameters
    ----------
    api : io.APISession
        Unused but conforms to common.update_site_observations call
    site : datamodel.Site
        Site object with the appropriate metadata.
    start : datetime
        The beginning of the period to request data for.
    end : datetime
        The end of the period to request data for.
    nrel_pvdaq_api_key : str
        API key for developer.nrel.gov

    Returns
    -------
    data : pandas.DataFrame
        All of the requested data concatenated into a single DataFrame.
    """
    try:
        site_extra_params = common.decode_extra_parameters(site)
    except ValueError:
        return pd.DataFrame()
    try:
        # One request per calendar year covered by [start, end].
        year_range = list(range(start.year, end.year + 1))
        obs_df = pvdaq.get_pvdaq_data(
            site_extra_params['network_api_id'], year_range,
            api_key=nrel_pvdaq_api_key)
    except Exception:
        # Not yet sure what kind of errors we might hit in production
        logger.warning(f'Could not retrieve data for site {site.name}'
                       f' between {start} and {end}.')
        return pd.DataFrame()
    obs_df = _watts_to_mw(obs_df)
    try:
        return obs_df.tz_localize(site.timezone)
    except NonExistentTimeError as e:
        logger.warning(f'Could not localize data for site {site.name} '
                       f'due to DST issue: {e}')
        return pd.DataFrame()
def initialize_site_forecasts(api, site):
    """
    Create a forecast for each variable measured at the site.

    Parameters
    ----------
    api : solarforecastarbiter.io.api.APISession
        An active Reference user session.
    site : datamodel.Site
        The site object for which to create Forecasts.
    """
    try:
        extra_params = common.decode_extra_parameters(site)
    except ValueError:
        # Fixed message: this function creates forecasts, not observations.
        logger.warning('Cannot create reference forecasts at MIDC site '
                       f'{site.name}, missing required parameters.')
        return
    site_api_id = extra_params['network_api_id']
    common.create_forecasts(
        api, site, midc_config.midc_var_map[site_api_id].keys(),
        default_forecasts.TEMPLATE_FORECASTS)
def initialize_site_forecasts(api, site):
    """
    Create a forecast for each variable at the site.

    Parameters
    ----------
    api : solarforecastarbiter.io.api.APISession
        An active Reference user session.
    site : datamodel.Site
        The site object for which to create Forecasts.
    """
    try:
        site_extra_params = common.decode_extra_parameters(site)
    except ValueError:
        logger.error('Failed to initialize reference forecasts for '
                     f'{site.name} extra parameters could not be loaded.')
        return
    arm_vars = _determine_site_vars(site_extra_params['network_api_id'])
    # Map ARM variable names to SFA variables before creating forecasts.
    common.create_forecasts(
        api, site, [DOE_ARM_VARIABLE_MAP[v] for v in arm_vars],
        default_forecasts.TEMPLATE_FORECASTS)
def fetch(api, site, start, end, *, doe_rtc_api_key):
    """Retrieve observation data for a DOE RTC site between start and end.

    Parameters
    ----------
    api : io.APISession
        Unused but conforms to common.update_site_observations call
    site : datamodel.Site
        Site object with the appropriate metadata.
    start : datetime
        The beginning of the period to request data for.
    end : datetime
        The end of the period to request data for.
    doe_rtc_api_key : str
        API key to access the DOE RTC api

    Returns
    -------
    data : pandas.DataFrame
        All of the requested data concatenated into a single DataFrame.
    """
    try:
        site_extra_params = common.decode_extra_parameters(site)
    except ValueError:
        return pd.DataFrame()
    obs_df = rtc.fetch_doe_rtc(
        site_extra_params['network_api_id'], doe_rtc_api_key,
        start.tz_convert(site.timezone),
        end.tz_convert(site.timezone))
    if obs_df.empty:
        logger.warning(f'Data for site {site.name} contained no '
                       f'entries from {start} to {end}.')
        return pd.DataFrame()
    obs_df = obs_df.rename(columns=DOE_RTC_VARIABLE_MAP).tz_localize(
        site.timezone)
    if 'ac_power' in obs_df:
        # W to MW
        obs_df['ac_power'] = obs_df['ac_power'] / 1e6
    return obs_df
def initialize_site_forecasts(api, site):
    """
    Create a forecast for each variable at the site.

    Parameters
    ----------
    api : solarforecastarbiter.io.api.APISession
        An active Reference user session.
    site : datamodel.Site
        The site object for which to create Forecasts.
    """
    try:
        site_extra_params = common.decode_extra_parameters(site)
    except ValueError:
        logger.error('Failed to initialize reference forecasts for '
                     f'{site.name} extra parameters could not be loaded.')
        return
    # Forecast every variable the site's extra parameters declare.
    common.create_forecasts(
        api, site, site_variables_from_extra_params(site_extra_params),
        default_forecasts.TEMPLATE_FORECASTS)
def fetch(api, site, start, end, *, doe_arm_user_id, doe_arm_api_key):
    """Retrieve observation data for a DOE ARM site between start and end.

    Parameters
    ----------
    api : io.APISession
        Unused but conforms to common.update_site_observations call
    site : datamodel.Site
        Site object with the appropriate metadata.
    start : datetime
        The beginning of the period to request data for.
    end : datetime
        The end of the period to request data for.
    doe_arm_user_id : str
        User ID to access the DOE ARM api.
    doe_arm_api_key : str
        API key to access the DOE ARM api.

    Returns
    -------
    data : pandas.DataFrame
        All of the requested data concatenated into a single DataFrame.
    """
    try:
        site_extra_params = common.decode_extra_parameters(site)
    except ValueError:
        return pd.DataFrame()
    datastream = site_extra_params['network_api_id']
    obs_df = arm.fetch_arm(
        doe_arm_user_id, doe_arm_api_key, datastream,
        _determine_site_vars(datastream),
        start.tz_convert(site.timezone),
        end.tz_convert(site.timezone))
    if obs_df.empty:
        logger.warning(f'Data for site {site.name} contained no '
                       f'entries from {start} to {end}.')
        return pd.DataFrame()
    # Translate ARM column names to SFA variable names.
    return obs_df.rename(columns=DOE_ARM_VARIABLE_MAP)
def update_observation_data(api, sites, observations, start, end):
    """Post new observation data to a list of SANDIA Observations
    from start to end.

    api : solarforecastarbiter.io.api.APISession
        An active Reference user session.
    sites: list of solarforecastarbiter.datamodel.Site
        List of all reference sites as Objects
    observations: list of solarforecastarbiter.datamodel.Observation
        List of all reference observations.
    start : datetime
        The beginning of the period to request data for.
    end : datetime
        The end of the period to request data for.

    Raises
    ------
    KeyError
        If the SANDIA_API_KEY environment variable is not set.
    """
    sandia_api_key = os.getenv('SANDIA_API_KEY')
    if sandia_api_key is None:
        raise KeyError('"SANDIA_API_KEY" environment variable must be '
                       'set to update SANDIA observation data.')
    # NOTE(review): other updaters pass a list here (e.g. ['NREL MIDC']);
    # a bare string only works if filter_by_networks performs a membership
    # test that accepts it -- confirm against common.filter_by_networks.
    sandia_sites = common.filter_by_networks(sites, 'SANDIA')
    for site in sandia_sites:
        try:
            site_extra_params = common.decode_extra_parameters(site)
        except ValueError:
            # Site metadata is unusable; skip it.
            continue
        sandia_site_id = site_extra_params['network_api_id']
        obs_df = sandia.fetch_sandia(
            sandia_site_id, sandia_api_key,
            start.tz_convert(site.timezone),
            end.tz_convert(site.timezone))
        # tz_localize implies the fetched index is timezone-naive; localize
        # before slicing with the (aware) start/end timestamps.
        obs_df = obs_df.rename(columns=SANDIA_VARIABLE_MAP).tz_localize(
            site.timezone)
        data_in_range = obs_df[start:end]
        if data_in_range.empty:
            logger.warning(f'Data for site {site.name} contained no '
                           f'entries from {start} to {end}.')
            continue
        site_observations = [obs for obs in observations if obs.site == site]
        for obs in site_observations:
            common.post_observation_data(api, obs, data_in_range, start, end)
def fetch(api, site, start, end):
    """Retrieve observation data for a MIDC site between start and end.

    Parameters
    ----------
    api : io.APISession
        Unused but conforms to common.update_site_observations call
    site : datamodel.Site
        Site object with the appropriate metadata.
    start : datetime
        The beginning of the period to request data for.
    end : datetime
        The end of the period to request data for.

    Returns
    -------
    data : pandas.DataFrame
        All of the requested data concatenated into a single DataFrame.
    """
    try:
        params = common.decode_extra_parameters(site)
    except ValueError:
        return pd.DataFrame()
    try:
        return iotools.read_midc_raw_data_from_nrel(
            params['network_api_id'], start, end)
    except IndexError:
        # The MIDC api returns a text file on 404 that is parsed as a
        # 1-column csv and causes an index error.
        logger.warning(f'Could not retrieve data for site {site.name}'
                       f' between {start} and {end}.')
        return pd.DataFrame()
    except ValueError as e:
        logger.error(f'Error retrieving data for site {site.name}'
                     f' between {start} and {end}: %s', e)
        return pd.DataFrame()
def test_decode_extra_parameters_error(site):
    # decode_extra_parameters must raise ValueError when a site's extra
    # parameters are missing or malformed (the `site` fixture supplies
    # such cases).
    with pytest.raises(ValueError):
        common.decode_extra_parameters(Site.from_dict(site))
def test_decode_extra_parameters():
    # A well-formed site should decode to a dict exposing the network
    # name and the observation interval length.
    metadata = Site.from_dict(site_string_dicts[0])
    params = common.decode_extra_parameters(metadata)
    assert params['network'] == 'DOE ARM'
    assert params['observation_interval_length'] == 1
def initialize_site_observations(api, site):
    """Creates an observation at the site for each variable in
    an SRML site's file.

    Parameters
    ----------
    api: :py:class:`solarforecastarbiter.io.api.APISession`
    site : :py:class:`solarforecastarbiter.datamodel.Site
        The site object for which to create Observations.

    Notes
    -----
    Since variables are labelled with an integer instrument
    number, Observations are named with their variable and
    instrument number found in the source files.

    e.g. A SRML file contains two columns labelled, 1001, and
    1002. These columns represent GHI at instrument 1 and
    instrument 2 respectively. The `pvlib.iotools` package
    converts these to 'ghi_1' and 'ghi_2' for us. We use these
    labels to differentiate between measurements recorded by
    different instruments.
    """
    # Request ~month old data at initialization to ensure we get a response.
    start = pd.Timestamp.utcnow() - pd.Timedelta('30 days')
    end = start
    try:
        extra_params = common.decode_extra_parameters(site)
    except ValueError:
        # Fixed message: this function handles SRML sites; the original
        # text incorrectly referenced MIDC.
        logger.warning('Cannot create reference observations at SRML site '
                       f'{site.name}, missing required parameters.')
        return
    # use site name without network here to build
    # a name with the original column label rather than
    # the SFA variable
    site_name = common.site_name_no_network(site)
    try:
        site_df = fetch(api, site, start, end)
    except error.HTTPError:
        logger.error('Could not find data to create observations '
                     f'for SRML site {site_name}.')
        return
    else:
        if site_df is None:
            logger.error('Could not find data to create observations '
                         f'for SRML site {site_name}.')
            return
        for variable in srml_variable_map.keys():
            matches = [col for col in site_df.columns
                       if col.startswith(variable)]
            for match in matches:
                observation_extra_parameters = extra_params.copy()
                observation_extra_parameters.update({
                    'network_data_label': match})
                try:
                    # Here, we pass a name with match instead of variable
                    # to differentiate between multiple observations of
                    # the same variable
                    common.create_observation(
                        api, site, srml_variable_map[variable],
                        name=f'{site_name} {match}',
                        interval_label='beginning',
                        extra_params=observation_extra_parameters)
                except HTTPError as e:
                    logger.error(
                        f'Failed to create {variable} observation at Site '
                        f'{site.name}. Error: {e.response.text}')
    # Also post any observations predefined for this site in the site file.
    with open(DEFAULT_SITEFILE) as fp:
        obs_metadata = json.load(fp)['observations']
    for obs in obs_metadata:
        obs_site_extra_params = json.loads(obs['site']['extra_parameters'])
        if obs_site_extra_params['network_api_id'] == extra_params[
                'network_api_id']:
            obs['site'] = site
            observation = Observation.from_dict(obs)
            common.check_and_post_observation(api, observation)
def fetch(api, site, start, end, *, doe_arm_user_id, doe_arm_api_key):
    """Retrieve observation data for a DOE ARM site between start and end.

    Parameters
    ----------
    api : io.APISession
        Unused but conforms to common.update_site_observations call
    site : datamodel.Site
        Site object with the appropriate metadata.
    start : datetime
        The beginning of the period to request data for.
    end : datetime
        The end of the period to request data for.
    doe_arm_user_id : str
        User ID to access the DOE ARM api.
    doe_arm_api_key : str
        API key to access the DOE ARM api.

    Returns
    -------
    data : pandas.DataFrame
        All of the requested data concatenated into a single DataFrame.
    """
    try:
        site_extra_params = common.decode_extra_parameters(site)
    except ValueError:
        # Site metadata is unusable; return an empty frame so callers can
        # proceed without special-casing the failure.
        return pd.DataFrame()
    available_datastreams = site_extra_params['datastreams']
    datastreams = {}
    # Build a dict with top-level keys to 'met' and 'qcrad' if meteorological
    # or irradiance data exists at the site. This is to later group dataframes
    # created from each datastream by the type of data found in the stream.
    for ds_type in ['met', 'qcrad']:
        if ds_type in available_datastreams:
            ds_type_dict = {}
            streams = available_datastreams[ds_type]
            # When a dict is present each key is a datastream and value is
            # a date range for which the datastream contains data. We need to
            # determine which streams to use to get all of the requested data.
            if isinstance(streams, dict):
                ds_type_dict.update(
                    find_stream_data_availability(streams, start, end))
            else:
                # If a single string datastream name exists, we assume that all
                # available data is contained in the stream. Deferring to the
                # data fetch process, which will fail to retrieve data and
                # continue gracefully.
                ds_type_dict[streams] = (start, end)
            datastreams[ds_type] = ds_type_dict
    site_dfs = []
    for stream_type in datastreams:
        # Stitch together all the datastreams with similar data.
        stream_type_dfs = []
        for datastream, date_range in datastreams[stream_type].items():
            stream_df = arm.fetch_arm(
                doe_arm_user_id,
                doe_arm_api_key,
                datastream,
                _determine_stream_vars(datastream),
                date_range[0].tz_convert(site.timezone),
                date_range[1].tz_convert(site.timezone))
            if stream_df.empty:
                logger.warning(f'Datastream {datastream} for site {site.name} '
                               f'contained no entries from {start} to {end}.')
            else:
                stream_type_dfs.append(stream_df)
        if stream_type_dfs:
            # Concatenate all dataframes of similar data
            stream_type_df = pd.concat(stream_type_dfs)
            site_dfs.append(stream_type_df)
    if site_dfs:
        # Join dataframes with different variables along the index, this has
        # the side effect of introducing missing data if any requests have
        # failed.
        obs_df = pd.concat(site_dfs, axis=1)
        obs_df = obs_df.rename(columns=DOE_ARM_VARIABLE_MAP)
        return obs_df
    else:
        logger.warning(f'Data for site {site.name} contained no entries from '
                       f'{start} to {end}.')
        return pd.DataFrame()
def initialize_site_observations(api, site):
    """Creates an observation at the site for each variable in
    an SRML site's file.

    Parameters
    ----------
    api: io.api.APISession
    site : datamodel.Site
        The site object for which to create Observations.

    Notes
    -----
    Since variables are labelled with an integer instrument
    number, Observations are named with their variable and
    instrument number found in the source files.

    e.g. A SRML file contains two columns labelled, 1001, and
    1002. These columns represent GHI at instrument 1 and
    instrument 2 respectively. The `pvlib.iotools` package
    converts these to 'ghi_1' and 'ghi_2' for us. We use these
    labels to differentiate between measurements recorded by
    different instruments.
    """
    start = pd.Timestamp.now()
    end = pd.Timestamp.now()
    try:
        extra_params = common.decode_extra_parameters(site)
    except ValueError:
        # Fixed message: this function handles SRML sites; the original
        # text incorrectly referenced MIDC.
        logger.warning('Cannot create reference observations at SRML site '
                       f'{site.name}, missing required parameters.')
        return
    # use site name without network here to build
    # a name with the original column label rather than
    # the SFA variable
    site_name = common.site_name_no_network(site)
    try:
        site_df = fetch(api, site, start, end)
    except error.HTTPError:
        logger.error('Could not find data to create observations '
                     f'for SRML site {site_name}.')
        return
    else:
        if site_df is None:
            logger.error('Could not find data to create observations '
                         f'for SRML site {site_name}.')
            return
        for variable in srml_variable_map.keys():
            matches = [col for col in site_df.columns if variable in col]
            for match in matches:
                observation_extra_parameters = extra_params.copy()
                observation_extra_parameters.update(
                    {'network_data_label': match})
                try:
                    # Here, we pass a name with match instead of variable
                    # to differentiate between multiple observations of
                    # the same variable
                    common.create_observation(
                        api, site, srml_variable_map[variable],
                        name=f'{site_name} {match}',
                        interval_label='beginning',
                        extra_params=observation_extra_parameters)
                except HTTPError as e:
                    logger.error(
                        f'Failed to create {variable} observation at Site '
                        f'{site.name}. Error: {e.response.text}')