def test_fetch_arm(user_id, api_key, stream, variables, start, end, mocker):
    """fetch_arm returns a DataFrame containing the requested variables."""
    # Stub out the network layer: file listing and dataset retrieval both
    # come from local fixtures instead of the DOE ARM API.
    for target, effect in (
            ('solarforecastarbiter.io.fetch.arm.list_arm_filenames',
             filenames),
            ('solarforecastarbiter.io.fetch.arm.retrieve_arm_dataset',
             request_file)):
        mocker.patch(target, side_effect=effect)
    fetched = arm.fetch_arm(user_id, api_key, stream, variables, start, end)
    assert variables[0] in fetched.columns
def test_no_files(user_id, api_key, mocker):
    """Requesting a window with no matching files yields an empty frame."""
    mocker.patch('solarforecastarbiter.io.fetch.arm.list_arm_filenames',
                 side_effect=filenames)
    mocker.patch('solarforecastarbiter.io.fetch.arm.retrieve_arm_dataset',
                 side_effect=request_file)
    # A timestamp one day in the future guarantees no files exist yet.
    tomorrow = pd.Timestamp.now() + pd.Timedelta('1 days')
    result = arm.fetch_arm(user_id, api_key, 'ds_no_files',
                           ['down_short_hemisp'], tomorrow, tomorrow)
    assert result.empty
def test_fetch_arm_request_file_failure(mocker):
    """A failed dataset download is logged and results in an empty frame.

    `retrieve_arm_dataset` raising `ChunkedEncodingError` must not
    propagate: fetch_arm should log an error naming the file and return
    an empty DataFrame.
    """
    mocker.patch('solarforecastarbiter.io.fetch.arm.list_arm_filenames',
                 return_value=['afilename'])
    mocker.patch('solarforecastarbiter.io.fetch.arm.retrieve_arm_dataset',
                 side_effect=ChunkedEncodingError)
    mocked_log = mocker.patch('solarforecastarbiter.io.fetch.arm.logger')
    data = arm.fetch_arm('user', 'key', 'stream', ['ghi'], 'start', 'end')
    # Plain string literal: the original used an f-string with no
    # placeholders (redundant `f` prefix; same string value).
    mocked_log.error.assert_called_with(
        'Request failed for DOE ARM file afilename')
    assert data.empty
def fetch(api, site, start, end, *, doe_arm_user_id, doe_arm_api_key):
    """Retrieve observation data for a DOE ARM site between start and end.

    Parameters
    ----------
    api : io.APISession
        Unused but conforms to common.update_site_observations call
    site : datamodel.Site
        Site object with the appropriate metadata.
    start : datetime
        The beginning of the period to request data for.
    end : datetime
        The end of the period to request data for.
    doe_arm_user_id : str
        User ID to access the DOE ARM api.
    doe_arm_api_key : str
        API key to access the DOE ARM api.

    Returns
    -------
    data : pandas.DataFrame
        All of the requested data concatenated into a single DataFrame.
    """
    # Sites without parseable extra parameters cannot be fetched.
    try:
        extra_params = common.decode_extra_parameters(site)
    except ValueError:
        return pd.DataFrame()
    datastream = extra_params['network_api_id']
    variables = _determine_site_vars(datastream)
    # ARM expects timestamps localized to the site's timezone.
    localized_start = start.tz_convert(site.timezone)
    localized_end = end.tz_convert(site.timezone)
    obs_df = arm.fetch_arm(doe_arm_user_id, doe_arm_api_key, datastream,
                           variables, localized_start, localized_end)
    if obs_df.empty:
        logger.warning(f'Data for site {site.name} contained no '
                       f'entries from {start} to {end}.')
        return pd.DataFrame()
    # Translate ARM column names to Solar Forecast Arbiter variable names.
    return obs_df.rename(columns=DOE_ARM_VARIABLE_MAP)
def fetch(api, site, start, end, *, doe_arm_user_id, doe_arm_api_key):
    """Retrieve observation data for a DOE ARM site between start and end.

    Parameters
    ----------
    api : io.APISession
        Unused but conforms to common.update_site_observations call
    site : datamodel.Site
        Site object with the appropriate metadata.
    start : datetime
        The beginning of the period to request data for.
    end : datetime
        The end of the period to request data for.
    doe_arm_user_id : str
        User ID to access the DOE ARM api.
    doe_arm_api_key : str
        API key to access the DOE ARM api.

    Returns
    -------
    data : pandas.DataFrame
        All of the requested data concatenated into a single DataFrame.
    """
    try:
        site_extra_params = common.decode_extra_parameters(site)
    except ValueError:
        return pd.DataFrame()
    available_datastreams = site_extra_params['datastreams']

    # Map each data category ('met' for meteorological, 'qcrad' for
    # irradiance) to a {datastream: (start, end)} dict so that frames
    # built from each stream can later be grouped by data type.
    stream_ranges = {}
    for category in ('met', 'qcrad'):
        if category not in available_datastreams:
            continue
        streams = available_datastreams[category]
        if isinstance(streams, dict):
            # Each key is a datastream and each value a date range for
            # which the stream has data; select the streams needed to
            # cover the requested window.
            category_ranges = {}
            category_ranges.update(
                find_stream_data_availability(streams, start, end))
        else:
            # A bare datastream name: assume it holds all available data
            # and let the fetch fail gracefully if it does not.
            category_ranges = {streams: (start, end)}
        stream_ranges[category] = category_ranges

    site_dfs = []
    for category in stream_ranges:
        # Stitch together all the datastreams carrying similar data.
        frames = []
        for datastream, window in stream_ranges[category].items():
            stream_df = arm.fetch_arm(
                doe_arm_user_id, doe_arm_api_key, datastream,
                _determine_stream_vars(datastream),
                window[0].tz_convert(site.timezone),
                window[1].tz_convert(site.timezone))
            if stream_df.empty:
                logger.warning(f'Datastream {datastream} for site {site.name} '
                               f'contained no entries from {start} to {end}.')
            else:
                frames.append(stream_df)
        if frames:
            # Concatenate all dataframes of similar data along the index.
            site_dfs.append(pd.concat(frames))

    if not site_dfs:
        logger.warning(f'Data for site {site.name} contained no entries from '
                       f'{start} to {end}.')
        return pd.DataFrame()
    # Join frames holding different variables along the index; failed
    # requests surface as missing data after this join.
    obs_df = pd.concat(site_dfs, axis=1)
    return obs_df.rename(columns=DOE_ARM_VARIABLE_MAP)