import pandas as pd
from requests.exceptions import ChunkedEncodingError

from solarforecastarbiter.io.fetch import arm


def test_fetch_arm(user_id, api_key, stream, variables, start, end, mocker):
    mocker.patch('solarforecastarbiter.io.fetch.arm.list_arm_filenames',
                 side_effect=filenames)
    mocker.patch('solarforecastarbiter.io.fetch.arm.retrieve_arm_dataset',
                 side_effect=request_file)
    data = arm.fetch_arm(user_id, api_key, stream, variables, start, end)
    assert variables[0] in data.columns


def test_no_files(user_id, api_key, mocker):
    mocker.patch('solarforecastarbiter.io.fetch.arm.list_arm_filenames',
                 side_effect=filenames)
    mocker.patch('solarforecastarbiter.io.fetch.arm.retrieve_arm_dataset',
                 side_effect=request_file)
    start = end = pd.Timestamp.now() + pd.Timedelta('1 days')
    arm_df = arm.fetch_arm(user_id, api_key, 'ds_no_files',
                           ['down_short_hemisp'], start, end)
    assert arm_df.empty


def test_fetch_arm_request_file_failure(mocker):
    mocker.patch('solarforecastarbiter.io.fetch.arm.list_arm_filenames',
                 return_value=['afilename'])
    mocker.patch('solarforecastarbiter.io.fetch.arm.retrieve_arm_dataset',
                 side_effect=ChunkedEncodingError)
    mocked_log = mocker.patch('solarforecastarbiter.io.fetch.arm.logger')
    data = arm.fetch_arm('user', 'key', 'stream', ['ghi'], 'start', 'end')
    mocked_log.error.assert_called_with(
        'Request failed for DOE ARM file afilename')
    assert data.empty
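
These tests rely on pytest-mock's mocker fixture plus fixtures and helpers (user_id, api_key, stream, variables, start, end, filenames, request_file) defined elsewhere in the test module. Below is a minimal sketch of what those could look like; the signatures and return types are assumptions, not the project's actual test setup.

import pandas as pd
import pytest
import xarray as xr


@pytest.fixture
def user_id():
    # Placeholder credentials; all network access is mocked in the tests.
    return 'user'


@pytest.fixture
def api_key():
    return 'key'


@pytest.fixture
def stream():
    # Hypothetical DOE ARM datastream name.
    return 'sgpqcrad1longC1.c1'


@pytest.fixture
def variables():
    return ['down_short_hemisp']


@pytest.fixture
def start():
    return pd.Timestamp('2020-01-01T00:00Z')


@pytest.fixture
def end():
    return pd.Timestamp('2020-01-02T00:00Z')


def filenames(user_id, api_key, stream, start, end):
    # Stub standing in for list_arm_filenames (signature assumed to match):
    # no files for the sentinel stream used by test_no_files, one fake
    # filename otherwise.
    if stream == 'ds_no_files':
        return []
    return ['afilename.nc']


def request_file(user_id, api_key, filename):
    # Stub standing in for retrieve_arm_dataset; assumes the real function
    # returns the downloaded netCDF file as an xarray.Dataset.
    times = pd.date_range('2020-01-01', periods=3, freq='1min')
    return xr.Dataset(
        {'down_short_hemisp': ('time', [0.0, 100.0, 200.0])},
        coords={'time': times})
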
Example #4
import logging

import pandas as pd

from solarforecastarbiter.io.fetch import arm
from solarforecastarbiter.io.reference_observations import common

# DOE_ARM_VARIABLE_MAP and _determine_site_vars are defined elsewhere in
# this module.
logger = logging.getLogger(__name__)


def fetch(api, site, start, end, *, doe_arm_user_id, doe_arm_api_key):
    """Retrieve observation data for a DOE ARM site between start and end.

    Parameters
    ----------
    api : io.APISession
        Unused, but accepted to conform to the common.update_site_observations
        call signature.
    site : datamodel.Site
        Site object with the appropriate metadata.
    start : datetime
        The beginning of the period to request data for.
    end : datetime
        The end of the period to request data for.
    doe_arm_user_id : str
        User ID to access the DOE ARM API.
    doe_arm_api_key : str
        API key to access the DOE ARM API.

    Returns
    -------
    data : pandas.DataFrame
        All of the requested data concatenated into a single DataFrame.
    """
    try:
        site_extra_params = common.decode_extra_parameters(site)
    except ValueError:
        return pd.DataFrame()
    doe_arm_datastream = site_extra_params['network_api_id']
    site_vars = _determine_site_vars(doe_arm_datastream)
    obs_df = arm.fetch_arm(doe_arm_user_id, doe_arm_api_key,
                           doe_arm_datastream, site_vars,
                           start.tz_convert(site.timezone),
                           end.tz_convert(site.timezone))
    if obs_df.empty:
        logger.warning(f'Data for site {site.name} contained no '
                       f'entries from {start} to {end}.')
        return pd.DataFrame()
    obs_df = obs_df.rename(columns=DOE_ARM_VARIABLE_MAP)
    return obs_df
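
For context, here is a hedged invocation sketch for the fetch above. The credentials and site metadata are placeholders; the only requirement taken from the code is that extra_parameters decodes to JSON containing the 'network_api_id' used as the DOE ARM datastream name. The datamodel.Site fields shown are assumed to be the minimal set.

import pandas as pd

from solarforecastarbiter import datamodel

# Hypothetical site; extra_parameters is a JSON string exposing the
# 'network_api_id' this fetch uses as the datastream name.
site = datamodel.Site(
    name='Southern Great Plains C1', latitude=36.6, longitude=-97.5,
    elevation=314.0, timezone='Etc/GMT+6',
    extra_parameters='{"network_api_id": "sgpqcrad1longC1.c1"}')

# api is unused by fetch, so None suffices here; start and end must be
# tz-aware for tz_convert to succeed.
data = fetch(
    None, site,
    pd.Timestamp('2020-06-01T00:00Z'), pd.Timestamp('2020-06-02T00:00Z'),
    doe_arm_user_id='my-user-id', doe_arm_api_key='my-api-key')
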
Example #5
import logging

import pandas as pd

from solarforecastarbiter.io.fetch import arm
from solarforecastarbiter.io.reference_observations import common

# DOE_ARM_VARIABLE_MAP, find_stream_data_availability, and
# _determine_stream_vars are defined elsewhere in this module.
logger = logging.getLogger(__name__)


def fetch(api, site, start, end, *, doe_arm_user_id, doe_arm_api_key):
    """Retrieve observation data for a DOE ARM site between start and end.

    Parameters
    ----------
    api : io.APISession
        Unused, but accepted to conform to the common.update_site_observations
        call signature.
    site : datamodel.Site
        Site object with the appropriate metadata.
    start : datetime
        The beginning of the period to request data for.
    end : datetime
        The end of the period to request data for.
    doe_arm_user_id : str
        User ID to access the DOE ARM API.
    doe_arm_api_key : str
        API key to access the DOE ARM API.

    Returns
    -------
    data : pandas.DataFrame
        All of the requested data concatenated into a single DataFrame.
    """
    try:
        site_extra_params = common.decode_extra_parameters(site)
    except ValueError:
        return pd.DataFrame()

    available_datastreams = site_extra_params['datastreams']

    datastreams = {}
    # Build a dict with top-level keys 'met' and 'qcrad' when meteorological
    # or irradiance data exists at the site. This is used later to group
    # dataframes created from each datastream by the type of data found in
    # the stream.
    for ds_type in ['met', 'qcrad']:
        if ds_type in available_datastreams:
            ds_type_dict = {}
            streams = available_datastreams[ds_type]

            # When a dict is present, each key is a datastream and each value
            # is the date range for which that datastream contains data. We
            # need to determine which streams to use to cover all of the
            # requested data.
            if isinstance(streams, dict):
                ds_type_dict.update(
                    find_stream_data_availability(streams, start, end))
            else:
                # If a single datastream name is given as a string, assume all
                # available data is contained in that stream and defer to the
                # data fetch process, which fails gracefully when it cannot
                # retrieve data.
                ds_type_dict[streams] = (start, end)
            datastreams[ds_type] = ds_type_dict

    site_dfs = []

    for stream_type in datastreams:
        # Stitch together all the datastreams with similar data.
        stream_type_dfs = []
        for datastream, date_range in datastreams[stream_type].items():
            stream_df = arm.fetch_arm(doe_arm_user_id, doe_arm_api_key,
                                      datastream,
                                      _determine_stream_vars(datastream),
                                      date_range[0].tz_convert(site.timezone),
                                      date_range[1].tz_convert(site.timezone))
            if stream_df.empty:
                logger.warning(f'Datastream {datastream} for site {site.name} '
                               f'contained no entries from {start} to {end}.')
            else:
                stream_type_dfs.append(stream_df)
        if stream_type_dfs:
            # Concatenate all dataframes of similar data
            stream_type_df = pd.concat(stream_type_dfs)
            site_dfs.append(stream_type_df)

    if site_dfs:
        # Join dataframes with different variables along the index; this has
        # the side effect of introducing missing data if any requests have
        # failed.
        obs_df = pd.concat(site_dfs, axis=1)
        obs_df = obs_df.rename(columns=DOE_ARM_VARIABLE_MAP)
        return obs_df
    else:
        logger.warning(f'Data for site {site.name} contained no entries from '
                       f'{start} to {end}.')
        return pd.DataFrame()
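
The branching above implies a 'datastreams' layout in the site's decoded extra parameters like the sketch below. The 'met' and 'qcrad' keys come from the code; the datastream names and the 'start/end' range format are illustrative assumptions.

# Hypothetical decoded extra_parameters for a site with both stream types.
extra_parameters = {
    'datastreams': {
        # A bare string: assume the single stream covers any requested
        # period and let the data fetch fail gracefully if it does not.
        'met': 'sgpmetE13.b1',
        # A dict: map each datastream to the date range it covers, so that
        # find_stream_data_availability can select the streams overlapping
        # the requested start and end.
        'qcrad': {
            'sgpqcrad1longC1.c1': '2010-01-01/2019-01-01',
            'sgpqcradlongC1.c1': '2019-01-01/now',
        },
    },
}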