예제 #1
0
def test_combine_subregions_nodate():
    """combine_subregions is expected to raise KeyError without an 'end_date' column."""
    frame = pd.DataFrame(create_test_data())
    frame.loc[:, 'end_date'] = pd.to_datetime(frame['end_date'])
    # Narrow to the two relevant columns, then strip the date column so that
    # only 'value' is handed to the function under test.
    values_only = frame[['end_date', 'value']].drop('end_date', axis=1)
    with pytest.raises(KeyError):
        get_transform_data.combine_subregions(values_only)
def time_series(client, data_series, initial_date, final_date):
    """
    Retrieve a sub-time series of the time series associated with a Gro data_series.

    Only the selection keys listed in ``entities`` are forwarded to
    ``get_transform_data.get_data``; every other key of ``data_series`` is
    ignored. The caller's dictionary is not modified.

    :param client: GroClient
    :param data_series: A dictionary of Gro data_series
    :param initial_date: 'YYYY-MM-DD'
    :param final_date: 'YYYY-MM-DD'
    :return: A dataframe with data from the relevant dates
    :raises Exception: whatever ``extract_time_periods_by_dates`` raises is
        re-raised unchanged after a warning has been logged
    """
    logger = client.get_logger()
    entities = {'metric_id', 'item_id', 'region_id', 'source_id', 'frequency_id', 'start_date'}
    # Build a filtered copy instead of popping keys from the argument, so the
    # caller's dictionary keeps any extra keys it carried.
    selection = {key: value for key, value in data_series.items() if key in entities}
    data = get_transform_data.get_data(client, **selection)
    consolidated_data = get_transform_data.combine_subregions(data)
    try:
        return get_transform_data.extract_time_periods_by_dates(consolidated_data,
                                                                initial_date,
                                                                final_date)
    except Exception:
        item_id = data_series.get('item_id')
        try:
            item_name = client.lookup('items', item_id)['name']
        except Exception:
            # Best effort only: a failed name lookup must not replace the
            # original error, so fall back to the raw item id in the message.
            item_name = item_id
        logger.warning('Please check availability of data for {}'.format(item_name))
        # Bare raise re-raises the original exception with its traceback intact.
        raise
예제 #3
0
def test_combine_subregions_with_subregion():
    """
    Compare combine_subregions output on the full test data with a hand-built frame.

    The expected frame has three rows, the columns ``value`` and ``end_date``
    (in that order) and an index equal to the ``end_date`` column.
    """
    test_data = pd.DataFrame(create_test_data())
    # Plain column assignment replaces the column (and its dtype) on every
    # pandas version; ``df.loc[:, col] = ...`` writes in place on pandas >= 2.0
    # and would leave the column with object dtype.
    test_data['end_date'] = pd.to_datetime(test_data['end_date'])
    test_data_subregion = test_data[['end_date', 'value']]

    expected_subregion = pd.DataFrame({
        'end_date': ['2000-03-01T00:00:00.000Z',
                     '2005-08-28T00:00:00.000Z',
                     '2019-07-31T00:00:00.000Z'],
        'value': [2.39664378851418,
                  2.28192643351167,
                  0.13002748115958],
    })
    # Depending on the version of pandas, the data type of the datetime object
    # can vary between 'datetime64[ns, UTC]' and 'datetime64[ns]'; mirror
    # whatever the installed version produces for these strings.
    utc_tz = bool(pd.to_datetime(expected_subregion['end_date'][0]).tzinfo)
    expected_subregion['end_date'] = pd.to_datetime(expected_subregion['end_date'],
                                                    utc=utc_tz)
    # The order of the columns after applying the consolidation function is
    # the following; copy so later writes do not target a column-subset frame.
    expected_subregion = expected_subregion[['value', 'end_date']].copy()
    # The dataframe's index is the same as its 'end_date' column
    expected_subregion.index = expected_subregion['end_date']
    assert_frame_equal(get_transform_data.combine_subregions(test_data_subregion),
                       expected_subregion)
예제 #4
0
def test_combine_subregions_with_nosubregion():
    """
    Compare combine_subregions output with a daily, forward-filled expected frame.

    The input is the test data with its last row removed. The expected frame
    is built from three dated values, resampled to daily frequency with
    forward fill, and carries an ``end_date`` column equal to its index.
    """
    test_data = pd.DataFrame(create_test_data())
    # Plain column assignment replaces the column (and its dtype) on every
    # pandas version; ``df.loc[:, col] = ...`` writes in place on pandas >= 2.0
    # and would leave the column with object dtype.
    test_data['end_date'] = pd.to_datetime(test_data['end_date'])
    test_data_subregion = test_data[['end_date', 'value']]
    # Remove the last row of the test data before calling the function.
    test_data_nosubregion = test_data_subregion.drop(
        test_data_subregion.index[-1])

    expected_nosubregion = pd.DataFrame({
        'end_date': [
            '2019-07-31T00:00:00.000Z', '2005-08-28T00:00:00.000Z',
            '2000-03-01T00:00:00.000Z'
        ],
        'value': [0.13002748115958, 1.17640700229636, 2.39664378851418]
    })
    # Depending on the version of pandas, the data type of the datetime object
    # can vary between 'datetime64[ns, UTC]' and 'datetime64[ns]'; mirror
    # whatever the installed version produces for these strings.
    utc_tz = bool(pd.to_datetime(expected_nosubregion['end_date'][0]).tzinfo)
    expected_nosubregion.index = expected_nosubregion['end_date']
    expected_nosubregion['end_date'] = pd.to_datetime(
        expected_nosubregion['end_date'], utc=utc_tz)
    expected_nosubregion.index = pd.to_datetime(expected_nosubregion.index,
                                                utc=utc_tz)
    # Resampler.pad() was removed in pandas 2.0; ffill() is the same forward
    # fill and is available on older versions as well.
    expected_nosubregion = expected_nosubregion.resample('D').ffill()
    # After resampling, the date column must follow the new daily index.
    expected_nosubregion['end_date'] = pd.to_datetime(
        expected_nosubregion.index, utc=utc_tz)
    assert_frame_equal(
        get_transform_data.combine_subregions(test_data_nosubregion),
        expected_nosubregion)