def test_combine_subregions_nodate():
    """combine_subregions must raise KeyError when the 'end_date' column is missing."""
    frame = pd.DataFrame(create_test_data())
    frame.loc[:, 'end_date'] = pd.to_datetime(frame['end_date'])
    # Keep only 'value' — dropping 'end_date' produces the invalid input.
    values_only = frame[['end_date', 'value']].drop('end_date', axis=1)
    with pytest.raises(KeyError):
        get_transform_data.combine_subregions(values_only)
def time_series(client, data_series, initial_date, final_date):
    """
    Retrieves a sub-time series of a time series associated with a gro data_series.

    :param client: GroClient
    :param data_series: A dictionary of Gro data_series
    :param initial_date: 'YYYY-MM-DD'
    :param final_date: 'YYYY-MM-DD'
    :return: A dataframe with data from the relevant dates
    :raises Exception: re-raised from extract_time_periods_by_dates after
        logging a warning about data availability
    """
    logger = client.get_logger()
    # Only these keys are valid keyword arguments for get_data.
    entities = {'metric_id', 'item_id', 'region_id', 'source_id',
                'frequency_id', 'start_date'}
    # Filter into a new dict rather than popping entries off the caller's
    # dict — the original implementation mutated the input argument.
    selected = {key: value for key, value in data_series.items()
                if key in entities}
    data = get_transform_data.get_data(client, **selected)
    consolidated_data = get_transform_data.combine_subregions(data)
    try:
        return get_transform_data.extract_time_periods_by_dates(
            consolidated_data, initial_date, final_date)
    except Exception:
        message = ('Please check availability of data for {}'.format(client.lookup(
            'items', data_series['item_id'])['name']))
        logger.warning(message)
        # Bare raise preserves the original traceback.
        raise
def test_combine_subregions_with_subregion():
    """combine_subregions collapses subregion rows into one value per end_date."""
    raw = pd.DataFrame(create_test_data())
    raw.loc[:, 'end_date'] = pd.to_datetime(raw['end_date'])
    subregion_input = raw[['end_date', 'value']]

    expected = pd.DataFrame({
        'end_date': ['2000-03-01T00:00:00.000Z',
                     '2005-08-28T00:00:00.000Z',
                     '2019-07-31T00:00:00.000Z'],
        'value': [2.39664378851418, 2.28192643351167, 0.13002748115958],
    })
    # Depending on the version of pandas, the data type of the datetime object
    # can vary between 'datetime64[ns, UTC]' and 'datetime64[ns]'
    utc_tz = bool(pd.to_datetime(expected['end_date'][0]).tzinfo)
    expected.loc[:, 'end_date'] = pd.to_datetime(expected['end_date'],
                                                 utc=utc_tz)
    # The consolidation function returns columns ordered ['value', 'end_date']
    # and uses 'end_date' as the index.
    expected = expected[['value', 'end_date']]
    expected.index = expected['end_date']
    expected.loc[:, 'end_date'] = expected.index

    assert_frame_equal(get_transform_data.combine_subregions(subregion_input),
                       expected)
def test_combine_subregions_with_nosubregion():
    """combine_subregions resamples daily (forward-filled) when no subregion row overlaps."""
    test_data = pd.DataFrame(create_test_data())
    test_data.loc[:, 'end_date'] = pd.to_datetime(test_data['end_date'])
    test_data_subregion = test_data[['end_date', 'value']]
    # Drop the last row to remove the overlapping subregion observation.
    test_data_nosubregion = test_data_subregion.drop(
        test_data_subregion.index[-1])
    expected_nosubregion = pd.DataFrame({
        'end_date': [
            '2019-07-31T00:00:00.000Z',
            '2005-08-28T00:00:00.000Z',
            '2000-03-01T00:00:00.000Z'
        ],
        'value': [0.13002748115958, 1.17640700229636, 2.39664378851418]
    })
    utc_tz = False
    if pd.to_datetime(expected_nosubregion['end_date'][0]).tzinfo:
        utc_tz = True
    # Depending on the version of pandas, the data type of the datetime object
    # can vary between 'datetime64[ns, UTC]' and 'datetime64[ns]'
    expected_nosubregion.index = expected_nosubregion['end_date']
    expected_nosubregion.loc[:, 'end_date'] = pd.to_datetime(
        expected_nosubregion['end_date'], utc=utc_tz)
    expected_nosubregion.index = pd.to_datetime(expected_nosubregion.index,
                                                utc=utc_tz)
    # .ffill() replaces the deprecated .pad() alias (removed in pandas 2.0);
    # behavior is identical.
    expected_nosubregion = expected_nosubregion.resample('D').ffill()
    expected_nosubregion.loc[:, 'end_date'] = pd.to_datetime(
        expected_nosubregion.index, utc=utc_tz)
    assert_frame_equal(
        get_transform_data.combine_subregions(test_data_nosubregion),
        expected_nosubregion)