def test_extract_time_periods_by_dates_invalid_test_data():
    """extract_time_periods_by_dates must raise KeyError without an 'end_date' column."""
    window_start, window_end = '2016-02-02', '2017-02-01'
    complete_frame = pd.DataFrame(create_test_data())
    # Removing 'end_date' is what makes this input invalid.
    frame_without_end_date = complete_frame.drop(columns='end_date')
    with pytest.raises(KeyError):
        get_transform_data.extract_time_periods_by_dates(
            frame_without_end_date, window_start, window_end)
def test_extract_time_periods_by_dates_invalid_test_dates():
    """extract_time_periods_by_dates must raise ValueError for a date window over one year."""
    # These bounds are invalid: the initial and final dates are not within 1 year.
    too_early_start, too_late_end = '2016-02-02', '2017-02-02'
    frame = pd.DataFrame(create_test_data())
    parsed_end_dates = pd.to_datetime(frame['end_date'])
    frame.loc[:, 'end_date'] = parsed_end_dates
    with pytest.raises(ValueError):
        get_transform_data.extract_time_periods_by_dates(
            frame, too_early_start, too_late_end)
def time_series(client, data_series, initial_date, final_date):
    """
    Retrieve a sub-time series of the time series associated with a Gro data_series.

    Only the selection keys understood by this function ('metric_id', 'item_id',
    'region_id', 'source_id', 'frequency_id', 'start_date') are forwarded to
    get_transform_data.get_data; any other keys in data_series are ignored.
    The caller's data_series is left unmodified.

    :param client: GroClient
    :param data_series: A dictionary of Gro data_series
    :param initial_date: 'YYYY-MM-DD'
    :param final_date: 'YYYY-MM-DD'
    :return: A dataframe with data from the relevant dates
    :raises Exception: whatever extract_time_periods_by_dates raises is re-raised
        after a warning naming the item has been logged
    """
    logger = client.get_logger()
    entities = {'metric_id', 'item_id', 'region_id', 'source_id', 'frequency_id',
                'start_date'}
    # Build a filtered copy instead of popping keys from the caller's dict, so
    # the argument is not mutated as a side effect of this call.
    selection = {key: data_series[key] for key in data_series if key in entities}
    data = get_transform_data.get_data(client, **selection)
    consolidated_data = get_transform_data.combine_subregions(data)
    try:
        return get_transform_data.extract_time_periods_by_dates(
            consolidated_data, initial_date, final_date)
    except Exception:
        message = ('Please check availability of data for {}'.format(client.lookup(
            'items', data_series['item_id'])['name']))
        logger.warning(message)
        # Bare raise re-raises the active exception with its original traceback.
        raise
def test_extract_time_periods_by_dates_with_non_unique_dates():
    """Compare extract_time_periods_by_dates output with a hand-built expected frame.

    The expected frame drops every row labelled with the 2005 period, which
    occurs twice in the labels below (presumably the "non unique dates" case
    this test is named for -- confirm against the implementation).
    """
    window_start, window_end = '2019-03-01', '2019-07-31'
    excluded_period = '2005-03-01 to 2005-07-31'
    period_labels = ['2019-03-01 to 2019-07-31',
                     excluded_period,
                     '2000-03-01 to 2000-07-31',
                     excluded_period]

    test_data = pd.DataFrame(create_test_data())
    # NOTE: `expected` is the same object as `test_data` (not a copy), so the
    # 'date', 'year' and 'period' columns added here are also present on the
    # frame handed to the function under test.
    expected = test_data
    expected.loc[:, 'date'] = pd.to_datetime(expected['end_date'])
    expected.loc[:, 'year'] = expected['date'].dt.year.astype('str')
    expected.loc[:, 'period'] = period_labels

    rows_to_keep = expected.period != excluded_period
    expected = expected[rows_to_keep]
    expected.loc[:, 'mm-dd'] = expected['date'].dt.strftime("%m-%d")
    sort_keys = list(expected.columns)
    expected = expected.sort_values(by=sort_keys, axis=0, ascending=False)

    actual = get_transform_data.extract_time_periods_by_dates(
        test_data, window_start, window_end)
    assert_frame_equal(actual, expected)