Exemple #1
0
def test_extract_time_periods_by_dates_invalid_test_data():
    """Extraction must raise KeyError when the frame has no 'end_date' column."""
    # Removing 'end_date' is what makes this input invalid.
    frame_without_end_date = pd.DataFrame(create_test_data()).drop(columns='end_date')
    date_window = ('2016-02-02', '2017-02-01')
    with pytest.raises(KeyError):
        get_transform_data.extract_time_periods_by_dates(
            frame_without_end_date, *date_window)
Exemple #2
0
def test_extract_time_periods_by_dates_invalid_test_dates():
    """Extraction must raise ValueError for a date window that is not within 1 year."""
    frame = pd.DataFrame(create_test_data())
    parsed_end_dates = pd.to_datetime(frame['end_date'])
    frame.loc[:, 'end_date'] = parsed_end_dates
    # 2016-02-02 through 2017-02-02 is the invalid window: the two dates are
    # not within 1 year of each other.
    too_wide_window = ('2016-02-02', '2017-02-02')
    with pytest.raises(ValueError):
        get_transform_data.extract_time_periods_by_dates(frame, *too_wide_window)
def time_series(client, data_series, initial_date, final_date):
    """
    Retrieve a sub-time series of the time series associated with a gro data_series.

    Only the 'metric_id', 'item_id', 'region_id', 'source_id', 'frequency_id'
    and 'start_date' entries of ``data_series`` are forwarded to
    ``get_transform_data.get_data``. The caller's dictionary is not modified.

    :param client: GroClient
    :param data_series: A dictionary of Gro data_series
    :param initial_date: 'YYYY-MM-DD'
    :param final_date: 'YYYY-MM-DD'
    :return: A dataframe with data from the relevant dates
    :raises Exception: whatever ``extract_time_periods_by_dates`` raises is
        re-raised unchanged after a warning naming the item has been logged
    """
    logger = client.get_logger()
    entities = {'metric_id', 'item_id', 'region_id', 'source_id', 'frequency_id', 'start_date'}
    # Filter into a new dict instead of popping keys from the argument, so the
    # caller's data_series keeps every entry it was passed in with.
    selection = {key: value for key, value in data_series.items() if key in entities}
    data = get_transform_data.get_data(client, **selection)
    consolidated_data = get_transform_data.combine_subregions(data)
    try:
        return get_transform_data.extract_time_periods_by_dates(consolidated_data,
                                                                initial_date,
                                                                final_date)
    except Exception:
        # Log a hint naming the item, then let the original error propagate.
        message = ('Please check availability of data for {}'.format(client.lookup(
            'items', data_series['item_id'])['name']))
        logger.warning(message)
        raise
Exemple #4
0
def test_extract_time_periods_by_dates_with_non_unique_dates():
    """Compare extraction for 2019-03-01..2019-07-31 against a hand-built frame.

    The expected frame is the test data plus 'date', 'year', 'period' and
    'mm-dd' columns, with the rows labelled '2005-03-01 to 2005-07-31'
    removed, sorted descending on all columns.
    """
    source_frame = pd.DataFrame(create_test_data())
    window_start, window_end = '2019-03-01', '2019-07-31'
    repeated_period = '2005-03-01 to 2005-07-31'

    # NOTE: `expected` is the same object as `source_frame` until the filter
    # below rebinds it, so the 'date', 'year' and 'period' columns are also
    # present on the frame passed to the function under test.
    expected = source_frame
    expected.loc[:, 'date'] = pd.to_datetime(expected['end_date'])
    expected.loc[:, 'year'] = expected['date'].dt.year.astype('str')
    expected.loc[:, 'period'] = ['2019-03-01 to 2019-07-31',
                                 repeated_period,
                                 '2000-03-01 to 2000-07-31',
                                 repeated_period]

    # Drop the rows whose period label occurs twice.
    expected = expected[expected['period'] != repeated_period]
    expected.loc[:, 'mm-dd'] = expected['date'].dt.strftime("%m-%d")
    sort_columns = list(expected.columns)
    expected = expected.sort_values(by=sort_columns, axis=0, ascending=False)

    actual = get_transform_data.extract_time_periods_by_dates(source_frame,
                                                              window_start,
                                                              window_end)
    assert_frame_equal(actual, expected)