def test_utils(self):
    """Check that ``utils.split_range`` cuts a year range in two.

    The cut point passed in is ``self.snpp.max_year(utils.EN)``. The expected
    values below pin the first part to end at 2027 and the second part to
    start at 2028.
    """
    years = range(2018, 2050)
    horizon = self.snpp.max_year(utils.EN)
    within, beyond = utils.split_range(years, horizon)

    # The first part starts where the requested range starts and ends at 2027.
    self.assertEqual(min(within), min(years))
    self.assertEqual(max(within), 2027)

    # The second part starts at 2028 and ends where the requested range ends.
    self.assertEqual(min(beyond), 2028)
    self.assertEqual(max(beyond), max(years))
def test_utils(self):
    """Exercise the helper functions in ``utils``.

    Covers ``split_range``, ``trim_range``, ``country``, ``split_by_country``
    and ``integerise``. Equality checks use ``assertEqual`` so that a failure
    reports both the actual and the expected value.
    """
    year_range = range(2018, 2050)
    (in_range, ex_range) = utils.split_range(year_range, self.snpp.max_year(utils.EN))
    self.assertEqual(min(in_range), min(year_range))
    self.assertEqual(max(in_range), 2029)
    self.assertEqual(min(ex_range), 2030)
    self.assertEqual(max(ex_range), max(year_range))

    # trim_range(years, lower, upper) -> expected result, for scalar, float,
    # list, numpy array and range inputs
    trim_cases = [
        (2011, 1991, 2016, [2011]),
        (2011.0, 1991, 2016, [2011]),
        ([2011], 1991, 2016, [2011]),
        ([2011.0], 1991, 2016, [2011]),
        (np.array([1995, 2005, 2019]), 2001, 2011, [2005]),
        ([1969, 2111], 1991, 2016, []),
        (range(1969, 2111), 2011, 2016, list(range(2011, 2017))),
    ]
    for years, lower, upper, expected in trim_cases:
        with self.subTest(years=years, lower=lower, upper=upper):
            self.assertEqual(utils.trim_range(years, lower, upper), expected)

    # country(codes) -> expected result, for a single code string as well as
    # lists of codes (including codes with unrecognised prefixes)
    country_cases = [
        ("E09000001", ["en"]),
        (['E06000002', 'E09000001'], ["en"]),
        (['E06000002', 'N09000001', 'S12000033', 'W06000011'], ['en', 'ni', 'sc', 'wa']),
        (['E06000001', 'E06000002', 'N09000001', 'S12000033', 'W06000011'], ['en', 'ni', 'sc', 'wa']),
        (['E06000001', 'W06000011', 'X06000002', 'Y09000001', 'Z12000033'], ["en", "wa"]),
        ('A06000001', []),
    ]
    for codes, expected in country_cases:
        with self.subTest(codes=codes):
            self.assertEqual(utils.country(codes), expected)

    codes = ['E06000001', 'E06000002', 'N09000001', 'S12000033', 'W06000011']
    split = utils.split_by_country(codes)
    self.assertEqual(split[utils.EN], ['E06000001', 'E06000002'])
    self.assertEqual(split[utils.WA], ['W06000011'])
    self.assertEqual(split[utils.SC], ['S12000033'])
    self.assertEqual(split[utils.NI], ['N09000001'])

    # naively, each element would be rounded down, making the total 10
    fractional = np.array([0.1, 0.2, 0.3, 0.4]) * 11
    integral = utils.integerise(fractional)
    self.assertTrue(np.array_equal(integral, [1, 2, 3, 5]))

    # 1.51 is NOT increased because 4.5 has a larger fractional part when
    # total is rescaled to 17 from 16.91
    fractional = np.array([1.1, 3.9, 4.5, 5.9, 1.51])
    integral = utils.integerise(fractional)
    self.assertTrue(np.array_equal(integral, [1, 4, 5, 6, 1]))

    # another example that preserves sum
    fractional = np.array([1.01] * 100)
    integral = utils.integerise(fractional)
    # 1.01 * 100 evaluates to exactly 101.0, so compare against the integer
    self.assertEqual(sum(integral), 101)
    self.assertTrue(np.array_equal(np.unique(integral), [1, 2]))
def extrapolate(self, npp, geog_code, year_range):
    """Return data for ``geog_code`` over ``year_range``, extended past the last available year.

    ``year_range`` is split at ``self.max_year(geog_code)``. Years in the
    first part are read directly with ``self.filter``. Each year in the
    second part is built from the final-year data, with ``OBS_VALUE``
    multiplied by ``npp.year_ratio("ppp", ...)`` for that year.

    Parameters:
        npp: object providing ``year_ratio(variant, country, base_year, year)``.
        geog_code: a single geography code.
        year_range: the years requested.

    Returns:
        The directly available data if nothing needs extrapolating, otherwise
        a frame with the available and extrapolated years concatenated under
        a fresh integer index.

    Raises:
        AssertionError: if the final-year data and the scaling data have
            different lengths.
    """
    # Function-scope import: the block needs pandas only for the final concat.
    import pandas as pd

    max_year = self.max_year(geog_code)
    (in_range, ex_range) = utils.split_range(year_range, max_year)

    all_years = self.filter(geog_code, in_range)

    extrapolated = []
    for year in ex_range:
        # Re-read the final year on every pass because the frame is modified below.
        data = self.filter([geog_code], [max_year])
        scaling = npp.year_ratio("ppp", utils.country(geog_code), max_year, year)
        # Compare the two lengths (not the length of a comparison result).
        assert len(data) == len(scaling), "final-year data and scaling differ in length"
        # NOTE(review): Series multiplication aligns on index labels, so this
        # presumably relies on data and scaling sharing an index - confirm.
        data.OBS_VALUE = data.OBS_VALUE * scaling.OBS_VALUE
        data.PROJECTED_YEAR_NAME = year
        extrapolated.append(data)

    if not extrapolated:
        return all_years

    # DataFrame.append was removed in pandas 2.0; a single concat is the
    # equivalent of the repeated append(..., ignore_index=True).
    return pd.concat([all_years] + extrapolated, ignore_index=True)
def create_variant(self, variant_name, npp, geog_codes, year_range):
    """
    Apply NPP variant to SNPP: SNPP(v) = SNPP(0) * sum(a,g) [ NPP(v) / NPP(0) ]
    Preserves age-gender structure of SNPP data

    Parameters:
        variant_name: name of the variant, passed to ``npp.variant_ratio``.
        npp: object providing ``min_year()`` and ``variant_ratio(...)``.
        geog_codes: a single geography code or a list of codes.
        year_range: the years requested.

    Returns:
        A frame holding, for each code in turn, the unmodified data for years
        before ``npp.min_year()`` followed by the variant-scaled data for the
        remaining years. Empty if there is nothing to return.

    Raises:
        AssertionError: if the extrapolated data and the variant scaling have
            different lengths.
    """
    if isinstance(geog_codes, str):
        geog_codes = [geog_codes]

    frames = []
    for geog_code in geog_codes:

        # split out any years prior to the NPP data (currently SNPP is 2014 based but NPP is 2016)
        (pre_range, in_range) = utils.split_range(year_range, npp.min_year() - 1)

        # for any years prior to NPP we just use the SNPP data as-is (i.e. "ppp")
        if pre_range:
            pre_data = self.filter(geog_code, pre_range)
            if len(pre_data) > 0:
                print("WARNING: variant {} not applied for years {} that predate the NPP data".format(
                    variant_name, pre_range))
            # Always keep the pre-NPP rows, including when there is nothing in
            # the NPP range (a bare `result.append(pre_data)` discards them).
            frames.append(pre_data)

        # move on if there's nothing in the NPP range
        if not in_range:
            continue

        sort_keys = ["C_AGE", "GENDER", "PROJECTED_YEAR_NAME"]
        data = self.extrapolate(npp, geog_code, in_range).sort_values(sort_keys).reset_index(drop=True)

        # NOTE(review): the ratio is requested for year_range rather than
        # in_range, and scaling keeps its pre-sort index for the index-aligned
        # multiplication below - confirm both are intended.
        scaling = npp.variant_ratio(
            variant_name, utils.country(geog_code), year_range).reset_index().sort_values(sort_keys)

        assert len(data) == len(scaling)
        data.OBS_VALUE = data.OBS_VALUE * scaling.OBS_VALUE

        # the pre-NPP data (if any) was added above, so this follows it
        frames.append(data)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # pd.concat rejects an empty list, so return an empty frame in that case.
    return pd.concat(frames) if frames else pd.DataFrame()
def extrapolate(self, npp, geog_codes, year_range):
    """Return data for ``geog_codes`` over ``year_range``, extended past each country's last year.

    The codes are grouped with ``utils.split_by_country``. For each group,
    ``year_range`` is split at ``self.max_year(country)``. Years in the first
    part are read directly with ``self.filter``. Each year in the second part
    is built from the final-year data, with ``OBS_VALUE`` multiplied by the
    ``npp.year_ratio("ppp", ...)`` value that matches on ``GENDER`` and
    ``C_AGE``.

    Parameters:
        npp: object providing ``year_ratio(variant, country, base_year, year)``.
        geog_codes: a single geography code or a list of codes.
        year_range: the years requested.

    Returns:
        One frame with all groups and years concatenated under a fresh
        integer index, or an empty frame if there is nothing to return.
    """
    if isinstance(geog_codes, str):
        geog_codes = [geog_codes]

    codes_by_country = utils.split_by_country(geog_codes)

    frames = []
    for country in codes_by_country:
        codes = codes_by_country[country]
        if not codes:
            continue

        max_year = self.max_year(country)
        last_year = self.filter(codes, max_year)

        (in_range, ex_range) = utils.split_range(year_range, max_year)

        # years that don't need to be extrapolated
        if in_range:
            frames.append(self.filter(codes, in_range))

        for year in ex_range:
            scaling = npp.year_ratio("ppp", country, max_year, year)
            # merge returns a new frame, so last_year itself is never modified;
            # the two OBS_VALUE columns come back suffixed _x (data) and _y (scaling)
            data = last_year.merge(scaling[["GENDER", "C_AGE", "OBS_VALUE"]], on=["GENDER", "C_AGE"])
            data["OBS_VALUE"] = data.OBS_VALUE_x * data.OBS_VALUE_y
            data["PROJECTED_YEAR_NAME"] = year
            frames.append(data.drop(["OBS_VALUE_x", "OBS_VALUE_y"], axis=1))

    # pd.concat rejects an empty list, so return an empty frame in that case.
    if not frames:
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; one concat also avoids
    # re-copying the accumulated frame on every iteration.
    return pd.concat(frames, ignore_index=True, sort=False)
# `plt` is used for plotting below, so it has to be imported here.
import matplotlib.pyplot as plt

import ukpopulation.myedata as MYEData
import ukpopulation.nppdata as NPPData
import ukpopulation.snppdata as SNPPData
import ukpopulation.utils as utils

# initialise the population modules
mye = MYEData.MYEData()
npp = NPPData.NPPData()
snpp = SNPPData.SNPPData()

lad = "E07000041"  # Exeter

# 50 year horizon
years = range(2011, 2067)

# split the requested years into those covered by MYE and those after it,
# then split the latter at the last SNPP year for this area
(mye_years, proj_years) = utils.split_range(years, mye.max_year())
(snpp_years, npp_years) = utils.split_range(proj_years, snpp.max_year(lad))

pop_mye = mye.aggregate(["GENDER", "C_AGE"], lad, mye_years)

# get the total projected population for pop up to the SNPP horizon (2039)
pop = snpp.aggregate(["GENDER", "C_AGE"], lad, snpp_years)
# extrapolate for another ~25 years
pop_ex = snpp.extrapolagg(["GENDER", "C_AGE"], npp, lad, npp_years)

# plot the data
plt.plot(pop_mye.PROJECTED_YEAR_NAME, pop_mye.OBS_VALUE, "b^", label="MYE")
plt.plot(pop.PROJECTED_YEAR_NAME, pop.OBS_VALUE, "bo", label="SNPP")
plt.plot(pop_ex.PROJECTED_YEAR_NAME, pop_ex.OBS_VALUE, "b.", label="ext NPP")
plt.xlabel("Year")
plt.ylabel("Persons")
plt.legend()