def test_integerise(self):
    """Check hl.prob2IntFreq and hl.integerise on invalid and valid inputs.

    Invalid inputs are expected to come back as an error-message string
    instead of a result dict. Valid inputs are expected to come back as a
    dict whose "freq"/"rmse" (prob2IntFreq) or "conv"/"result"/"rmse"
    (integerise) entries are checked below.
    """
    # probabilities that do not sum to one are rejected
    r = hl.prob2IntFreq(np.array([0.3, 0.3, 0.2, 0.1]), 10)
    self.assertEqual(r, "probabilities do not sum to unity")

    # a negative population is rejected
    r = hl.prob2IntFreq(np.array([0.4, 0.3, 0.2, 0.1]), -1)
    self.assertEqual(r, "population cannot be negative")

    # zero population
    r = hl.prob2IntFreq(np.array([0.4, 0.3, 0.2, 0.1]), 0)
    self.assertEqual(r["rmse"], 0.0)
    self.assertTrue(np.array_equal(r["freq"], np.array([0, 0, 0, 0])))

    # population that the probabilities divide exactly
    r = hl.prob2IntFreq(np.array([0.4, 0.3, 0.2, 0.1]), 10)
    self.assertEqual(r["rmse"], 0.0)
    self.assertTrue(np.array_equal(r["freq"], np.array([4, 3, 2, 1])))

    # population that the probabilities do not divide exactly
    r = hl.prob2IntFreq(np.array([0.4, 0.3, 0.2, 0.1]), 17)
    self.assertAlmostEqual(r["rmse"], np.sqrt(0.075))
    self.assertTrue(np.array_equal(r["freq"], np.array([7, 5, 3, 2])))

    # 1-d integerisation
    r = hl.integerise(np.array([2.0, 1.5, 1.0, 0.5]))
    self.assertTrue(r["conv"])

    # multidimensional integerisation: non-integral total population
    s = np.array([[1.1, 1.0], [1.0, 1.0]])
    r = hl.integerise(s)
    self.assertEqual(
        r,
        "Marginal or total value 4.100000 is not an integer (within tolerance 0.000100)",
    )

    # multidimensional integerisation: integral total, non-integral marginals
    s = np.array([[1.1, 1.0], [0.9, 1.0]])
    r = hl.integerise(s)
    self.assertEqual(
        r,
        "Marginal or total value 2.100000 is not an integer (within tolerance 0.000100)",
    )

    # use IPF to generate a valid fractional population, then integerise it
    m0 = np.array([111, 112, 113, 114, 110], dtype=float)
    m1 = np.array([136, 142, 143, 139], dtype=float)
    s = np.ones([len(m0), len(m1), len(m0)])
    fpop = hl.ipf(
        s,
        [np.array([0]), np.array([1]), np.array([2])],
        [m0, m1, m0],
    )["result"]

    result = hl.integerise(fpop)
    self.assertTrue(result["conv"])
    self.assertEqual(np.sum(result["result"]), sum(m0))
    self.assertLess(result["rmse"], 1.05717)
def __microsynthesise(self, year):
    """Synthesise the population for `year` and return it as a flat table.

    The age/sex marginal is built from a source chosen by year:
      * before the first SNPP year for self.region: self.mye_api data;
      * up to self.npp_api.max_year() but before the first NPP year:
        unmodified self.snpp_api data;
      * otherwise up to self.npp_api.max_year(): an SNPP variant created
        from self.variant and self.npp_api.

    Geography and ethnicity marginals are derived from the current
    self.seed proportions, combined into a 2-d marginal with hl.qisi, and
    then the full synthesis is run with hl.ipf (self.fast_mode) or hl.qisi.

    Side effects: prints a progress message and replaces self.seed with the
    synthesised result (cast to float when not in fast mode).

    Returns:
        pandas.DataFrame with columns "Area", "DC1117EW_C_SEX",
        "DC1117EW_C_AGE" and "DC2101EW_C_ETHPUK11".

    Raises:
        ValueError: if `year` is later than self.npp_api.max_year().
        RuntimeError: if either synthesis step fails to return a converged
            result dict.
    """
    # Census/seed proportions for geography and ethnicity
    seed_total = self.seed.sum()
    oa_prop = self.seed.sum((1, 2, 3)) / seed_total
    eth_prop = self.seed.sum((0, 1, 2)) / seed_total

    if year < self.snpp_api.min_year(self.region):
        projection = self.mye_api.filter(self.region, year)
    elif year <= self.npp_api.max_year():
        # Don't attempt to apply NPP variant if before the start of the NPP data
        if year < self.npp_api.min_year():
            projection = self.snpp_api.filter(self.region, year)
        else:
            projection = self.snpp_api.create_variant(
                self.variant, self.npp_api, self.region, year
            )
    else:
        # format the message here: passing the value as a second argument
        # to ValueError would leave the "{}" placeholder unfilled
        raise ValueError(
            "Cannot microsimulate past NPP horizon year ({})".format(
                self.npp_api.max_year()
            )
        )

    age_sex = utils.create_age_sex_marginal(
        utils.adjust_pp_age(projection), self.region
    )

    # convert proportions/probabilities to integer frequencies
    population = age_sex.sum()
    oa = hl.prob2IntFreq(oa_prop, population)["freq"]
    eth = hl.prob2IntFreq(eth_prop, population)["freq"]

    # combine the above into a 2d marginal using QIS-I and census 2011 or
    # later data as the seed
    oa_eth = hl.qisi(
        self.seed.sum((1, 2)), [np.array([0]), np.array([1])], [oa, eth]
    )
    if not (isinstance(oa_eth, dict) and oa_eth["conv"]):
        raise RuntimeError("oa_eth did not converge")

    # now the full seeded microsynthesis
    if self.fast_mode:
        msynth = hl.ipf(
            self.seed,
            [np.array([0, 3]), np.array([1, 2])],
            [oa_eth["result"].astype(float), age_sex.astype(float)],
        )
    else:
        msynth = hl.qisi(
            self.seed,
            [np.array([0, 3]), np.array([1, 2])],
            [oa_eth["result"], age_sex],
        )
    # same guard as for oa_eth: a non-dict return must be reported as a
    # failure rather than indexed with "conv"
    if not (isinstance(msynth, dict) and msynth["conv"]):
        print(msynth)
        raise RuntimeError("msynth did not converge")

    print("updating seed to", year, " ", end="")
    if self.fast_mode:
        # keep the unrounded result as the next seed, round only the output
        self.seed = msynth["result"]
        msynth["result"] = np.around(msynth["result"]).astype(int)
    else:
        self.seed = msynth["result"].astype(float)

    rawtable = hl.flatten(msynth["result"])

    # col names and remapped values
    table = pd.DataFrame(
        columns=["Area", "DC1117EW_C_SEX", "DC1117EW_C_AGE", "DC2101EW_C_ETHPUK11"]
    )
    table["Area"] = utils.remap(rawtable[0], self.geog_map)
    table["DC1117EW_C_SEX"] = utils.remap(rawtable[1], [1, 2])
    table["DC1117EW_C_AGE"] = utils.remap(rawtable[2], range(1, 87))
    table["DC2101EW_C_ETHPUK11"] = utils.remap(rawtable[3], self.eth_map)

    # consistency checks (in fast mode just report discrepancies)
    self.__check(table, age_sex, oa_eth["result"])

    return table
def test_IPF(self):
    """Check hl.ipf on 2-d, 3-d and 12-d seeds with 1-d marginals.

    Each case checks the "conv" flag and the reported population, and most
    also check that the result sums back to the requested marginals. The
    later 2-d cases additionally check the reported "iterations" and
    "maxError" values.
    """

    def axis_indices(ndim):
        # one single-axis index array per dimension: [0], [1], ..., [ndim-1]
        return [np.array([d]) for d in range(ndim)]

    m0 = np.array([52.0, 48.0])
    m1 = np.array([87.0, 13.0])
    m2 = np.array([55.0, 45.0])

    # 2-d, uniform seed
    i = axis_indices(2)
    s = np.ones([len(m0), len(m1)])
    p = hl.ipf(s, i, [m0, m1])
    self.assertTrue(p["conv"])
    self.assertEqual(p["pop"], 100.0)
    self.assertTrue(
        np.array_equal(p["result"], np.array([[45.24, 6.76], [41.76, 6.24]]))
    )

    # 2-d, perturbed seed
    s[0, 0] = 0.7
    p = hl.ipf(s, i, [m0, m1])
    self.assertTrue(p["conv"])
    # check overall population and marginals correct
    self.assertEqual(np.sum(p["result"]), p["pop"])
    self.assertTrue(np.allclose(np.sum(p["result"], 0), m1))
    self.assertTrue(np.allclose(np.sum(p["result"], 1), m0))

    # 3-d, uniform seed
    i = axis_indices(3)
    s = np.array([[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]])
    p = hl.ipf(s, i, [m0, m1, m2])
    self.assertTrue(p["conv"])
    # check overall population and marginals correct
    self.assertAlmostEqual(np.sum(p["result"]), p["pop"])  # default is 7d.p.
    self.assertTrue(np.allclose(np.sum(p["result"], (0, 1)), m2))
    self.assertTrue(np.allclose(np.sum(p["result"], (1, 2)), m0))
    self.assertTrue(np.allclose(np.sum(p["result"], (2, 0)), m1))

    # 12D
    s = np.ones([2] * 12)
    i = axis_indices(12)
    m = np.array([2048., 2048.])
    p = hl.ipf(s, i, [m] * 12)
    self.assertEqual(p["pop"], 4096)

    # 2-d again, this time also checking iteration count and error
    m0 = np.array([52.0, 48.0])
    m1 = np.array([87.0, 13.0])
    m2 = np.array([55.0, 45.0])

    seed = np.ones([len(m0), len(m1)])
    p = hl.ipf(seed, axis_indices(2), [m0, m1])
    self.assertTrue(np.allclose(np.sum(p["result"], 0), m1))
    self.assertTrue(np.allclose(np.sum(p["result"], 1), m0))
    self.assertTrue(p["conv"])
    self.assertEqual(p["iterations"], 1)
    self.assertEqual(p["maxError"], 0.0)
    self.assertEqual(p["pop"], 100.0)
    self.assertTrue(
        np.array_equal(p["result"], np.array([[45.24, 6.76], [41.76, 6.24]]))
    )

    seed[0, 1] = 0.7
    p = hl.ipf(seed, axis_indices(2), [m0, m1])
    self.assertTrue(np.allclose(np.sum(p["result"], 0), m1))
    self.assertTrue(np.allclose(np.sum(p["result"], 1), m0))
    self.assertTrue(p["conv"])
    self.assertLess(p["iterations"], 6)
    self.assertLess(p["maxError"], 5e-10)
    self.assertEqual(p["pop"], 100.0)

    # 3-d again
    s = np.array([[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]])
    p = hl.ipf(s, axis_indices(3), [m0, m1, m2])
    self.assertTrue(p["conv"])
    # check overall population and marginals correct
    self.assertAlmostEqual(np.sum(p["result"]), p["pop"])  # default is 7d.p.
    self.assertTrue(np.allclose(np.sum(p["result"], (0, 1)), m2))
    self.assertTrue(np.allclose(np.sum(p["result"], (1, 2)), m0))
    self.assertTrue(np.allclose(np.sum(p["result"], (2, 0)), m1))

    # 12D again, this time also checking convergence
    s = np.ones([2] * 12)
    m = np.array([2048., 2048.])
    p = hl.ipf(s, axis_indices(12), [m] * 12)
    self.assertTrue(p["conv"])
    self.assertEqual(p["pop"], 4096)
def test_QISI(self):
    """Check hl.qisi on 2-d, 3-d and 4-d seeds, plus dimension validation.

    The valid cases check the "conv" flag, bounds on "chiSq" and "pValue",
    the reported population, and that the result sums back to each marginal.
    The invalid cases check that both hl.qisi and hl.ipf return the expected
    error string when seed and marginal dimensions disagree.
    """
    # --- 2-d ---
    rows = np.array([52, 48])
    cols = np.array([10, 77, 13])
    seed = np.ones([len(rows), len(cols)])
    out = hl.qisi(seed, [np.array([0]), np.array([1])], [rows, cols])
    self.assertTrue(out["conv"])
    self.assertLess(out["chiSq"], 0.04)
    self.assertGreater(out["pValue"], 0.9)
    # TODO check the calculation: self.assertLess(p["degeneracy"], 0.04)
    self.assertEqual(out["pop"], 100.0)
    self.assertTrue(np.allclose(np.sum(out["result"], 0), cols))
    self.assertTrue(np.allclose(np.sum(out["result"], 1), rows))
    # disabled: self.assertTrue(np.array_equal(p["result"], np.array([[5, 40, 7],[5, 37, 6]])))

    # --- 3-d ---
    a = np.array([52, 40, 4, 4])
    b = np.array([87, 10, 3])
    c = np.array([55, 15, 6, 12, 12])
    seed = np.ones([len(a), len(b), len(c)])
    out = hl.qisi(
        seed,
        [np.array([0]), np.array([1]), np.array([2])],
        [a, b, c],
    )
    self.assertTrue(out["conv"])
    self.assertLess(out["chiSq"], 70)  # seems a bit high
    self.assertGreater(out["pValue"], 0.0)  # seems a bit low
    self.assertEqual(out["pop"], 100.0)
    self.assertTrue(np.allclose(np.sum(out["result"], (0, 1)), c))
    self.assertTrue(np.allclose(np.sum(out["result"], (1, 2)), a))
    self.assertTrue(np.allclose(np.sum(out["result"], (2, 0)), b))

    # --- 4-d ---
    a = np.array([52, 48])
    b = np.array([87, 13])
    c = np.array([67, 33])
    d = np.array([55, 45])
    seed = np.ones([len(a), len(b), len(c), len(d)])
    out = hl.qisi(
        seed,
        [np.array([0]), np.array([1]), np.array([2]), np.array([3])],
        [a, b, c, d],
    )
    self.assertTrue(out["conv"])
    self.assertLess(out["chiSq"], 5.5)
    self.assertGreater(out["pValue"], 0.02)
    self.assertEqual(out["pop"], 100)
    self.assertTrue(np.allclose(np.sum(out["result"], (0, 1, 2)), d))
    self.assertTrue(np.allclose(np.sum(out["result"], (1, 2, 3)), a))
    self.assertTrue(np.allclose(np.sum(out["result"], (2, 3, 0)), b))
    self.assertTrue(np.allclose(np.sum(out["result"], (3, 0, 1)), c))

    # --- check dimension consistency check works ---
    m01 = np.full([2, 3], 5 * 7, dtype=int)
    m12 = np.full([3, 5], 7 * 2, dtype=int)
    m23 = np.full([5, 7], 2 * 3, dtype=int)

    def overlapping_axes():
        # fresh index arrays for every call, as each call site builds its own
        return [np.array([0, 1]), np.array([1, 2]), np.array([2, 3])]

    def float_marginals():
        # hl.ipf is given float copies of the integer marginals
        return [m01.astype(float), m12.astype(float), m23.astype(float)]

    # seed has the right rank but its last two axes disagree with the marginals
    shape_msg = "seed dimensions [2, 3, 7, 5] are inconsistent with that implied by marginals ([2, 3, 5, 7])"
    seed = np.ones([2, 3, 7, 5])
    out = hl.qisi(seed, overlapping_axes(), [m01, m12, m23])
    self.assertEqual(out, shape_msg)
    out = hl.ipf(seed, overlapping_axes(), float_marginals())
    self.assertEqual(out, shape_msg)

    # seed has fewer axes than the marginals refer to
    rank_msg = "seed dimensions 3 is inconsistent with that implied by marginals (4)"
    seed = np.ones((2, 3, 5))
    out = hl.qisi(seed, overlapping_axes(), [m01, m12, m23])
    self.assertEqual(out, rank_msg)
    out = hl.ipf(seed, overlapping_axes(), float_marginals())
    self.assertEqual(out, rank_msg)