예제 #1
0
    def test_integerise(self):
        """Check ``hl.prob2IntFreq`` and ``hl.integerise`` on valid and invalid input.

        Invalid input is expected to come back as an error string instead of a
        result dict, so those cases compare the return value itself. Scalar
        checks use ``assertEqual``/``assertLess`` so that a failure reports the
        offending values.
        """
        # probs not valid
        r = hl.prob2IntFreq(np.array([0.3, 0.3, 0.2, 0.1]), 10)
        self.assertEqual(r, "probabilities do not sum to unity")

        # pop not valid
        r = hl.prob2IntFreq(np.array([0.4, 0.3, 0.2, 0.1]), -1)
        self.assertEqual(r, "population cannot be negative")

        # zero pop
        r = hl.prob2IntFreq(np.array([0.4, 0.3, 0.2, 0.1]), 0)
        self.assertEqual(r["rmse"], 0.0)
        self.assertTrue(np.array_equal(r["freq"], np.array([0, 0, 0, 0])))

        # exact
        r = hl.prob2IntFreq(np.array([0.4, 0.3, 0.2, 0.1]), 10)
        self.assertEqual(r["rmse"], 0.0)
        self.assertTrue(np.array_equal(r["freq"], np.array([4, 3, 2, 1])))

        # inexact
        r = hl.prob2IntFreq(np.array([0.4, 0.3, 0.2, 0.1]), 17)
        self.assertAlmostEqual(r["rmse"], np.sqrt(0.075))
        self.assertTrue(np.array_equal(r["freq"], np.array([7, 5, 3, 2])))

        # 1-d case
        r = hl.integerise(np.array([2.0, 1.5, 1.0, 0.5]))
        self.assertTrue(r["conv"])

        # multidim integerisation
        # invalid population (total of 4.1 is not an integer)
        s = np.array([[1.1, 1.0], [1.0, 1.0]])
        r = hl.integerise(s)
        self.assertEqual(
            r,
            "Marginal or total value 4.100000 is not an integer (within tolerance 0.000100)"
        )
        # invalid marginals (total is 4.0 but a marginal sums to 2.1)
        s = np.array([[1.1, 1.0], [0.9, 1.0]])
        r = hl.integerise(s)
        self.assertEqual(
            r,
            "Marginal or total value 2.100000 is not an integer (within tolerance 0.000100)"
        )

        # use IPF to generate a valid fractional population
        m0 = np.array([111, 112, 113, 114, 110], dtype=float)
        m1 = np.array([136, 142, 143, 139], dtype=float)
        s = np.ones([len(m0), len(m1), len(m0)])

        fpop = hl.ipf(
            s,
            [np.array([0]), np.array([1]), np.array([2])],
            [m0, m1, m0],
        )["result"]

        result = hl.integerise(fpop)
        self.assertTrue(result["conv"])
        # integerisation must preserve the total population
        self.assertEqual(np.sum(result["result"]), sum(m0))
        self.assertLess(result["rmse"], 1.05717)
예제 #2
0
  def __microsynthesise(self, year): #LAD=self.region
    """Build the synthetic population table for ``self.region`` in ``year``.

    The age/sex marginal is taken from ``self.mye_api`` for years before the
    start of the SNPP data, from ``self.snpp_api`` (with the NPP variant
    applied once the NPP data has started) up to ``self.npp_api.max_year()``.
    Geography and ethnicity marginals are derived from the proportions in
    ``self.seed``. The seeded fit uses ``hl.ipf`` when ``self.fast_mode`` is
    set (result rounded to integers) and ``hl.qisi`` otherwise.

    Side effect: ``self.seed`` is replaced by the (unrounded, float) result
    so that it seeds the next call.

    Returns:
      pandas.DataFrame with columns ``Area``, ``DC1117EW_C_SEX``,
      ``DC1117EW_C_AGE`` and ``DC2101EW_C_ETHPUK11``.

    Raises:
      ValueError: ``year`` is beyond ``self.npp_api.max_year()``.
      RuntimeError: either fit failed to converge or did not return a
        result dict.
    """
    # Census/seed proportions for geography and ethnicity
    seed_total = self.seed.sum()
    oa_prop = self.seed.sum((1, 2, 3)) / seed_total
    eth_prop = self.seed.sum((0, 1, 2)) / seed_total

    if year < self.snpp_api.min_year(self.region):
      projection = self.mye_api.filter(self.region, year)
    elif year > self.npp_api.max_year():
      # format the message properly rather than passing the year as a second exception arg
      raise ValueError("Cannot microsimulate past NPP horizon year ({})".format(self.npp_api.max_year()))
    elif year < self.npp_api.min_year():
      # Don't attempt to apply NPP variant if before the start of the NPP data
      projection = self.snpp_api.filter(self.region, year)
    else:
      projection = self.snpp_api.create_variant(self.variant, self.npp_api, self.region, year)
    age_sex = utils.create_age_sex_marginal(utils.adjust_pp_age(projection), self.region)

    # convert proportions/probabilities to integer frequencies
    population = age_sex.sum()
    oa = hl.prob2IntFreq(oa_prop, population)["freq"]
    eth = hl.prob2IntFreq(eth_prop, population)["freq"]
    # combine the above into a 2d marginal using QIS-I and census 2011 or later data as the seed
    oa_eth = hl.qisi(self.seed.sum((1, 2)), [np.array([0]), np.array([1])], [oa, eth])
    if not (isinstance(oa_eth, dict) and oa_eth["conv"]):
      raise RuntimeError("oa_eth did not converge")

    # now the full seeded microsynthesis
    indices = [np.array([0, 3]), np.array([1, 2])]
    if self.fast_mode:
      msynth = hl.ipf(self.seed, indices, [oa_eth["result"].astype(float), age_sex.astype(float)])
    else:
      msynth = hl.qisi(self.seed, indices, [oa_eth["result"], age_sex])
    # same guard as for oa_eth: a non-dict return cannot be indexed by "conv"
    if not (isinstance(msynth, dict) and msynth["conv"]):
      print(msynth)
      raise RuntimeError("msynth did not converge")

    print("updating seed to", year, " ", end="")
    if self.fast_mode:
      # keep the fractional result as the next seed, but emit whole people
      self.seed = msynth["result"]
      msynth["result"] = np.around(msynth["result"]).astype(int)
    else:
      self.seed = msynth["result"].astype(float)
    rawtable = hl.flatten(msynth["result"]) #, c("OA", "SEX", "AGE", "ETH"))

    # col names and remapped values
    table = pd.DataFrame(columns=["Area", "DC1117EW_C_SEX", "DC1117EW_C_AGE", "DC2101EW_C_ETHPUK11"])
    table.Area = utils.remap(rawtable[0], self.geog_map)
    table.DC1117EW_C_SEX = utils.remap(rawtable[1], [1, 2])
    table.DC1117EW_C_AGE = utils.remap(rawtable[2], range(1, 87))
    table.DC2101EW_C_ETHPUK11 = utils.remap(rawtable[3], self.eth_map)

    # consistency checks (original note: in fast mode just report discrepancies)
    self.__check(table, age_sex, oa_eth["result"])

    return table
예제 #3
0
  def test_IPF(self):
    """Run ``hl.ipf`` on 2-d, 3-d and 12-d seeds and check the fitted tables.

    Every case checks the total population and/or that summing the result
    over the other axes reproduces each requested marginal. The second 2-d
    pass additionally checks the reported iteration count and maximum error.
    """
    def single_axes(ndim):
      # one single-axis index array per dimension: [0], [1], ..., [ndim - 1]
      return [np.array([axis]) for axis in range(ndim)]

    # expected fit for a uniform 2x2 seed (outer product of the marginals / 100)
    uniform_fit = [[45.24, 6.76], [41.76, 6.24]]

    rows = np.array([52.0, 48.0])
    cols = np.array([87.0, 13.0])
    layers = np.array([55.0, 45.0])
    axes = single_axes(2)

    seed = np.ones((rows.size, cols.size))
    fit = hl.ipf(seed, axes, [rows, cols])
    self.assertTrue(fit["conv"])
    self.assertEqual(fit["pop"], 100.0)
    self.assertTrue(np.array_equal(fit["result"], np.array(uniform_fit)))

    # perturb one cell of the seed and refit
    seed[0, 0] = 0.7
    fit = hl.ipf(seed, axes, [rows, cols])
    self.assertTrue(fit["conv"])
    # check overall population and marginals correct
    self.assertEqual(np.sum(fit["result"]), fit["pop"])
    self.assertTrue(np.allclose(np.sum(fit["result"], 0), cols))
    self.assertTrue(np.allclose(np.sum(fit["result"], 1), rows))

    # 3-d uniform seed
    axes = single_axes(3)
    seed = np.ones((2, 2, 2))
    fit = hl.ipf(seed, axes, [rows, cols, layers])
    for summed_over in ((0, 1), (1, 2), (2, 0)):
      print(np.sum(fit["result"], summed_over))
    self.assertTrue(fit["conv"])
    # check overall population and marginals correct
    self.assertAlmostEqual(np.sum(fit["result"]), fit["pop"]) # default is 7d.p.
    for summed_over, expected in (((0, 1), layers), ((1, 2), rows), ((2, 0), cols)):
      self.assertTrue(np.allclose(np.sum(fit["result"], summed_over), expected))

    # 12D
    seed = np.ones([2] * 12)
    axes = single_axes(12)
    half = np.array([2048., 2048.])
    fit = hl.ipf(seed, axes, [half] * 12)
    self.assertTrue(fit["pop"] == 4096)

    # second pass: fresh marginals and seed, also checking iteration diagnostics
    rows = np.array([52.0, 48.0])
    cols = np.array([87.0, 13.0])
    layers = np.array([55.0, 45.0])

    seed2d = np.ones((rows.size, cols.size))
    fit = hl.ipf(seed2d, single_axes(2), [rows, cols])
    self.assertTrue(np.allclose(np.sum(fit["result"], 0), cols))
    self.assertTrue(np.allclose(np.sum(fit["result"], 1), rows))
    self.assertTrue(fit["conv"])
    self.assertEqual(fit["iterations"], 1)
    self.assertEqual(fit["maxError"], 0.0)
    self.assertEqual(fit["pop"], 100.0)
    self.assertTrue(np.array_equal(fit["result"], np.array(uniform_fit)))

    seed2d[0, 1] = 0.7
    fit = hl.ipf(seed2d, single_axes(2), [rows, cols])
    self.assertTrue(np.allclose(np.sum(fit["result"], 0), cols))
    self.assertTrue(np.allclose(np.sum(fit["result"], 1), rows))
    self.assertTrue(fit["conv"])
    self.assertLess(fit["iterations"], 6)
    self.assertLess(fit["maxError"], 5e-10)
    self.assertEqual(fit["pop"], 100.0)

    seed = np.ones((2, 2, 2))
    fit = hl.ipf(seed, single_axes(3), [rows, cols, layers])
    for summed_over in ((0, 1), (1, 2), (2, 0)):
      print(np.sum(fit["result"], summed_over))
    self.assertTrue(fit["conv"])
    # check overall population and marginals correct
    self.assertAlmostEqual(np.sum(fit["result"]), fit["pop"]) # default is 7d.p.
    for summed_over, expected in (((0, 1), layers), ((1, 2), rows), ((2, 0), cols)):
      self.assertTrue(np.allclose(np.sum(fit["result"], summed_over), expected))

    # 12D
    seed = np.ones([2] * 12)
    half = np.array([2048., 2048.])
    fit = hl.ipf(seed, single_axes(12), [half] * 12)
    self.assertTrue(fit["conv"] == True)
    self.assertTrue(fit["pop"] == 4096)
예제 #4
0
    def test_QISI(self):
        """Run ``hl.qisi`` on 2-d, 3-d and 4-d uniform seeds, then check shape errors.

        The fitting cases check convergence, the chi-squared / p-value bounds,
        the total population and every marginal. The remaining cases pass a
        seed whose shape disagrees with the marginals and expect both
        ``hl.qisi`` and ``hl.ipf`` to return the corresponding message string.
        """
        def single_axes(ndim):
            # one single-axis index array per dimension: [0], [1], ..., [ndim - 1]
            return [np.array([axis]) for axis in range(ndim)]

        def paired_axes():
            # three 2-d marginals that overlap on axes 1 and 2
            return [np.array(pair) for pair in ([0, 1], [1, 2], [2, 3])]

        # 2-d
        marg_a = np.array([52, 48])
        marg_b = np.array([10, 77, 13])
        seed = np.ones((marg_a.size, marg_b.size))

        fit = hl.qisi(seed, single_axes(2), [marg_a, marg_b])
        self.assertTrue(fit["conv"])
        self.assertLess(fit["chiSq"], 0.04)
        self.assertGreater(fit["pValue"], 0.9)
        # TODO check the degeneracy calculation before asserting fit["degeneracy"] < 0.04
        self.assertEqual(fit["pop"], 100.0)
        self.assertTrue(np.allclose(np.sum(fit["result"], 0), marg_b))
        self.assertTrue(np.allclose(np.sum(fit["result"], 1), marg_a))
        # (no assertion on the exact table, e.g. [[5, 40, 7], [5, 37, 6]])

        # 3-d
        marg_a = np.array([52, 40, 4, 4])
        marg_b = np.array([87, 10, 3])
        marg_c = np.array([55, 15, 6, 12, 12])
        seed = np.ones((marg_a.size, marg_b.size, marg_c.size))

        fit = hl.qisi(seed, single_axes(3), [marg_a, marg_b, marg_c])
        self.assertTrue(fit["conv"])
        self.assertLess(fit["chiSq"], 70)  # seems a bit high
        self.assertGreater(fit["pValue"], 0.0)  # seems a bit low
        self.assertEqual(fit["pop"], 100.0)
        for summed_over, expected in (((0, 1), marg_c),
                                      ((1, 2), marg_a),
                                      ((2, 0), marg_b)):
            self.assertTrue(np.allclose(np.sum(fit["result"], summed_over), expected))

        # 4-d
        marg_a = np.array([52, 48])
        marg_b = np.array([87, 13])
        marg_c = np.array([67, 33])
        marg_d = np.array([55, 45])
        seed = np.ones((marg_a.size, marg_b.size, marg_c.size, marg_d.size))

        fit = hl.qisi(seed, single_axes(4), [marg_a, marg_b, marg_c, marg_d])
        self.assertTrue(fit["conv"])
        self.assertLess(fit["chiSq"], 5.5)
        self.assertGreater(fit["pValue"], 0.02)
        self.assertEqual(fit["pop"], 100)
        for summed_over, expected in (((0, 1, 2), marg_d),
                                      ((1, 2, 3), marg_a),
                                      ((2, 3, 0), marg_b),
                                      ((3, 0, 1), marg_c)):
            self.assertTrue(np.allclose(np.sum(fit["result"], summed_over), expected))

        # check dimension consistency check works
        wrong_shape_msg = "seed dimensions [2, 3, 7, 5] are inconsistent with that implied by marginals ([2, 3, 5, 7])"
        wrong_rank_msg = "seed dimensions 3 is inconsistent with that implied by marginals (4)"

        # marginals imply shape (2, 3, 5, 7); the first seed swaps the last two axes
        marg_b = np.full((2, 3), 5 * 7, dtype=int)
        marg_c = np.full((3, 5), 7 * 2, dtype=int)
        marg_d = np.full((5, 7), 2 * 3, dtype=int)

        seed = np.ones([2, 3, 7, 5])
        outcome = hl.qisi(seed, paired_axes(), [marg_b, marg_c, marg_d])
        self.assertEqual(outcome, wrong_shape_msg)

        outcome = hl.ipf(seed, paired_axes(),
                         [marg_b.astype(float), marg_c.astype(float), marg_d.astype(float)])
        self.assertEqual(outcome, wrong_shape_msg)

        # seed with too few dimensions
        seed = np.ones((2, 3, 5))
        outcome = hl.qisi(seed, paired_axes(), [marg_b, marg_c, marg_d])
        self.assertEqual(outcome, wrong_rank_msg)

        outcome = hl.ipf(seed, paired_axes(),
                         [marg_b.astype(float), marg_c.astype(float), marg_d.astype(float)])
        self.assertEqual(outcome, wrong_rank_msg)