Example no. 1
0
  def __microsynthesise(self, year): #LAD=self.region
    """Microsynthesise the population of this region for a single year.

    Seeds a quasirandom (QIS-I) or IPF synthesis with census proportions and
    an age-sex marginal taken from the appropriate population projection
    (MYE before the SNPP window, SNPP - with an NPP variant where available -
    up to the NPP horizon).

    Args:
      year: calendar year to synthesise; must not exceed the NPP horizon.

    Returns:
      pd.DataFrame with one row per person and columns Area,
      DC1117EW_C_SEX, DC1117EW_C_AGE, DC2101EW_C_ETHPUK11.

    Raises:
      ValueError: if year is beyond the NPP horizon year.
      RuntimeError: if either synthesis step fails to converge.
    """
    # Census/seed proportions for geography and ethnicity
    # NOTE(review): assumes seed axes are (geog, sex, age, eth), consistent
    # with the marginal dimension indices used below - confirm against caller
    oa_prop = self.seed.sum((1, 2, 3)) / self.seed.sum()
    eth_prop = self.seed.sum((0, 1, 2)) / self.seed.sum()

    if year < self.snpp_api.min_year(self.region):
      age_sex = utils.create_age_sex_marginal(utils.adjust_pp_age(self.mye_api.filter(self.region, year)), self.region)
    elif year <= self.npp_api.max_year():
      # Don't attempt to apply NPP variant if before the start of the NPP data
      if year < self.npp_api.min_year():
        age_sex = utils.create_age_sex_marginal(utils.adjust_pp_age(self.snpp_api.filter(self.region, year)), self.region)
      else:
        age_sex = utils.create_age_sex_marginal(utils.adjust_pp_age(self.snpp_api.create_variant(self.variant, self.npp_api, self.region, year)), self.region)
    else:
      # BUGFIX: the horizon year was previously passed as a second positional
      # argument to ValueError, so the {} placeholder was never substituted
      raise ValueError("Cannot microsimulate past NPP horizon year ({})".format(self.npp_api.max_year()))

    # convert proportions/probabilities to integer frequencies
    oa = hl.prob2IntFreq(oa_prop, age_sex.sum())["freq"]
    eth = hl.prob2IntFreq(eth_prop, age_sex.sum())["freq"]
    # combine the above into a 2d marginal using QIS-I and census 2011 or later data as the seed
    oa_eth = hl.qisi(self.seed.sum((1, 2)), [np.array([0]), np.array([1])], [oa, eth])
    # humanleague returns an error string (not a dict) on invalid input
    if not (isinstance(oa_eth, dict) and oa_eth["conv"]):
      raise RuntimeError("oa_eth did not converge")

    # now the full seeded microsynthesis: IPF (fractional, fast) or QIS-I (integer)
    if self.fast_mode:
      msynth = hl.ipf(self.seed, [np.array([0, 3]), np.array([1, 2])], [oa_eth["result"].astype(float), age_sex.astype(float)])
    else:
      msynth = hl.qisi(self.seed, [np.array([0, 3]), np.array([1, 2])], [oa_eth["result"], age_sex])
    if not msynth["conv"]:
      print(msynth)
      raise RuntimeError("msynth did not converge")

    # carry this year's result forward as next year's seed
    # (print hoisted - it was duplicated identically in both branches)
    print("updating seed to", year, " ", end="")
    if self.fast_mode:
      self.seed = msynth["result"]
      # IPF result is fractional - round to integer counts for the output table
      msynth["result"] = np.around(msynth["result"]).astype(int)
    else:
      self.seed = msynth["result"].astype(float)
    rawtable = hl.flatten(msynth["result"]) #, c("OA", "SEX", "AGE", "ETH"))

    # col names and remapped values
    table = pd.DataFrame(columns=["Area", "DC1117EW_C_SEX", "DC1117EW_C_AGE", "DC2101EW_C_ETHPUK11"])
    table.Area = utils.remap(rawtable[0], self.geog_map)
    table.DC1117EW_C_SEX = utils.remap(rawtable[1], [1, 2])
    table.DC1117EW_C_AGE = utils.remap(rawtable[2], range(1, 87))
    table.DC2101EW_C_ETHPUK11 = utils.remap(rawtable[3], self.eth_map)

    # consistency checks (in fast mode just report discrepancies)
    self.__check(table, age_sex, oa_eth["result"])

    return table
Example no. 2
0
  def test_QISI(self):
    # 2-d: two 1-d marginals over a uniform seed
    sexes = np.array([52, 48])
    ages = np.array([10, 77, 13])
    seed = np.ones([len(sexes), len(ages)])

    p = hl.qisi(seed, [np.array([0]), np.array([1])], [sexes, ages])
    print(p)
    self.assertTrue(p["conv"])
    self.assertLess(p["chiSq"], 0.04)
    self.assertGreater(p["pValue"], 0.9)
    # TODO check the degeneracy calculation before asserting on it
    self.assertEqual(p["pop"], 100.0)
    # each marginal is recovered by summing out the other dimension
    self.assertTrue(np.allclose(p["result"].sum(0), ages))
    self.assertTrue(np.allclose(p["result"].sum(1), sexes))

    # 3-d: three 1-d marginals over a uniform seed
    marg_a = np.array([52, 40, 4, 4])
    marg_b = np.array([87, 10, 3])
    marg_c = np.array([55, 15, 6, 12, 12])
    seed = np.ones([len(marg_a), len(marg_b), len(marg_c)])

    p = hl.qisi(seed, [np.array([0]), np.array([1]), np.array([2])], [marg_a, marg_b, marg_c])
    self.assertTrue(p["conv"])
    self.assertLess(p["chiSq"], 70)  # seems a bit high
    self.assertGreater(p["pValue"], 0.0)  # seems a bit low
    self.assertEqual(p["pop"], 100.0)
    self.assertTrue(np.allclose(p["result"].sum((0, 1)), marg_c))
    self.assertTrue(np.allclose(p["result"].sum((1, 2)), marg_a))
    self.assertTrue(np.allclose(p["result"].sum((2, 0)), marg_b))

    # 4-d: four 1-d marginals over a uniform seed
    margins = [np.array([52, 48]),
               np.array([87, 13]),
               np.array([67, 33]),
               np.array([55, 45])]
    seed = np.ones([len(m) for m in margins])

    p = hl.qisi(seed, [np.array([d]) for d in range(4)], margins)
    self.assertTrue(p["conv"])
    self.assertLess(p["chiSq"], 5.5)
    self.assertGreater(p["pValue"], 0.02)
    self.assertEqual(p["pop"], 100)
    self.assertTrue(np.allclose(p["result"].sum((0, 1, 2)), margins[3]))
    self.assertTrue(np.allclose(p["result"].sum((1, 2, 3)), margins[0]))
    self.assertTrue(np.allclose(p["result"].sum((2, 3, 0)), margins[1]))
    self.assertTrue(np.allclose(p["result"].sum((3, 0, 1)), margins[2]))
Example no. 3
0
    def __add_households(self, area, constraints):
        """Synthesise the household population for one area and append it to self.dwellings.

        Progressively combines census tables (LC4402/4404/4405/4408/4202/4605)
        into a joint distribution over tenure, rooms, occupants, bedrooms,
        household type, central heating, build type, ethnicity, cars and NS-SEC,
        then flattens it into per-dwelling rows.

        Args:
            area: geography code used to filter each census table.
            constraints: seed array for the first QIS-I stage
                (tenure x rooms x occupants x bedrooms x hhtype structure,
                per the dimension indices below).
        """
        # TODO use actual values from tables
        # TODO make members?                            # Dim (overall dim)
        tenure_map = self.lc4402.C_TENHUK11.unique()  # 0
        rooms_map = self.lc4404.C_ROOMS.unique()  # 1
        occupants_map = self.lc4404.C_SIZHUK11.unique()  # 2
        bedrooms_map = self.lc4405.C_BEDROOMS.unique()  # 3 [1,2,3,4] or [-1]
        hhtype_map = self.lc4408.C_AHTHUK11.unique()  # 4
        #
        ch_map = self.lc4402.C_CENHEATHUK11.unique()  # 1 (5)
        buildtype_map = self.lc4402.C_TYPACCOM.unique()  # 2 (6)
        eth_map = self.lc4202.C_ETHHUK11.unique()  # 3 (7)
        cars_map = self.lc4202.C_CARSNO.unique()  # 4 (8)
        econ_map = self.lc4605.C_NSSEC.unique()  # 5 (9)

        # tenure x rooms x occupants for this area
        tenure_rooms_occ = self.lc4404.loc[self.lc4404.GEOGRAPHY_CODE ==
                                           area].copy()
        # unmap indices (category values -> 0-based array indices)
        # TODO might be quicker to unmap the entire table upfront?
        utils.unmap(tenure_rooms_occ.C_TENHUK11, tenure_map)
        utils.unmap(tenure_rooms_occ.C_ROOMS, rooms_map)
        utils.unmap(tenure_rooms_occ.C_SIZHUK11, occupants_map)

        m4404 = utils.unlistify(
            tenure_rooms_occ, ["C_TENHUK11", "C_ROOMS", "C_SIZHUK11"],
            [len(tenure_map),
             len(rooms_map),
             len(occupants_map)], "OBS_VALUE")

        # tenure x bedrooms x occupants; no bedroom info in Scottish data
        tenure_beds_occ = self.lc4405.loc[self.lc4405.GEOGRAPHY_CODE ==
                                          area].copy()

        # unmap indices
        utils.unmap(tenure_beds_occ.C_BEDROOMS, bedrooms_map)
        utils.unmap(tenure_beds_occ.C_TENHUK11, tenure_map)
        utils.unmap(tenure_beds_occ.C_SIZHUK11, occupants_map)

        m4405 = utils.unlistify(
            tenure_beds_occ, ["C_TENHUK11", "C_BEDROOMS", "C_SIZHUK11"],
            [len(tenure_map),
             len(bedrooms_map),
             len(occupants_map)], "OBS_VALUE")

        # tenure x household type
        tenure_accom = self.lc4408.loc[self.lc4408.GEOGRAPHY_CODE ==
                                       area].copy()

        utils.unmap(tenure_accom.C_TENHUK11, tenure_map)
        utils.unmap(tenure_accom.C_AHTHUK11, hhtype_map)

        m4408 = utils.unlistify(
            tenure_accom, ["C_TENHUK11", "C_AHTHUK11"],
            [len(tenure_map), len(hhtype_map)], "OBS_VALUE")

        # TODO relax IPF tolerance and maxiters when used within QISI?
        m4408dim = np.array([0, 4])
        # collapse m4408 tenure dim for scotland (tenure not comparable there)
        if self.scotland:
            m4408 = np.sum(m4408, axis=0)
            m4408dim = np.array([4])
        # stage 1: tenure/rooms/occupants/bedrooms/hhtype joint, seeded by constraints
        p0 = humanleague.qisi(
            constraints, [np.array([0, 1, 2]),
                          np.array([0, 3, 2]), m4408dim],
            [m4404, m4405, m4408])

        # drop the survey seed if there are convergence problems
        # TODO check_humanleague_result needs complete refactoring
        # NOTE(review): `seed` here is a module-level name not visible in this
        # block - presumably a project module providing fallback seeds; verify
        if not isinstance(p0, dict) or not p0["conv"]:
            print("Dropping TROBH constraint due to convergence failure")
            p0 = humanleague.qisi(
                seed.get_impossible_TROBH(),
                [np.array([0, 1, 2]),
                 np.array([0, 3, 2]), m4408dim], [m4404, m4405, m4408])
            utils.check_humanleague_result(p0, [m4404, m4405, m4408],
                                           seed.get_impossible_TROBH())
        else:
            utils.check_humanleague_result(p0, [m4404, m4405, m4408],
                                           constraints)

        # tenure x central heating x build type
        tenure_ch_accom = self.lc4402.loc[self.lc4402.GEOGRAPHY_CODE ==
                                          area].copy()
        utils.unmap(tenure_ch_accom.C_CENHEATHUK11, ch_map)
        utils.unmap(tenure_ch_accom.C_TENHUK11, tenure_map)
        utils.unmap(tenure_ch_accom.C_TYPACCOM, buildtype_map)

        m4402 = utils.unlistify(
            tenure_ch_accom, ["C_TENHUK11", "C_CENHEATHUK11", "C_TYPACCOM"],
            [len(tenure_map), len(ch_map),
             len(buildtype_map)], "OBS_VALUE")

        # tenure x ethnicity x cars
        tenure_eth_car = self.lc4202.loc[self.lc4202.GEOGRAPHY_CODE ==
                                         area].copy()
        utils.unmap(tenure_eth_car.C_ETHHUK11, eth_map)
        utils.unmap(tenure_eth_car.C_CARSNO, cars_map)
        utils.unmap(tenure_eth_car.C_TENHUK11, tenure_map)

        m4202 = utils.unlistify(
            tenure_eth_car, ["C_TENHUK11", "C_ETHHUK11", "C_CARSNO"],
            [len(tenure_map), len(eth_map),
             len(cars_map)], "OBS_VALUE")

        # tenure x NS-SEC (economic status)
        econ = self.lc4605.loc[self.lc4605.GEOGRAPHY_CODE == area].copy()
        utils.unmap(econ.C_NSSEC, econ_map)
        utils.unmap(econ.C_TENHUK11, tenure_map)

        # econ counts often slightly lower, need to tweak
        ##econ = utils.adjust(econ, tenure_eth_car)

        m4605 = utils.unlistify(
            econ, ["C_TENHUK11", "C_NSSEC"],
            [len(tenure_map), len(econ_map)], "OBS_VALUE")

        m4605_sum = np.sum(m4605)
        m4202_sum = np.sum(m4202)

        # rescale the NS-SEC table to match the LC4202 total when they disagree
        if m4605_sum != m4202_sum:
            print("LC4402: %d LC4605: %d -> %d " %
                  (np.sum(m4402), m4605_sum, m4202_sum),
                  end="")
            tenure_4202 = np.sum(m4202, axis=(1, 2))
            nssec_4605_adj = humanleague.prob2IntFreq(
                np.sum(m4605, axis=0) / m4605_sum, m4202_sum)["freq"]
            # Convergence problems can occur when e.g. one of the tenure rows is zero yet the marginal total is nonzero,
            # Can get round this by adding a small number to the seed
            # effectively allowing zero states to be occupied with a finite probability
            m4605_adj = humanleague.qisi(
                m4605.astype(float) + 1.0 / m4202_sum,
                [np.array([0]), np.array([1])], [tenure_4202, nssec_4605_adj])

            utils.check_humanleague_result(m4605_adj,
                                           [tenure_4202, nssec_4605_adj])
            m4605 = m4605_adj["result"]

        # stage 2: extend the stage-1 joint with heating/buildtype, eth/cars
        # and NS-SEC; no seed constraint so just use QIS
        if self.scotland:
            # tenures not mappable in LC4202
            m4202 = np.sum(m4202, axis=0)
            m4605 = np.sum(m4605, axis=0)
            p1 = humanleague.qis([
                np.array([0, 1, 2, 3, 4]),
                np.array([0, 5, 6]),
                np.array([7, 8]),
                np.array([9])
            ], [p0["result"], m4402, m4202, m4605])
        else:
            p1 = humanleague.qis([
                np.array([0, 1, 2, 3, 4]),
                np.array([0, 5, 6]),
                np.array([0, 7, 8]),
                np.array([0, 9])
            ], [p0["result"], m4402, m4202, m4605])
        utils.check_humanleague_result(p1, [p0["result"], m4402, m4202, m4605])

        # flatten joint distribution into one row per dwelling
        table = humanleague.flatten(p1["result"])

        chunk = pd.DataFrame(columns=self.dwellings.columns.values)
        chunk.Area = np.repeat(area, len(table[0]))
        chunk.LC4402_C_TENHUK11 = utils.remap(table[0], tenure_map)
        chunk.QS420_CELL = np.repeat(self.NOTAPPLICABLE, len(table[0]))
        chunk.LC4404_C_ROOMS = utils.remap(table[1], rooms_map)
        chunk.LC4404_C_SIZHUK11 = utils.remap(table[2], occupants_map)
        chunk.LC4405EW_C_BEDROOMS = utils.remap(table[3], bedrooms_map)
        chunk.LC4408_C_AHTHUK11 = utils.remap(table[4], hhtype_map)
        chunk.LC4402_C_CENHEATHUK11 = utils.remap(table[5], ch_map)
        chunk.LC4402_C_TYPACCOM = utils.remap(table[6], buildtype_map)
        chunk.CommunalSize = np.repeat(self.NOTAPPLICABLE, len(table[0]))
        chunk.LC4202_C_ETHHUK11 = utils.remap(table[7], eth_map)
        chunk.LC4202_C_CARSNO = utils.remap(table[8], cars_map)
        chunk.LC4605_C_NSSEC = utils.remap(table[9], econ_map)
        # BUGFIX: DataFrame.append was deprecated in pandas 1.4 and removed in
        # 2.0 - pd.concat with ignore_index=True produces the same result
        self.dwellings = pd.concat([self.dwellings, chunk], ignore_index=True)
Example no. 4
0
    def test_QISI(self):
        """Exercise hl.qisi in 2/3/4 dimensions, plus its dimension checks."""
        # 2-d: two 1-d marginals over a uniform seed
        rows = np.array([52, 48])
        cols = np.array([10, 77, 13])
        seed = np.ones([len(rows), len(cols)])

        p = hl.qisi(seed, [np.array([0]), np.array([1])], [rows, cols])
        self.assertTrue(p["conv"])
        self.assertLess(p["chiSq"], 0.04)
        self.assertGreater(p["pValue"], 0.9)
        # TODO check the degeneracy calculation before asserting on it
        self.assertEqual(p["pop"], 100.0)
        self.assertTrue(np.allclose(p["result"].sum(0), cols))
        self.assertTrue(np.allclose(p["result"].sum(1), rows))

        # 3-d: three 1-d marginals over a uniform seed
        marg_a = np.array([52, 40, 4, 4])
        marg_b = np.array([87, 10, 3])
        marg_c = np.array([55, 15, 6, 12, 12])
        seed = np.ones([len(marg_a), len(marg_b), len(marg_c)])

        p = hl.qisi(seed, [np.array([0]), np.array([1]), np.array([2])],
                    [marg_a, marg_b, marg_c])
        self.assertTrue(p["conv"])
        self.assertLess(p["chiSq"], 70)  # seems a bit high
        self.assertGreater(p["pValue"], 0.0)  # seems a bit low
        self.assertEqual(p["pop"], 100.0)
        self.assertTrue(np.allclose(p["result"].sum((0, 1)), marg_c))
        self.assertTrue(np.allclose(p["result"].sum((1, 2)), marg_a))
        self.assertTrue(np.allclose(p["result"].sum((2, 0)), marg_b))

        # 4-d: four 1-d marginals over a uniform seed
        margins = [np.array([52, 48]),
                   np.array([87, 13]),
                   np.array([67, 33]),
                   np.array([55, 45])]
        seed = np.ones([len(m) for m in margins])

        p = hl.qisi(seed, [np.array([d]) for d in range(4)], margins)
        self.assertTrue(p["conv"])
        self.assertLess(p["chiSq"], 5.5)
        self.assertGreater(p["pValue"], 0.02)
        self.assertEqual(p["pop"], 100)
        self.assertTrue(np.allclose(p["result"].sum((0, 1, 2)), margins[3]))
        self.assertTrue(np.allclose(p["result"].sum((1, 2, 3)), margins[0]))
        self.assertTrue(np.allclose(p["result"].sum((2, 3, 0)), margins[1]))
        self.assertTrue(np.allclose(p["result"].sum((3, 0, 1)), margins[2]))

        # dimension-consistency failures return an error string, not a dict
        def overlapping_indices():
            return [np.array([0, 1]), np.array([1, 2]), np.array([2, 3])]

        m1 = np.full([2, 3], 5 * 7, dtype=int)
        m2 = np.full([3, 5], 7 * 2, dtype=int)
        m3 = np.full([5, 7], 2 * 3, dtype=int)
        seed = np.ones([2, 3, 7, 5])

        p = hl.qisi(seed, overlapping_indices(), [m1, m2, m3])
        self.assertEqual(
            p,
            "seed dimensions [2, 3, 7, 5] are inconsistent with that implied by marginals ([2, 3, 5, 7])"
        )

        p = hl.ipf(seed, overlapping_indices(),
                   [m1.astype(float),
                    m2.astype(float),
                    m3.astype(float)])
        self.assertEqual(
            p,
            "seed dimensions [2, 3, 7, 5] are inconsistent with that implied by marginals ([2, 3, 5, 7])"
        )

        # wrong seed rank is also reported as an error string
        seed = np.ones((2, 3, 5))
        p = hl.qisi(seed, overlapping_indices(), [m1, m2, m3])
        self.assertEqual(
            p,
            "seed dimensions 3 is inconsistent with that implied by marginals (4)"
        )

        p = hl.ipf(seed, overlapping_indices(),
                   [m1.astype(float),
                    m2.astype(float),
                    m3.astype(float)])
        self.assertEqual(
            p,
            "seed dimensions 3 is inconsistent with that implied by marginals (4)"
        )
Example no. 5
0
    def __get_census_data_sc(self):
        """Synthesise Scottish DC1117 (age/sex) and DC2101 (ethnicity/sex) tables.

        Scotland publishes only univariate tables at the target resolution, so
        geography-level joint tables are synthesised via QIS-I, seeded with
        LAD-level bivariate distributions.

        Returns:
            (dc1117sc, dc2101sc, None) - two pd.DataFrames in the same
            column layout as the E&W equivalents; the third element is a
            placeholder (communal data not implemented here).
        """
        print(
            "Synthesising Scottish DC1117/DC2101 tables from LAD-level seeds and univariate data"
        )

        # age only, no gender
        qs103sc = self.data_api_sc.get_data(
            "QS103SC",
            self.region,
            self.resolution,
            category_filters={"QS103SC_0_CODE": range(1, 102)})
        # cap ages at 86 (aggregating counts above into that band)
        qs103sc = utils.cap_value(qs103sc, "QS103SC_0_CODE", 86, "OBS_VALUE")
        # sex only
        qs104sc = self.data_api_sc.get_data(
            "QS104SC",
            self.region,
            self.resolution,
            category_filters={"QS104SC_0_CODE": [1, 2]})

        ngeogs = len(qs103sc.GEOGRAPHY_CODE.unique())
        nages = len(qs103sc.QS103SC_0_CODE.unique())
        nsexes = 2

        # Get a LAD-level seed population by age and gender
        dc1117lad = self.data_api_sc.get_data("DC1117SC",
                                              self.region,
                                              "LAD",
                                              category_filters={
                                                  "DC1117SC_0_CODE": [1, 2],
                                                  "DC1117SC_1_CODE":
                                                  range(1, 102)
                                              })
        dc1117lad = utils.cap_value(dc1117lad, "DC1117SC_1_CODE", 86,
                                    "OBS_VALUE")
        dc1117seed = utils.unlistify(dc1117lad,
                                     ["DC1117SC_0_CODE", "DC1117SC_1_CODE"],
                                     [2, 86], "OBS_VALUE").astype(float)
        # expand to all geogs within LAD: seed becomes (geog, age, sex)
        # after the dstack/transpose - TODO confirm axis order against qisi call
        dc1117seed = np.dstack([dc1117seed] * ngeogs).T

        # per-geography age and sex marginals
        ga = utils.unlistify(qs103sc, ["GEOGRAPHY_CODE", "QS103SC_0_CODE"],
                             [ngeogs, nages], "OBS_VALUE")
        gs = utils.unlistify(qs104sc, ["GEOGRAPHY_CODE", "QS104SC_0_CODE"],
                             [ngeogs, nsexes], "OBS_VALUE")
        # synthesise geog x age x sex constrained by both marginals
        msynth = hl.qisi(dc1117seed,
                         [np.array([0, 1]), np.array([0, 2])], [ga, gs])
        utils.check_result(msynth)
        # TODO pending humanleague seed consistency check
        assert dc1117seed.shape == msynth["result"].shape

        dc1117sc = utils.listify(msynth["result"], "OBS_VALUE",
                                 ["GEOGRAPHY_CODE", "C_AGE", "C_SEX"])
        # map 0-based indices back to actual codes
        dc1117sc.GEOGRAPHY_CODE = utils.remap(dc1117sc.GEOGRAPHY_CODE,
                                              qs103sc.GEOGRAPHY_CODE.unique())
        dc1117sc.C_AGE = utils.remap(dc1117sc.C_AGE,
                                     qs103sc.QS103SC_0_CODE.unique())
        dc1117sc.C_SEX = utils.remap(dc1117sc.C_SEX, [1, 2])

        # These ETH codes are slightly different to E&W codes...
        # using the ETH totals rather than the detailed categories
        eths = [1, 8, 9, 15, 18, 22]
        ks201sc = self.data_api_sc.get_data(
            "KS201SC",
            self.region,
            self.resolution,
            category_filters={"KS201SC_0_CODE": eths})
        neths = len(ks201sc.KS201SC_0_CODE.unique())

        # Get a LAD-level seed population by ethnicity and sex
        dc2101lad = self.data_api_sc.get_data("DC2101SC",
                                              self.region,
                                              "LAD",
                                              category_filters={
                                                  "DC2101SC_0_CODE": eths,
                                                  "DC2101SC_1_CODE": [1, 2],
                                                  "DC2101SC_2_CODE": 0
                                              })
        dc2101seed = utils.unlistify(dc2101lad,
                                     ["DC2101SC_1_CODE", "DC2101SC_0_CODE"],
                                     [2, neths], "OBS_VALUE").astype(float)
        # expand to all geogs within LAD
        dc2101seed = np.dstack([dc2101seed] * ngeogs).T

        # per-geography ethnicity marginal; reuses the sex marginal gs above
        ge = utils.unlistify(ks201sc, ["GEOGRAPHY_CODE", "KS201SC_0_CODE"],
                             [ngeogs, neths], "OBS_VALUE")
        msynth = hl.qisi(dc2101seed,
                         [np.array([0, 1]), np.array([0, 2])], [ge, gs])
        utils.check_result(msynth)
        assert dc2101seed.shape == msynth["result"].shape

        dc2101sc = utils.listify(msynth["result"], "OBS_VALUE",
                                 ["GEOGRAPHY_CODE", "C_ETHPUK11", "C_SEX"])
        dc2101sc.GEOGRAPHY_CODE = utils.remap(dc2101sc.GEOGRAPHY_CODE,
                                              qs103sc.GEOGRAPHY_CODE.unique())
        dc2101sc.C_ETHPUK11 = utils.remap(dc2101sc.C_ETHPUK11,
                                          ks201sc.KS201SC_0_CODE.unique())
        dc2101sc.C_SEX = utils.remap(dc2101sc.C_SEX, [1, 2])

        # both tables must describe the same total population
        assert dc1117sc.OBS_VALUE.sum() == dc2101sc.OBS_VALUE.sum()

        # TODO Aberdeen has 174869 in DC6206SC at LAD level
        # dc6206sc = self.data_api_sc.get_data("DC6206SC", self.region, "LAD", category_filters={"DC6206SC_1_CODE": 0,
        #                                                                                        "DC6206SC_0_CODE": [1,2,3,4,5,6],
        #                                                                                        "DC6206SC_2_CODE": [1,2,3,4,5,6,7,8,9]})
        # # dc6206sc = self.data_api_sc.get_data("DC6206SC", "MSOA11", self.region)
        # #raise NotImplementedError("Problem with MSOA-level detailed characteristics in Scottish census data")

        return (dc1117sc, dc2101sc, None)