Example #1
0
    def __init__(self, region, resolution, cache_dir, upstream_dir, input_dir,
                 output_dir):
        """Set up the model for one region at the given resolution, loading
        the household projections plus the census-2011-based base population.
        """
        self.region = region
        self.resolution = resolution
        self.upstream_dir = upstream_dir
        self.input_dir = input_dir
        self.output_dir = output_dir

        # Scottish area codes start with "S"
        self.scotland = (self.region[0] == "S")

        # subnational household projections (downloaded/cached)
        self.snhpdata = SNHPData.SNHPData(cache_dir)

        # fallback tables, needed for pre-2014/6 data for Wales/Scotland/NI
        if self.scotland:
            self.snhp_fallback = pd.read_csv(
                self.input_dir + "/snhp2016_sc.csv",
                index_col="GEOGRAPHY_CODE")
        else:
            self.snhp_fallback = pd.read_csv(
                self.input_dir + "/snhp2014.csv", index_col="AreaCode")

        # output from the microsynthesis (census 2011 based)
        self.base_population = self.__get_base_populationdata()
Example #2
0
def fetch_dummy_data_into_raw():
    """Populate the raw test data directory using dummy (canned) API data.

    Aborts the process unless the NOMIS_API_KEY environment variable is set
    to "DUMMY", so the live API is never hit by accident. Constructing the
    data objects triggers the (dummy) downloads as a side effect.
    """
    # .get() so a missing env var is reported below instead of raising
    # KeyError before the explanatory message can be printed
    api_key = os.environ.get('NOMIS_API_KEY')
    if api_key != "DUMMY":
        print("This Function requires to be NOMIS_API_KEY == 'DUMMY' in env.\n"
              "Currently set to {} ".format(api_key))
        # nonzero status: the precondition failed, so don't signal success
        sys.exit(1)
    NPPData.NPPData(test_data_dir)
    MYEData.MYEData(test_data_dir)
    SNPPData.SNPPData(test_data_dir)
    SNHPData.SNHPData(test_data_dir)
Example #3
0
    def setUp(self):
        """Build the data objects used by the tests from TEST_DATA_DIR.

        The environment must already be configured correctly - it is too
        late to override it in this function unfortunately.
        """
        self.mye = MYEData.MYEData(TEST_DATA_DIR)
        self.npp = NPPData.NPPData(TEST_DATA_DIR)
        self.snpp = SNPPData.SNPPData(TEST_DATA_DIR)
        self.snhp = SNHPData.SNHPData(TEST_DATA_DIR)

        # fix issue with test dataset: coerce year column to int
        england = self.snpp.data[utils.EN]
        england.PROJECTED_YEAR_NAME = england.PROJECTED_YEAR_NAME.astype(int)
Example #4
0
    def __init__(self, input_files, ht_trans, cache_dir):
        """Load the initial per-LAD populations, the household-type
        transition matrix and the household projections.

        input_files: per-LAD population csv files; the LAD code is the
            second "_"-separated token of each filename
        ht_trans: csv containing the household-type transition matrix (%)
        cache_dir: cache directory for the SNHP data
        """
        self.cache_dir = cache_dir
        # guard for no input data (if more MPI processes than input files)
        if not input_files:
            raise ValueError("proc {}/{}: no input data".format(
                no.mpi.rank(), no.mpi.size()))
        self.lads = [file.split("_")[1] for file in input_files]
        # assumes all files in same dir
        self.data_dir = os.path.dirname(input_files[0])

        # combined population of all LADs, each row tagged with its LAD code
        frames = []
        for file in input_files:
            no.log("reading initial population: %s" % file)
            data = pd.read_csv(file)
            data["LAD"] = file.split("_")[1]
            frames.append(data)
        # DataFrame.append is deprecated (removed in pandas 2.0) and
        # quadratic in a loop; a single concat is equivalent (indices are
        # preserved, and thus duplicated across files, as before)
        self.pop = pd.concat(frames)
        # no.log(self.pop.LC4408_C_AHTHUK11.unique())
        # self.cat = self.pop.LC4408_C_AHTHUK11.unique()
        # "C_AHTHUK11": {
        #   "0": "All categories: Household type",
        #   "1": "One person household",
        #   "2": "Married or same-sex civil partnership couple household",
        #   "3": "Cohabiting couple household",
        #   "4": "Lone parent household",
        #   "5": "Multi-person household"
        # }
        self.cat = {"LC4408_C_AHTHUK11": np.array([1, 2, 3, 4, 5])}

        # NOTE: pandas stores column-major order but numpy view is row major so the matrix looks right but is actually transposed
        # (no amount of transposing actually changes the memory layout (it just changes the view)
        # the C++ code assumes the transition matrix is column major (col sums to unity not rows)
        self.t = pd.read_csv(ht_trans).set_index(
            "initial state").values / 100.0
        # check rows sum to unity
        assert np.allclose(np.sum(self.t, 1), np.ones(len(self.t)))

        # TODO get snhp
        self.snhp = SNHPData.SNHPData(self.cache_dir)

        self.projection = self.snhp.aggregate(self.lads)
Example #5
0
    def __init__(self, cache_dir, file_pattern, areas):
        """Load the initial per-area populations, the household-type
        transition matrix and the household projections."""
        self.areas = areas
        self.file_pattern = file_pattern
        self.cache_dir = cache_dir

        year = no.timeline[0]

        # per-area populations keyed by area code
        self.pop = {}
        for area in areas:
            file = os.path.join(self.cache_dir,
                                self.file_pattern % (area, year))
            no.log("reading initial population: %s" % file)
            df = pd.read_csv(file)
            # keep a copy of the initial household type for later reference
            df["LC4408_C_AHTHUK11_orig"] = df.LC4408_C_AHTHUK11
            self.pop[area] = df
        # no.log(self.pop.LC4408_C_AHTHUK11.unique())
        # self.cat = self.pop.LC4408_C_AHTHUK11.unique()
        # household type categories, per "C_AHTHUK11":
        #   1 = one person household
        #   2 = married or same-sex civil partnership couple household
        #   3 = cohabiting couple household
        #   4 = lone parent household
        #   5 = multi-person household
        # (0 = "all categories" is excluded)
        self.cat = {"LC4408_C_AHTHUK11": np.array([1, 2, 3, 4, 5])}

        # NOTE: pandas stores column-major order but numpy view is row major so the matrix looks right but is actually transposed
        # (no amount of transposing actually changes the memory layout (it just changes the view)
        # the C++ code assumes the transition matrix is column major (col sums to unity not rows)
        tpm_file = os.path.join(self.cache_dir, "w_hhtype_dv-tpm.csv")
        self.t = pd.read_csv(tpm_file).set_index("initial state").values / 100.0
        # check rows sum to unity
        assert np.allclose(np.sum(self.t, 1), np.ones(len(self.t)))

        # TODO get snhp
        self.snhp = SNHPData.SNHPData(self.cache_dir)

        self.projection = self.snhp.aggregate(self.areas)
Example #6
0
    def setUp(self):
        """Construct the test data objects from TEST_DATA_DIR.

        The env must already be set up correctly for tests - it's too late
        to override it in this function unfortunately.
        """
        print(
            "Warning: Some SNPP tests are disabled temporarily for the sake of development of the new dynamic "
            "microsimulation but the code works")
        # Build the test data objects from the raw_data directory.
        # MYE needs complete data when upgrading to new estimates; NPP and
        # SNPP test versions must be rebuilt on every migration.
        self.mye = MYEData.MYEData(TEST_DATA_DIR)
        self.npp = NPPData.NPPData(TEST_DATA_DIR)
        self.snpp = SNPPData.SNPPData(TEST_DATA_DIR)
        self.snhp = SNHPData.SNHPData(TEST_DATA_DIR)

        # fix issue with test dataset: coerce year column to int
        en_data = self.snpp.data[utils.EN]
        en_data.PROJECTED_YEAR_NAME = en_data.PROJECTED_YEAR_NAME.astype(int)

        if self.npp.data_api.key != "DUMMY" or self.snpp.data_api.key != "DUMMY":
            print("Test requires NOMIS_API_KEY=DUMMY in env")
            sys.exit()
Example #7
0
    def __init__(self, params):
        """Initialise data sources, projections and the output path from
        the run parameters dict.

        Raises RuntimeError for an unknown coverage, ValueError if the
        output directory does not exist.
        """
        coverage_map = {
            "EW": ukpoputils.EW,
            "GB": ukpoputils.GB,
            "UK": ukpoputils.UK
        }
        self.coverage = coverage_map.get(params["coverage"])
        if not self.coverage:
            raise RuntimeError("invalid coverage: %s" % params["coverage"])

        self.cache_dir = params["cache_dir"]
        # census data sources: England/Wales, Scotland, Northern Ireland
        self.census_ew = Nomisweb.Nomisweb(self.cache_dir)
        self.census_sc = NRScotland.NRScotland(self.cache_dir)
        self.census_ni = NISRA.NISRA(self.cache_dir)
        # population projections
        self.mye = MYEData.MYEData(self.cache_dir)
        self.snpp = SNPPData.SNPPData(self.cache_dir)
        self.npp = NPPData.NPPData(self.cache_dir)
        # households
        self.baseline = params["base_projection"]

        if not os.path.isdir(params["output_dir"]):
            raise ValueError("Output directory %s not found" %
                             params["output_dir"])

        scenario_name = os.path.basename(params["scenario"])
        output_name = "simim_%s_%s_%s" % (
            params["model_type"], params["base_projection"], scenario_name)
        self.output_file = os.path.join(params["output_dir"], output_name)
        self.custom_snpp_variant = pd.DataFrame()

        # subnational household projections
        self.snhp = SNHPData.SNHPData(self.cache_dir)

        # holder for shapefile when requested
        self.shapefile = None
import matplotlib.pyplot as plt
import ukpopulation.snppdata as SNPPData
import ukpopulation.snhpdata as SNHPData
#import ukpopulation.utils as utils

# initialise the population modules
snhp = SNHPData.SNHPData()
snpp = SNPPData.SNPPData()

lad = "E08000021"  # Newcastle

# project from 2016 up to the latest year available for this LAD
start_year = 2016
end_year = snhp.max_year(lad)
years = range(start_year, end_year + 1)

# aggregate household and person totals over the period
hh = snhp.aggregate(lad, years)
p = snpp.aggregate(["C_AGE", "GENDER"], lad, years)

# plot households on the left axis and people on a twinned right axis
fig, ax1 = plt.subplots()
ax1.plot(hh.PROJECTED_YEAR_NAME, hh.OBS_VALUE, "bo", label="households")
ax1.set_xlabel("Year")
ax1.set_ylabel("Households")
ax1.legend()
ax2 = ax1.twinx()
ax2.plot(p.PROJECTED_YEAR_NAME, p.OBS_VALUE, "ro", label="people")
ax2.set_xlabel("Year")
ax2.set_ylabel("People")
ax2.legend(loc=4)

plt.title(lad + " Households/People Projections")
plt.show()
Example #9
0
def fetch_full_data_into_cache():
    """Download the complete datasets into the cache directory.

    The NPP variant projections are force-loaded; the remaining objects
    fetch their data as a side effect of construction.
    """
    variants = ['hhh', 'ppp', 'lll']
    npp = NPPData.NPPData()
    npp.force_load_variants(variants)
    MYEData.MYEData()
    SNPPData.SNPPData()
    SNHPData.SNHPData()
Example #10
0
import pandas as pd

import ukpopulation.snhpdata as SNHPData
import ukpopulation.utils  #as utils

# England: drop the historical (2001-2013) year columns
snhp_e = pd.read_csv("data/ons_hh_e_2016-2041.csv").drop(
    [str(y) for y in range(2001, 2014)], axis=1)
# Wales: drop the unnamed index columns left over from the export
snhp_w = pd.read_csv("data/hh_w_2014-2039.csv").drop(
    ["Unnamed: 0", "Unnamed: 1"], axis=1)

# Scotland: take directly from the cached SNHP data
snhp_s = SNHPData.SNHPData("../microsimulation/cache").data[
    ukpopulation.utils.SC]

# aggregate England by area code and keep only English LADs (E0...)
snhp_e = (snhp_e.groupby("CODE").sum().reset_index()
          .rename({"CODE": "GEOGRAPHY_CODE"}, axis=1))
snhp_e = snhp_e[snhp_e.GEOGRAPHY_CODE.str.startswith("E0")]

# aggregate Wales by area code
snhp_w = snhp_w.groupby("GEOGRAPHY_CODE").sum().reset_index()

# stitch the three nations together and save
snhp = pd.concat([snhp_e, snhp_w, snhp_s], ignore_index=True, sort=False)

snhp.to_csv("./snhp.csv")