def __init__(self, p, n):
    """Initialise the test model: per-rank state vector, symmetric transition
    matrix, and an initial population of n individuals in this rank's state.

    Args:
      p: probability of moving to each *other* state per step
      n: number of individuals per process
    """
    # one state per MPI process
    self.s = np.arange(neworder.size())
    # transition matrix: stay-probability on the diagonal, p everywhere else
    # (rows sum to unity by construction)
    self.p = np.identity(neworder.size()) * (1 - neworder.size() * p) + p
    self.n = n
    # all begin with a globally unique id and state equal to this process' rank
    first_id = neworder.rank() * n
    self.pop = pd.DataFrame({
        "id": np.arange(first_id, first_id + n),
        "state": np.full(n, neworder.rank())
    })
def test(self):
    """Run one timestep: apply the state transition, then migrate individuals
    between processes so each row ends up on the process matching its state."""
    # generate some movement
    neworder.transition(self.s, self.p, self.pop, "state")

    # send migrants: rows whose state is another process' rank go to that process
    for s in range(neworder.size()):
        if s != neworder.rank():
            emigrants = self.pop[self.pop.state == s]
            #neworder.log("sending %d emigrants to %d" % (len(emigrants), s))
            neworder.send(emigrants, s)

    # remove the emigrants
    self.pop = self.pop[self.pop.state == neworder.rank()]

    # receive migrants from every other process
    for s in range(neworder.size()):
        if s != neworder.rank():
            immigrants = neworder.receive(s)
            #neworder.log("received %d immigrants from %d" % (len(immigrants), s))
            # BUG FIX: DataFrame.append was removed in pandas 2.0 - use concat
            self.pop = pd.concat([self.pop, immigrants])
def test():
    """Check that synced RNG streams broadcast consistently across processes.

    Trivially returns True when run on a single process.
    """
    t = test_.Test()
    if neworder.size() == 1:
        neworder.log("Skipping MPI tests")
        return True

    # test ustream/sequence: streams are synced (not independent), so the
    # variates drawn locally must equal those broadcast from rank 0
    t.check(not neworder.indep())
    u = neworder.ustream(1000)
    v = neworder.broadcast(u, 0)
    # u == v for all processes
    t.check(np.array_equal(u, v))

    return not t.any_failed
def __init__(self, input_files, ht_trans, cache_dir):
    """Load per-LAD household populations and the household-type transition matrix.

    Args:
      input_files: this process' list of population csv files (one per LAD)
      ht_trans: csv containing the household type transition matrix (percentages)
      cache_dir: directory used to cache SNHP data
    Raises:
      ValueError: if this process has been allocated no input files
    """
    self.cache_dir = cache_dir
    # guard for no input data (if more MPI processes than input files)
    if not len(input_files):
        raise ValueError("proc {}/{}: no input data".format(no.rank(), no.size()))

    self.lads = [file.split("_")[1] for file in input_files]
    # assumes all files in same dir
    self.data_dir = os.path.dirname(input_files[0])

    # read each file, tag rows with the LAD code, then concatenate once
    # (BUG FIX: DataFrame.append was removed in pandas 2.0, and appending in a
    # loop was quadratic - collect frames and concat once instead)
    frames = []
    for file in input_files:
        no.log("reading initial population: %s" % file)
        data = pd.read_csv(file)
        data["LAD"] = file.split("_")[1]
        frames.append(data)
    self.pop = pd.concat(frames)

    # no.log(self.pop.LC4408_C_AHTHUK11.unique())
    # "C_AHTHUK11": {
    #   "0": "All categories: Household type",
    #   "1": "One person household",
    #   "2": "Married or same-sex civil partnership couple household",
    #   "3": "Cohabiting couple household",
    #   "4": "Lone parent household",
    #   "5": "Multi-person household"
    # }
    self.cat = {"LC4408_C_AHTHUK11": np.array([1, 2, 3, 4, 5])}

    # NOTE: pandas stores column-major order but numpy view is row major so the
    # matrix looks right but is actually transposed (no amount of transposing
    # actually changes the memory layout, it just changes the view).
    # The C++ code assumes the transition matrix is column major (col sums to
    # unity not rows)
    self.t = pd.read_csv(ht_trans).set_index("initial state").values / 100.0
    # check rows sum to unity
    assert np.allclose(np.sum(self.t, 1), np.ones(len(self.t)))

    # TODO get snhp
    self.snhp = SNHPData.SNHPData(self.cache_dir)
    self.projection = self.snhp.aggregate(self.lads)
def __init__(self, inputdata, asfr, asmr, asir, asor, ascr, asxr):
    """Load initial populations and demographic rate datasets for this process.

    Args:
      inputdata: this process' list of population csv files (one per LAD)
      asfr, asmr: fertility / mortality rate csv paths
      asir, asor: internal in- / out-migration rate csv paths
      ascr, asxr: immigration / emigration csv paths
    Raises:
      ValueError: if this process has been allocated no input files
    """
    # guard for no input data (if more MPI processes than input files)
    if not len(inputdata):
        raise ValueError("proc {}/{}: no input data".format(
            neworder.rank(), neworder.size()))

    self.lads = [file.split("_")[2] for file in inputdata]

    # read each file, tag rows with the LAD code, then concatenate once
    # (BUG FIX: DataFrame.append was removed in pandas 2.0, and appending in a
    # loop was quadratic - collect frames and concat once instead)
    frames = []
    for file in inputdata:
        data = pd.read_csv(file)
        data["LAD"] = file.split("_")[2]
        frames.append(data)
    self.data = pd.concat(frames)

    neworder.log("Preprocessing transition data for %s" % ", ".join(self.lads))
    self.fertility = ethpop.create_multi(pd.read_csv(asfr), self.lads)
    self.mortality = ethpop.create_multi(pd.read_csv(asmr), self.lads)
    self.in_migration = ethpop.local_rates_from_national_rate(
        ethpop.create_multi(pd.read_csv(asir), self.lads), self.data)
    self.out_migration = ethpop.create_multi(pd.read_csv(asor), self.lads)
    self.immigration = ethpop.local_rates_from_absolute(
        ethpop.create_multi(pd.read_csv(ascr), self.lads), self.data)
    self.emigration = ethpop.local_rates_from_absolute(
        ethpop.create_multi(pd.read_csv(asxr), self.lads), self.data)

    # Force flat rates for testing purposes
    self.in_migration.Rate = 0.05
    self.out_migration.Rate = 0.05
    # The actual rates cause exponential growth
    self.immigration.Rate = 0.01
    self.emigration.Rate = 0.005

    # use this to identify people (uniquely only within this table)
    self.counter = len(self.data)

    # Reformatting of input data is required to match Ethpop categories
    # actual age is randomised within the bound of the category
    self.data["Age"] = self.data.DC1117EW_C_AGE - neworder.ustream(len(self.data))
    self.data = ethpop.from_census_eth(self.data)
import numpy as np
import neworder

# single-process model: the assertion below requires exactly one process
# (the original comment claimed "must be MPI enabled", which contradicts it)
assert neworder.size() == 1

# grid dimensions and cell-category proportions; category 0 is empty
gridsize = [80,100]
categories = [0.3, 0.3, 0.3, 0.1]
similarity = 0.5

# timeline: presumably (start, end, steps) - TODO confirm against neworder docs
neworder.timeline = (0, 50, 50)

# running/debug options
neworder.log_level = 1
neworder.do_checks = False

# initialisation: constructs model.Schelling(gridsize, categories, similarity)
neworder.initialisations = {
  "model": {
    "module": "model",
    "class_": "Schelling",
    "parameters": [gridsize, categories, similarity]
  }
}

# transitions evaluated at each timestep
neworder.transitions = {
  "step": "model.step()",
  #"redist": "TODO..."
}

# Finalisation
# NOTE(review): this chunk appears truncated - the closing brace of the
# checkpoints dict is missing from the visible source
neworder.checkpoints = {
  "stats": "model.stats()",
  "anim": "model.animate()"
# run 4 sims
#neworder.sequence = np.array([3,1,2,0])

# define the evolution: a single annual step 2011 -> 2012
neworder.timeline = (2011, 2012, 1)

# LAD codes supplied via the environment (space-separated)
areas = os.getenv("LADS").split(" ")

# define where the starting populations come from
data_dir = "examples/households/data"
# BUG FIX: file_pattern is referenced in the initialisation parameters below
# but its definition was commented out, which would raise NameError when this
# config is loaded
file_pattern = "hh_%s_OA11_%d.csv"

# running/debug options
neworder.log_level = 1

# this model isnt meant for parallel execution
assert neworder.size() == 1, "This example is configured to be run as a single process only"

# initialisation: constructs households.Households(data_dir, file_pattern, areas)
neworder.initialisations = {
  "households": {
    "module": "households",
    "class_": "Households",
    "parameters": [data_dir, file_pattern, areas]
  }
}

# timestep must be defined in neworder
neworder.transitions = {"age": "households.age(timestep)"}

# checks to perform after each timestep. Assumed to return a boolean
neworder.do_checks = True
# Faith
# option contract parameters: strike price and time to expiry (years)
strike = 100.0
expiry = 0.75

# Using exact MC calc of GBM requires only 1 timestep
neworder.timeline = (0, expiry, 1)

# number of prices to simulate
neworder.nsims = 100000
# all procs use same RNG stream
neworder.sync_streams = True
neworder.log_level = 1
# no per-timestep checks implemented since there is only one timestep
neworder.do_checks = False
neworder.checks = {}

# use 4 identical sims with perturbations
assert neworder.size() == 4 and not neworder.indep(), "This example requires 4 processes with identical RNG streams"

# one present-value slot per process
neworder.pv = np.zeros(neworder.size())

# initialisation
# NOTE(review): spot, rate, divy, vol and callput are presumably defined
# earlier in this file, outside the visible chunk - confirm; the chunk also
# appears truncated below (initialisations dict is not closed)
neworder.initialisations = {
  "market": {
    "module": "market",
    "class_": "Market",
    "parameters": [spot, rate, divy, vol]
  },
  "option": {
    "module": "option",
    "class_": "Option",
    "parameters": [callput, strike, expiry]
# internal in-migration asir = "examples/shared/NewETHPOP_inmig.csv" # internal out-migration asor = "examples/shared/NewETHPOP_outmig.csv" # immigration ascr = "examples/shared/NewETHPOP_immig.csv" # emigration asxr = "examples/shared/NewETHPOP_emig.csv" # MPI split initial population files over threads def partition(arr, count): return [arr[i::count] for i in range(count)] initial_populations = partition(initial_populations, neworder.size()) # running/debug options neworder.log_level = 1 # initialisation neworder.initialisations = { "people": { "module": "population", "class_": "Population", "parameters": [ initial_populations[neworder.rank()], asfr, asmr, asir, asor, ascr, asxr ]
def write_table(self):
    """Dump this process' population table to a time- and rank-stamped csv.

    Returns the result of DataFrame.to_csv (None when writing to a path).
    """
    # TODO define path in config
    path = "./examples/people_multi/data/dm_{:.3f}_{}-{}.csv".format(
        neworder.time, neworder.rank(), neworder.size())
    neworder.log("writing %s" % path)
    return self.data.to_csv(path, index=False)
def write_table(self):
    """Write the final household population to a time- and rank-stamped csv
    inside this model's data directory."""
    name = "dm_{:.3f}_{}-{}.csv".format(no.time, no.rank(), no.size())
    file = os.path.join(self.data_dir, name)
    no.log("writing final population: %s" % file)
    self.pop.to_csv(file, index=False)
""" config.py Microsimulation config for MPI-only implementation of household microsimulation prototype """ import os import glob import numpy as np import neworder assert neworder.size() > 1 and neworder.indep( ), "This example requires MPI with independent RNG streams" # THIS could be very, very useful #https://stackoverflow.com/questions/47297585/building-a-transition-matrix-using-words-in-python-numpy # define the outer sequence loop (optional) # run 4 sims #neworder.sequence = np.array([3,1,2,0]) # define the evolution neworder.timeline = (2011, 2019, 1) # define where the starting populations come from data_dir = "examples/households/data" # TODO this should probably not be same as above cache_dir = "examples/households/data" file_pattern = "hh_*_OA11_2011.csv" # MPI split initial population files over threads def partition(arr, count): return [arr[i::count] for i in range(count)]
import numpy as np
import neworder

# test model parameters: population per process and transition probability
population_size = 100
p_trans = 0.01

# must be MPI enabled
assert neworder.size() > 1

# timeline: presumably (start, end, steps) - TODO confirm against neworder docs
neworder.timeline = (0, 100, 100)

# running/debug options
neworder.log_level = 1
neworder.do_checks = False

# initialisation: constructs test.Test(p_trans, population_size)
neworder.initialisations = {
  "test": {
    "module": "test",
    "class_": "Test",
    "parameters": [p_trans, population_size]
  }
}

# transitions evaluated at each timestep
neworder.transitions = {
  "test": "test.test()",
  #"redist": "TODO..."
}

# Finalisation
# NOTE(review): this chunk appears truncated - the checkpoints dict body is
# missing from the visible source
neworder.checkpoints = {
def write_table(self):
    """Dump this process' population to a rank-stamped csv file.

    Returns the result of DataFrame.to_csv (None when writing to a path).
    """
    # TODO define path in config
    path = "./examples/world/data/pop2019_{}-{}.csv".format(
        neworder.rank(), neworder.size())
    neworder.log("writing %s" % path)
    return self.pop.to_csv(path, index=False)
def test():
    """Exercise neworder's MPI layer: send/receive (scalars, containers, numpy
    arrays, DataFrames), sync, broadcast, gather, scatter, allgather, and
    independent RNG streams.

    Returns True if all checks pass; trivially True on a single process.
    NOTE: several expected values below hard-code a world size of 2.
    """
    t = test_.Test()
    if neworder.size() == 1:
        neworder.log("Skipping MPI tests")
        return True

    # point-to-point round-trips of basic types (send_recv defined elsewhere)
    t.check(send_recv(True))
    t.check(send_recv(10))
    t.check(send_recv(10.01))
    t.check(send_recv("abcdef"))
    t.check(send_recv([1, 2, 3]))
    t.check(send_recv({"a": "fghdfkgh"}))

    # numpy array transfer: rank 0 -> rank 1
    x = np.array([1, 4, 9, 16])
    if neworder.rank() == 0:
        neworder.send(x, 1)
    if neworder.rank() == 1:
        y = neworder.receive(0)
        neworder.log("MPI: 0 sent {}={} 1 recd {}={}".format(
            type(x), x, type(y), y))
        t.check(np.array_equal(x, y))

    # DataFrame transfer, first serialised as csv, then pickled
    df = pd.read_csv("../../tests/ssm_E09000001_MSOA11_ppp_2011.csv")
    if neworder.rank() == 0:
        neworder.log("sending (as csv) df len %d rows from 0" % len(df))
        neworder.send_csv(df, 1)
    if neworder.rank() == 1:
        dfrec = neworder.receive_csv(0)
        neworder.log("got (as csv) df len %d rows from 0" % len(dfrec))
        t.check(dfrec.equals(df))

    if neworder.rank() == 0:
        neworder.log("sending (pickle) df len %d rows from 0" % len(df))
        neworder.send(df, 1)
    if neworder.rank() == 1:
        dfrec = neworder.receive(0)
        neworder.log("got (pickle) df len %d rows from 0" % len(dfrec))
        t.check(dfrec.equals(df))

    # barrier sync between all processes
    # TODO how to test?
    neworder.log("process %d syncing..." % neworder.rank())
    neworder.sync()
    neworder.log("process %d synced" % neworder.rank())

    # string broadcast: every process should end up with rank 0's value
    i = "rank " + str(neworder.rank())
    root = 0
    if root == neworder.rank():
        neworder.log("broadcasting '%s' from %d" % (i, root))
    i = neworder.broadcast(i, root)
    neworder.log("%d got broadcast: '%s' from %d" % (neworder.rank(), i, root))
    t.check(i == "rank 0")

    # a0 will be different for each proc
    a0 = np.random.rand(2, 2)
    if root == neworder.rank():
        neworder.log("broadcasting '%s' from %d" % (str(a0), root))
    a1 = neworder.broadcast(a0, root)
    # a1 will equal a0 on rank 0 only
    neworder.log("%d got broadcast: '%s' from %d" % (neworder.rank(), str(a1), root))
    if neworder.rank() == 0:
        t.check(np.array_equal(a0, a1))
    else:
        t.check(not np.array_equal(a0, a1))

    # test ustream/sequence: streams are configured to be independent here
    t.check(neworder.indep())

    if root == neworder.rank():
        u0 = neworder.ustream(1000)
        u1 = np.zeros(1000)
    else:
        u0 = np.zeros(1000)
        u1 = neworder.ustream(1000)
    # broadcast u1 from 1
    # NOTE(review): the broadcast result is not assigned, so u1 only changes if
    # broadcast modifies the array in place - verify intent against the API
    neworder.broadcast(u1, 1)
    # proc 0 should have 2 different random arrays
    # proc 1 should have zeros and a random array
    t.check(not np.array_equal(u0, u1))

    # check independent streams
    u = neworder.ustream(1000)
    v = neworder.broadcast(u, root)
    # u == v on broadcasting process only
    t.check(np.array_equal(u, v) == (neworder.rank() == root))

    # test gather (expected values assume exactly 2 processes)
    x = (neworder.rank() + 1)**2 / 8
    a = neworder.gather(x, 0)
    if neworder.rank() == 0:
        t.check(np.array_equal(a, [0.125, 0.5]))
    else:
        t.check(len(a) == 0)
    #neworder.log(a)

    # test scatter: rank 0 distributes one value to each process
    if neworder.rank() == 0:
        a = (np.array(range(neworder.size())) + 1)**2 / 8
    else:
        a = np.zeros(neworder.size())
    neworder.log(a)
    x = neworder.scatter(a, 0)
    t.check(x == (neworder.rank() + 1)**2 / 8)

    # test allgather: each process fills its own slot, result assumes 2 procs
    a = np.zeros(neworder.size()) - 1
    a[neworder.rank()] = (neworder.rank() + 1)**2 / 8
    a = neworder.allgather(a)
    t.check(np.array_equal(a, np.array([0.125, 0.5])))

    # this should probably fail (gather not implemented for int)
    x = neworder.rank() + 100
    a = neworder.gather(x, 0)
    #neworder.log(type(x))
    #neworder.log(type(a))

    return not t.any_failed
import os
import neworder
# BUG FIX: pd.read_csv is used below but pandas was never imported in this file
import pandas as pd

# MPI split initial population files over threads
def partition(arr, count):
    """Round-robin split of arr into count sublists (whole list for one process)."""
    if count > 1:
        return [arr[i::count] for i in range(count)]
    return [arr]

# country codes from the tab-separated lookup table
allcountries = pd.read_csv("./examples/world/data/CountryLookup.csv",
                           encoding='utf-8', sep="\t")["Code"]
initial_populations = partition(allcountries, neworder.size())
#initial_populations = [["ALB", "ASM", "ATG"]]

# running/debug options
neworder.log_level = 1

# initialisation: each process synthesises its own share of the countries
neworder.initialisations = {
  "people": {
    "module": "microsynth",
    "class_": "Microsynth",
    "parameters": [initial_populations[neworder.rank()]]
  }
}

# define the evolution: 11 annual steps from 2019 to 2030
neworder.timeline = (2019, 2030, 11)