Example 1
    def __init__(self, p, n):
        # the set of possible states: one per process
        self.s = np.arange(neworder.size())
        # transition matrix: 1 - (size-1)*p on the diagonal, p elsewhere,
        # so each row sums to one
        self.p = np.identity(neworder.size()) * (1 - neworder.size() * p) + p
        self.n = n

        # everyone begins with a unique id and state = MPI rank
        self.pop = pd.DataFrame({
            "id": np.arange(neworder.rank() * n, neworder.rank() * n + n),
            "state": np.full(n, neworder.rank())
        })
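As a quick aside (not part of the original example), the transition matrix built above has 1 - (size-1)*p on the diagonal and p everywhere else, so each row is a valid probability distribution. A minimal sketch with assumed values:

import numpy as np

size, p = 4, 0.01  # assumed values, for illustration only
P = np.identity(size) * (1 - size * p) + p
assert np.allclose(P.sum(axis=1), 1.0)  # each row sums to unity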
Example 2
    def test(self):
        # generate some movement
        neworder.transition(self.s, self.p, self.pop, "state")

        # send migrants
        for s in range(neworder.size()):
            if s != neworder.rank():
                emigrants = self.pop[self.pop.state == s]
                #neworder.log("sending %d emigrants to %d" % (len(emigrants), s))
                neworder.send(emigrants, s)

        # remove the emigrants
        self.pop = self.pop[self.pop.state == neworder.rank()]

        # receive migrants
        for s in range(neworder.size()):
            if s != neworder.rank():
                immigrants = neworder.receive(s)
                #neworder.log("received %d immigrants from %d" % (len(immigrants), s))
                # DataFrame.append was removed in pandas 2; pd.concat replaces it
                self.pop = pd.concat([self.pop, immigrants])
Example 3
def test():
    t = test_.Test()

    if neworder.size() == 1:
        neworder.log("Skipping MPI tests")
        return True

    # test ustream/sequence
    t.check(not neworder.indep())

    u = neworder.ustream(1000)
    v = neworder.broadcast(u, 0)
    # u == v for all processes

    t.check(np.array_equal(u, v))

    return not t.any_failed
Example 4
    def __init__(self, input_files, ht_trans, cache_dir):

        self.cache_dir = cache_dir
        # guard for no input data (if more MPI processes than input files)
        if not len(input_files):
            raise ValueError("proc {}/{}: no input data".format(
                no.rank(), no.size()))
        self.lads = [file.split("_")[1] for file in input_files]
        # assumes all files in same dir
        self.data_dir = os.path.dirname(input_files[0])

        # store everything in a single DataFrame (all files concatenated)
        self.pop = pd.DataFrame()

        for file in input_files:
            no.log("reading initial population: %s" % file)
            data = pd.read_csv(file)
            data["LAD"] = file.split("_")[1]
            self.pop = pd.concat([self.pop, data])
        # no.log(self.pop.LC4408_C_AHTHUK11.unique())
        # self.cat = self.pop.LC4408_C_AHTHUK11.unique()
        # "C_AHTHUK11": {
        #   "0": "All categories: Household type",
        #   "1": "One person household",
        #   "2": "Married or same-sex civil partnership couple household",
        #   "3": "Cohabiting couple household",
        #   "4": "Lone parent household",
        #   "5": "Multi-person household"
        # }
        self.cat = {"LC4408_C_AHTHUK11": np.array([1, 2, 3, 4, 5])}

        # NOTE: pandas stores data column-major but the numpy view is row-major,
        # so the matrix looks right but is actually transposed. No amount of
        # transposing changes the memory layout (it only changes the view).
        # The C++ code assumes the transition matrix is column-major, i.e.
        # columns, not rows, sum to unity.
        self.t = pd.read_csv(ht_trans).set_index(
            "initial state").values / 100.0
        # check rows sum to unity
        assert np.allclose(np.sum(self.t, 1), np.ones(len(self.t)))

        # TODO get snhp
        self.snhp = SNHPData.SNHPData(self.cache_dir)

        self.projection = self.snhp.aggregate(self.lads)
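A small numpy-only sketch of the layout point made in the NOTE above (illustrative, not from the example): transposing never moves data, it only changes the view.

import numpy as np

m = np.array([[0.9, 0.1], [0.3, 0.7]])  # C-contiguous (row-major)
t = m.T                                 # a view: no data is copied or moved
assert t.base is m                      # same underlying memory
assert t.flags["F_CONTIGUOUS"] and not t.flags["C_CONTIGUOUS"]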
Example 5
    def __init__(self, inputdata, asfr, asmr, asir, asor, ascr, asxr):

        # guard for no input data (if more MPI processes than input files)
        if not len(inputdata):
            raise ValueError("proc {}/{}: no input data".format(
                neworder.rank(), neworder.size()))

        self.lads = [file.split("_")[2] for file in inputdata]

        self.data = pd.DataFrame()
        for file in inputdata:
            data = pd.read_csv(file)
            data["LAD"] = file.split("_")[2]
            self.data = pd.concat([self.data, data])

        neworder.log("Preprocessing transition data for %s" %
                     ", ".join(self.lads))
        self.fertility = ethpop.create_multi(pd.read_csv(asfr), self.lads)
        self.mortality = ethpop.create_multi(pd.read_csv(asmr), self.lads)
        self.in_migration = ethpop.local_rates_from_national_rate(
            ethpop.create_multi(pd.read_csv(asir), self.lads), self.data)
        self.out_migration = ethpop.create_multi(pd.read_csv(asor), self.lads)
        self.immigration = ethpop.local_rates_from_absolute(
            ethpop.create_multi(pd.read_csv(ascr), self.lads), self.data)
        self.emigration = ethpop.local_rates_from_absolute(
            ethpop.create_multi(pd.read_csv(asxr), self.lads), self.data)

        # Force flat rates for testing purposes
        self.in_migration.Rate = 0.05
        self.out_migration.Rate = 0.05
        # The actual rates cause exponential growth
        self.immigration.Rate = 0.01
        self.emigration.Rate = 0.005

        # use this to identify people (uniquely only within this table)
        self.counter = len(self.data)

        # Reformatting of the input data is required to match the Ethpop categories;
        # actual age is randomised within the bounds of the age category
        self.data["Age"] = self.data.DC1117EW_C_AGE - neworder.ustream(
            len(self.data))
        self.data = ethpop.from_census_eth(self.data)
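The "Age" line above maps an integer census age category to a continuous age, uniformly distributed within the year below it. A standalone numpy sketch of the same idea (values assumed for illustration):

import numpy as np

rng = np.random.default_rng(1)
age_category = np.array([45, 45, 30])
age = age_category - rng.uniform(size=len(age_category))  # e.g. 45 -> (44, 45)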
Example 6
import numpy as np
import neworder

# this example must be run as a single process
assert neworder.size() == 1

gridsize = [80, 100]
# category 0 represents empty cells
categories = [0.3, 0.3, 0.3, 0.1]
similarity = 0.5

neworder.timeline = (0, 50, 50)

# running/debug options
neworder.log_level = 1
neworder.do_checks = False
 
# initialisation
neworder.initialisations = {
  "model": { "module": "model", "class_": "Schelling", "parameters": [gridsize, categories, similarity] }
}

neworder.transitions = {
  "step": "model.step()",
  #"redist": "TODO..." 
}

# Finalisation 
neworder.checkpoints = {
  "stats": "model.stats()",
  "anim": "model.animate()"
}
Example 7
# run 4 sims
#neworder.sequence = np.array([3,1,2,0])
# define the evolution
neworder.timeline = (2011, 2012, 1)

areas = os.getenv("LADS").split(" ")

# define where the starting populations come from
data_dir = "examples/households/data"
file_pattern = "hh_%s_OA11_%d.csv"

# running/debug options
neworder.log_level = 1
# this model isn't meant for parallel execution
assert neworder.size() == 1, "This example is configured to be run as a single process only"

# initialisation
neworder.initialisations = {
    "households": {
        "module": "households",
        "class_": "Households",
        "parameters": [data_dir, file_pattern, areas]
    }
}

# timestep must be defined in neworder
neworder.transitions = {"age": "households.age(timestep)"}

# checks to perform after each timestep. Assumed to return a boolean
neworder.do_checks = True  # Faith
Example 8
strike = 100.0
expiry = 0.75

# Using the exact solution of GBM, the Monte Carlo simulation needs only 1 timestep
neworder.timeline = (0, expiry, 1)

neworder.nsims = 100000  # number of prices to simulate
neworder.sync_streams = True  # all procs use same RNG stream

neworder.log_level = 1
neworder.do_checks = False
# no per-timestep checks implemented since there is only one timestep
neworder.checks = {}

# use 4 identical sims with perturbations
assert neworder.size() == 4 and not neworder.indep(), \
    "This example requires 4 processes with identical RNG streams"

neworder.pv = np.zeros(neworder.size())

# initialisation
neworder.initialisations = {
    "market": {
        "module": "market",
        "class_": "Market",
        "parameters": [spot, rate, divy, vol]
    },
    "option": {
        "module": "option",
        "class_": "Option",
        "parameters": [callput, strike, expiry]
Example 9
# internal in-migration
asir = "examples/shared/NewETHPOP_inmig.csv"
# internal out-migration
asor = "examples/shared/NewETHPOP_outmig.csv"
# immigration
ascr = "examples/shared/NewETHPOP_immig.csv"
# emigration
asxr = "examples/shared/NewETHPOP_emig.csv"


# MPI: split the initial population files across processes
def partition(arr, count):
    return [arr[i::count] for i in range(count)]
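# For illustration (not in the original): this helper deals items out round-robin,
# e.g. partition(list(range(7)), 3) == [[0, 3, 6], [1, 4], [2, 5]], so each MPI
# process receives roughly len(arr) / count of the input files.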


initial_populations = partition(initial_populations, neworder.size())

# running/debug options
neworder.log_level = 1

# initialisation
neworder.initialisations = {
    "people": {
        "module": "population",
        "class_": "Population",
        "parameters": [
            initial_populations[neworder.rank()], asfr, asmr, asir, asor, ascr,
            asxr
        ]
    }
}
Example 10
    def write_table(self):
        # TODO define path in config
        filename = "./examples/people_multi/data/dm_{:.3f}_{}-{}.csv".format(
            neworder.time, neworder.rank(), neworder.size())
        neworder.log("writing %s" % filename)
        self.data.to_csv(filename, index=False)
Example 11
    def write_table(self):
        file = os.path.join(
            self.data_dir,
            "dm_{:.3f}_{}-{}.csv".format(no.time, no.rank(), no.size()))
        no.log("writing final population: %s" % file)
        self.pop.to_csv(file, index=False)
Example 12
""" config.py
Microsimulation config for MPI-only implementation of household microsimulation prototype
"""
import os
import glob
import numpy as np
import neworder

assert neworder.size() > 1 and neworder.indep(), \
    "This example requires MPI with independent RNG streams"

# THIS could be very, very useful
#https://stackoverflow.com/questions/47297585/building-a-transition-matrix-using-words-in-python-numpy

# define the outer sequence loop (optional)
# run 4 sims
#neworder.sequence = np.array([3,1,2,0])
# define the evolution
neworder.timeline = (2011, 2019, 1)

# define where the starting populations come from
data_dir = "examples/households/data"
# TODO this should probably not be same as above
cache_dir = "examples/households/data"
file_pattern = "hh_*_OA11_2011.csv"


# MPI: split the initial population files across processes
def partition(arr, count):
    return [arr[i::count] for i in range(count)]
Example 13
import numpy as np
import neworder

population_size = 100
p_trans = 0.01

# must be MPI enabled
assert neworder.size() > 1

neworder.timeline = (0, 100, 100)

# running/debug options
neworder.log_level = 1
neworder.do_checks = False

# initialisation
neworder.initialisations = {
    "test": {
        "module": "test",
        "class_": "Test",
        "parameters": [p_trans, population_size]
    }
}

neworder.transitions = {
    "test": "test.test()",
    #"redist": "TODO..."
}

# Finalisation
neworder.checkpoints = {
Example 14
    def write_table(self):
        # TODO define path in config
        filename = "./examples/world/data/pop2019_{}-{}.csv".format(neworder.rank(), neworder.size())
        neworder.log("writing %s" % filename)
        self.pop.to_csv(filename, index=False)
Example 15
def test():
    t = test_.Test()

    if neworder.size() == 1:
        neworder.log("Skipping MPI tests")
        return True

    t.check(send_recv(True))
    t.check(send_recv(10))
    t.check(send_recv(10.01))
    t.check(send_recv("abcdef"))
    t.check(send_recv([1, 2, 3]))
    t.check(send_recv({"a": "fghdfkgh"}))

    x = np.array([1, 4, 9, 16])
    if neworder.rank() == 0:
        neworder.send(x, 1)
    if neworder.rank() == 1:
        y = neworder.receive(0)
        neworder.log("MPI: 0 sent {}={} 1 recd {}={}".format(
            type(x), x, type(y), y))
        t.check(np.array_equal(x, y))

    df = pd.read_csv("../../tests/ssm_E09000001_MSOA11_ppp_2011.csv")
    if neworder.rank() == 0:
        neworder.log("sending (as csv) df len %d rows from 0" % len(df))
        neworder.send_csv(df, 1)
    if neworder.rank() == 1:
        dfrec = neworder.receive_csv(0)
        neworder.log("got (as csv) df len %d rows from 0" % len(dfrec))
        t.check(dfrec.equals(df))

    if neworder.rank() == 0:
        neworder.log("sending (pickle) df len %d rows from 0" % len(df))
        neworder.send(df, 1)
    if neworder.rank() == 1:
        dfrec = neworder.receive(0)
        neworder.log("got (pickle) df len %d rows from 0" % len(dfrec))
        t.check(dfrec.equals(df))

    # TODO how to test?
    neworder.log("process %d syncing..." % neworder.rank())
    neworder.sync()
    neworder.log("process %d synced" % neworder.rank())

    i = "rank " + str(neworder.rank())
    root = 0
    if root == neworder.rank():
        neworder.log("broadcasting '%s' from %d" % (i, root))
    i = neworder.broadcast(i, root)
    neworder.log("%d got broadcast: '%s' from %d" % (neworder.rank(), i, root))

    t.check(i == "rank 0")

    # a0 will be different for each proc
    a0 = np.random.rand(2, 2)
    if root == neworder.rank():
        neworder.log("broadcasting '%s' from %d" % (str(a0), root))
    a1 = neworder.broadcast(a0, root)
    # a1 will equal a0 on rank 0 only
    neworder.log("%d got broadcast: '%s' from %d" %
                 (neworder.rank(), str(a1), root))
    if neworder.rank() == 0:
        t.check(np.array_equal(a0, a1))
    else:
        t.check(not np.array_equal(a0, a1))

    # test ustream/sequence
    t.check(neworder.indep())
    if root == neworder.rank():
        u0 = neworder.ustream(1000)
        u1 = np.zeros(1000)
    else:
        u0 = np.zeros(1000)
        u1 = neworder.ustream(1000)
    # broadcast u1 from rank 1, assigning the result (broadcast is not in-place)
    u1 = neworder.broadcast(u1, 1)
    # proc 0 should now have 2 different random arrays
    # proc 1 should have zeros and a random array
    t.check(not np.array_equal(u0, u1))

    # check independent streams
    u = neworder.ustream(1000)
    v = neworder.broadcast(u, root)

    # u == v on broadcasting process only
    t.check(np.array_equal(u, v) == (neworder.rank() == root))

    # test gather (the expected values below assume exactly 2 processes)
    x = (neworder.rank() + 1)**2 / 8
    a = neworder.gather(x, 0)
    if neworder.rank() == 0:
        t.check(np.array_equal(a, [0.125, 0.5]))
    else:
        t.check(len(a) == 0)
    #neworder.log(a)

    # test scatter
    if neworder.rank() == 0:
        a = (np.array(range(neworder.size())) + 1)**2 / 8
    else:
        a = np.zeros(neworder.size())
    neworder.log(a)
    x = neworder.scatter(a, 0)
    t.check(x == (neworder.rank() + 1)**2 / 8)

    # test allgather (the expected values below assume exactly 2 processes)
    a = np.zeros(neworder.size()) - 1
    a[neworder.rank()] = (neworder.rank() + 1)**2 / 8
    a = neworder.allgather(a)
    t.check(np.array_equal(a, np.array([0.125, 0.5])))

    # this should probably fail (gather not implemented for int)
    x = neworder.rank() + 100
    a = neworder.gather(x, 0)
    #neworder.log(type(x))
    #neworder.log(type(a))

    return not t.any_failed
Example 16
import os
import pandas as pd  # needed for pd.read_csv below
import neworder


# MPI: split the initial population files across processes
def partition(arr, count):
    if count > 1:
        return [arr[i::count] for i in range(count)]
    return [arr]


allcountries = pd.read_csv("./examples/world/data/CountryLookup.csv",
                           encoding='utf-8',
                           sep="\t")["Code"]

initial_populations = partition(allcountries, neworder.size())
#initial_populations = [["ALB", "ASM", "ATG"]]
# running/debug options
neworder.log_level = 1

# initialisation
neworder.initialisations = {
    "people": {
        "module": "microsynth",
        "class_": "Microsynth",
        "parameters": [initial_populations[neworder.rank()]]
    }
}

# define the evolution
neworder.timeline = (2019, 2030, 11)
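# Note (inferred from the other configs, e.g. the option example, where a single
# step spans (0, expiry)): the tuple appears to be (start, end, number of steps),
# so this runs 11 one-year steps from 2019 to 2030.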