def births(self, deltat):
    """Simulate births over an interval of deltat years.

    Fertility rates are looked up per female by ethnicity, sex and census age
    category; a random hazard draw decides who gives birth. Newborns are
    clones of their mothers (keeping location and ethnicity) with a new PID,
    an age randomised within the first year, and a randomised sex. Appends
    the newborns to self.data and advances self.counter.
    """
    # First consider only females
    females = self.data[self.data.DC1117EW_C_SEX == 2].copy()
    # Now map the appropriate fertility rate to each female
    # might be a more efficient way of generating this array
    rates = females.join(
        self.fertility,
        on=["NewEthpop_ETH", "DC1117EW_C_SEX", "DC1117EW_C_AGE"])["Rate"].values
    # Then randomly determine if a birth occurred (neworder callback)
    h = neworder.hazard(rates * deltat)
    # The babies are a clone of the new mothers, with changed PID, reset age
    # and randomised gender (keeping location and ethnicity)
    newborns = females[h == 1].copy()
    newborns.PID = range(self.counter, self.counter + len(newborns))
    newborns.Age = neworder.ustream(len(newborns))  # born within the last 12 months
    newborns.DC1117EW_C_AGE = 1  # this is 0-1 in census category
    # NOTE: do not convert to pd.Series here as this has its own index
    # which conflicts with the main table
    newborns.DC1117EW_C_SEX = neworder.hazard(0.5, len(newborns)) + 1
    # Finally append newborns to main population and adjust counter
    # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
    self.data = pd.concat((self.data, newborns))
    self.counter = self.counter + len(newborns)
def migrations(self, deltat):
    """Simulate internal and international migrations over deltat years.

    Rates are joined onto the population by LAD, ethnicity, sex and census
    age category; hazard draws select migrants. In-migrants are cloned rows
    appended with new PIDs; out-migrants are dropped. Net flows are recorded
    in self.in_out as (internal in, internal out, intl in, intl out).
    """
    # internal immigrations:
    # - assign the rates to the incumbent population appropriately by age,sex,ethnicity
    # - randomly sample this population, clone and append
    in_rates = self.data.join(
        self.in_migration,
        on=["LAD", "NewEthpop_ETH", "DC1117EW_C_SEX", "DC1117EW_C_AGE"])["Rate"].values
    # in-migration should be sampling from the whole population ex-LAD, instead do
    # an approximation by scaling up the LAD population
    # NOTE this is wrong for a number of reasons esp. as it cannot sample category
    # combinations that don't already exist in the LAD
    h_in = neworder.hazard(in_rates * deltat)
    incoming = self.data[h_in == 1].copy()
    # Append incomers to main population and adjust counter
    # Assign a new id
    incoming.PID = range(self.counter, self.counter + len(incoming))
    incoming.Area = incoming.LAD
    # assign a new random fractional age based on census age category
    incoming.Age = incoming.DC1117EW_C_AGE - neworder.ustream(len(incoming)).tolist()
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    self.data = pd.concat((self.data, incoming), sort=False)
    self.counter = self.counter + len(incoming)

    # internal emigration
    out_rates = self.data.join(
        self.out_migration,
        on=["LAD", "NewEthpop_ETH", "DC1117EW_C_SEX", "DC1117EW_C_AGE"])["Rate"]
    h_out = neworder.hazard(out_rates.values * deltat)
    # remove outgoing migrants
    self.data = self.data[h_out != 1]

    # international immigration
    intl_in_rates = self.data.join(
        self.immigration,
        on=["LAD", "NewEthpop_ETH", "DC1117EW_C_SEX", "DC1117EW_C_AGE"])["Rate"]
    h_intl_in = neworder.hazard(intl_in_rates.values * deltat)
    intl_incoming = self.data[h_intl_in == 1].copy()
    intl_incoming.PID = range(self.counter, self.counter + len(intl_incoming))
    intl_incoming.Area = "INTL"  # self.lad
    # assign a new random fractional age based on census age category
    intl_incoming.Age = intl_incoming.DC1117EW_C_AGE - neworder.ustream(len(intl_incoming)).tolist()
    self.data = pd.concat((self.data, intl_incoming))
    self.counter = self.counter + len(intl_incoming)

    # international emigration
    intl_out_rates = self.data.join(
        self.emigration,
        on=["LAD", "NewEthpop_ETH", "DC1117EW_C_SEX", "DC1117EW_C_AGE"])["Rate"]
    h_intl_out = neworder.hazard(intl_out_rates.values * deltat)
    # remove outgoing migrants
    self.data = self.data[h_intl_out != 1]

    # record net migration
    self.in_out = (h_in.sum(), h_out.sum(), h_intl_in.sum(), h_intl_out.sum())
def test():
    """MPI smoke test: with multiple processes, streams are synced and a
    broadcast of the uniform stream is a no-op on every rank."""
    checker = test_.Test()
    # nothing to verify in a single-process run
    if neworder.size() == 1:
        neworder.log("Skipping MPI tests")
        return True
    # test ustream/sequence: this configuration uses non-independent streams
    checker.check(not neworder.indep())
    draws = neworder.ustream(1000)
    replicated = neworder.broadcast(draws, 0)
    # every process should already hold the broadcast values
    checker.check(np.array_equal(draws, replicated))
    return not checker.any_failed
def __init__(self, inputdata, asfr, asmr, asir, asor, ascr, asxr):
    """Load populations for one or more LADs and their demographic rate tables.

    Args:
        inputdata: list of population csv filenames, one per LAD; the LAD
            code is the third underscore-delimited token of each filename.
        asfr, asmr: fertility and mortality rate files.
        asir, asor: internal in-/out-migration rate files.
        ascr, asxr: international immigration/emigration files.

    Raises:
        ValueError: if this process receives no input files (can happen
            when there are more MPI processes than input files).
    """
    # guard for no input data (if more MPI processes than input files)
    if not len(inputdata):
        raise ValueError("proc {}/{}: no input data".format(
            neworder.rank(), neworder.size()))

    self.lads = [file.split("_")[2] for file in inputdata]

    # Load each LAD's population, tagging rows with the LAD code, then combine
    # in a single concat: appending inside the loop is quadratic, and
    # DataFrame.append was removed in pandas 2.0
    frames = []
    for file in inputdata:
        data = pd.read_csv(file)
        data["LAD"] = file.split("_")[2]
        frames.append(data)
    self.data = pd.concat(frames)

    neworder.log("Preprocessing transition data for %s" % ", ".join(self.lads))
    self.fertility = ethpop.create_multi(pd.read_csv(asfr), self.lads)
    self.mortality = ethpop.create_multi(pd.read_csv(asmr), self.lads)
    self.in_migration = ethpop.local_rates_from_national_rate(
        ethpop.create_multi(pd.read_csv(asir), self.lads), self.data)
    self.out_migration = ethpop.create_multi(pd.read_csv(asor), self.lads)
    self.immigration = ethpop.local_rates_from_absolute(
        ethpop.create_multi(pd.read_csv(ascr), self.lads), self.data)
    self.emigration = ethpop.local_rates_from_absolute(
        ethpop.create_multi(pd.read_csv(asxr), self.lads), self.data)

    # Force flat rates for testing purposes
    self.in_migration.Rate = 0.05
    self.out_migration.Rate = 0.05
    # The actual rates cause exponential growth
    self.immigration.Rate = 0.01
    self.emigration.Rate = 0.005

    # use this to identify people (uniquely only within this table)
    self.counter = len(self.data)

    # Reformatting of input data is required to match Ethpop categories
    # actual age is randomised within the bound of the category
    self.data["Age"] = self.data.DC1117EW_C_AGE - neworder.ustream(len(self.data))
    self.data = ethpop.from_census_eth(self.data)
def __init__(self, inputdata, asfr, asmr, asir, asor, ascr, asxr):
    """Load a single LAD's population and its demographic rate tables.

    Args:
        inputdata: path to the population csv; the LAD code is the second
            underscore-delimited token of the filename.
        asfr, asmr: fertility and mortality rate files.
        asir, asor: internal in-/out-migration rate files.
        ascr, asxr: international immigration/emigration files.

    Raises:
        NotImplementedError: for the Cornwall census-merged LADs, whose
            population adjustment is not implemented.
    """
    self.lad = inputdata.split("_")[1]
    self.data = pd.read_csv(inputdata)

    self.fertility = ethpop.create(pd.read_csv(asfr), self.lad)
    self.mortality = ethpop.create(pd.read_csv(asmr), self.lad)

    # assume the in-migration rates are based on the national population and
    # need rescaling against a reference population...
    reference_pop = len(self.data)
    # ...handling census-merged LADs explicitly (City of London/Westminster
    # use a fixed combined population)
    if self.lad in ("E09000001", "E09000033"):
        reference_pop = 219340 + 7397
    elif self.lad in ("E06000052", "E06000053"):
        raise NotImplementedError("Cornwall CM LAD adj")
    self.in_migration = ethpop.local_rate_from_national_rate(
        ethpop.create(pd.read_csv(asir), self.lad), reference_pop)
    # assume the out-migration rates don't require adjustment
    self.out_migration = ethpop.create(pd.read_csv(asor), self.lad)
    self.immigration = ethpop.local_rate_rescale_from_absolute(
        ethpop.create(pd.read_csv(ascr), self.lad), reference_pop)
    self.emigration = ethpop.local_rate_rescale_from_absolute(
        ethpop.create(pd.read_csv(asxr), self.lad), reference_pop)

    # Force flat international rates for testing purposes (the internal
    # migration rates are left as loaded)
    self.immigration.Rate = 0.01
    self.emigration.Rate = 0.005

    # use this to identify people (uniquely only within this table)
    self.counter = len(self.data)

    # Reformatting of input data is required to match Ethpop categories:
    # actual age is randomised within the bounds of the census age category
    self.data["Age"] = self.data.DC1117EW_C_AGE - neworder.ustream(len(self.data))
    self.data = ethpop.from_census_eth(self.data)
def test():
    """Exercises time constants, the uniform RNG stream, lazy evaluation and
    the neworder DataFrame operations (directmod, transition)."""
    checker = test_.Test()

    # distant_past/far_future must bracket any finite time, large or small
    for probe in (-1e10, 1e10):
        checker.check(no.distant_past() < probe)
        checker.check(no.far_future() > probe)

    finite = 1e10
    # "never" behaves like NaN: it compares unequal to everything, itself included
    checker.check(no.never() != no.never())
    checker.check(not no.never() == finite)
    checker.check(no.never() != finite)
    # ...and all ordering comparisons against it are false
    checker.check(not finite < no.never())
    checker.check(not finite >= no.never())
    # a finite value is not "never"...
    checker.check(not no.isnever(finite))
    # ...but never() itself is
    checker.check(no.isnever(no.never()))

    # uniform stream: correct type, length, and approximately correct mean
    samples = no.ustream(10000)
    checker.check(isinstance(samples, np.ndarray))
    checker.check(len(samples) == 10000)
    checker.check(abs(np.mean(samples) - 0.5) < 0.02)

    # deferred evaluation of an expression string
    deferred = no.lazy_eval("2 + 2")
    checker.check(deferred() == 4)

    # NOTE(review): a block of disabled NHPP/first_arrival checks lived here;
    # overlapping coverage exists in op.py

    # vectorised isnever over arrays
    sometime = no.isnever(np.full(10, 1.0))
    checker.check(np.all(~sometime))
    never = no.isnever(np.full(10, no.never()))
    no.log(never)
    checker.check(np.all(never))

    # DataFrame ops: direct in-place modification of a named column
    frame = pd.read_csv("../../tests/df.csv")
    no.directmod(frame, "DC2101EW_C_ETHPUK11")
    checker.check(
        np.array_equal(frame["DC2101EW_C_ETHPUK11"].values, np.full(len(frame), 3.0)))

    # DataFrame ops: random category transitions under a uniform matrix
    frame = pd.read_csv("../../tests/df.csv")
    states = np.arange(4)
    tmat = np.full((len(states), len(states)), 0.25)
    no.transition(states, tmat, frame, "DC2101EW_C_ETHPUK11")
    # it's possible this could fail depending on random draw
    checker.check(
        np.array_equal(np.sort(frame["DC2101EW_C_ETHPUK11"].unique()), np.arange(4)))

    return not checker.any_failed
def nstream(n):
    """Return a vector of n normally distributed pseudorandom variates
    (zero mean, unit variance)."""
    uniforms = neworder.ustream(n)
    # inverse-CDF method: map U(0,1) draws onto the standard normal
    return scipy.stats.norm.ppf(uniforms)
def test():
    """Exercises the neworder MPI layer: point-to-point send/receive of
    scalars, containers, arrays and DataFrames; sync; broadcast; gather;
    scatter; and allgather.  Returns True on success (or trivially when
    run single-process)."""
    t = test_.Test()
    # nothing to test without at least two processes
    if neworder.size() == 1:
        neworder.log("Skipping MPI tests")
        return True

    # round-trip various basic python types (send_recv presumably sends from
    # one rank and checks receipt on another — defined elsewhere in this module)
    t.check(send_recv(True))
    t.check(send_recv(10))
    t.check(send_recv(10.01))
    t.check(send_recv("abcdef"))
    t.check(send_recv([1, 2, 3]))
    t.check(send_recv({"a": "fghdfkgh"}))

    # point-to-point transfer of a numpy array from rank 0 to rank 1
    x = np.array([1, 4, 9, 16])
    if neworder.rank() == 0:
        neworder.send(x, 1)
    if neworder.rank() == 1:
        y = neworder.receive(0)
        neworder.log("MPI: 0 sent {}={} 1 recd {}={}".format(
            type(x), x, type(y), y))
        t.check(np.array_equal(x, y))

    # DataFrame transfer, first serialised as csv...
    df = pd.read_csv("../../tests/ssm_E09000001_MSOA11_ppp_2011.csv")
    if neworder.rank() == 0:
        neworder.log("sending (as csv) df len %d rows from 0" % len(df))
        neworder.send_csv(df, 1)
    if neworder.rank() == 1:
        dfrec = neworder.receive_csv(0)
        neworder.log("got (as csv) df len %d rows from 0" % len(dfrec))
        t.check(dfrec.equals(df))
    # ...then via the generic (pickle) send
    if neworder.rank() == 0:
        neworder.log("sending (pickle) df len %d rows from 0" % len(df))
        neworder.send(df, 1)
    if neworder.rank() == 1:
        dfrec = neworder.receive(0)
        neworder.log("got (pickle) df len %d rows from 0" % len(dfrec))
        t.check(dfrec.equals(df))

    # barrier synchronisation (no observable result to assert on)
    # TODO how to test?
    neworder.log("process %d syncing..." % neworder.rank())
    neworder.sync()
    neworder.log("process %d synced" % neworder.rank())

    # broadcast a string from the root: every rank should end up with root's value
    i = "rank " + str(neworder.rank())
    root = 0
    if root == neworder.rank():
        neworder.log("broadcasting '%s' from %d" % (i, root))
    i = neworder.broadcast(i, root)
    neworder.log("%d got broadcast: '%s' from %d" % (neworder.rank(), i, root))
    t.check(i == "rank 0")

    # a0 will be different for each proc
    a0 = np.random.rand(2, 2)
    if root == neworder.rank():
        neworder.log("broadcasting '%s' from %d" % (str(a0), root))
    a1 = neworder.broadcast(a0, root)
    # a1 will equal a0 on rank 0 only
    neworder.log("%d got broadcast: '%s' from %d" % (neworder.rank(), str(a1), root))
    if neworder.rank() == 0:
        t.check(np.array_equal(a0, a1))
    else:
        t.check(not np.array_equal(a0, a1))

    # test ustream/sequence: this configuration uses independent streams
    t.check(neworder.indep())
    if root == neworder.rank():
        u0 = neworder.ustream(1000)
        u1 = np.zeros(1000)
    else:
        u0 = np.zeros(1000)
        u1 = neworder.ustream(1000)
    # broadcast u1 from 1
    # NOTE(review): the return value is discarded here, unlike the other
    # broadcast calls — confirm whether broadcast mutates in place
    neworder.broadcast(u1, 1)
    # proc 0 should have 2 different random arrays
    # proc 1 should have zeros and a random array
    t.check(not np.array_equal(u0, u1))

    # check independent streams
    u = neworder.ustream(1000)
    v = neworder.broadcast(u, root)
    # u == v on broadcasting process only
    t.check(np.array_equal(u, v) == (neworder.rank() == root))

    # test gather: root collects one value per rank (expected values assume 2 ranks)
    x = (neworder.rank() + 1)**2 / 8
    a = neworder.gather(x, 0)
    if neworder.rank() == 0:
        t.check(np.array_equal(a, [0.125, 0.5]))
    else:
        t.check(len(a) == 0)
    #neworder.log(a)

    # test scatter: root distributes one value to each rank
    if neworder.rank() == 0:
        a = (np.array(range(neworder.size())) + 1)**2 / 8
    else:
        a = np.zeros(neworder.size())
    neworder.log(a)
    x = neworder.scatter(a, 0)
    t.check(x == (neworder.rank() + 1)**2 / 8)

    # test allgather: each rank contributes its slot, all ranks get the full array
    a = np.zeros(neworder.size()) - 1
    a[neworder.rank()] = (neworder.rank() + 1)**2 / 8
    a = neworder.allgather(a)
    t.check(np.array_equal(a, np.array([0.125, 0.5])))

    # this should probably fail (gather not implemented for int)
    x = neworder.rank() + 100
    a = neworder.gather(x, 0)
    #neworder.log(type(x))
    #neworder.log(type(a))
    return not t.any_failed