def test() -> None:
    """Exercise no.df.transition on one categorical column of ./test/df.csv.

    Three transition matrices are applied in turn and the set of distinct
    values left in the column is asserted after each one.
    """
    column = "DC2101EW_C_ETHPUK11"
    df = pd.read_csv("./test/df.csv")

    # base model for the MC engine
    model = no.Model(no.NoTimeline(), no.MonteCarlo.deterministic_identical_stream)

    cats = np.array(range(4))

    # identity matrix means no transitions: the column is expected to stay at 2
    trans = np.identity(len(cats))
    no.df.transition(model, cats, trans, df, column)
    states = df[column].unique()
    assert len(states) == 1 and states[0] == 2

    # NOTE (original author): the transition matrix is interpreted as being
    # COLUMN MAJOR due to pandas DataFrame storing data in column-major order
    # force 2->3
    trans[2, 2] = 0.0
    trans[2, 3] = 1.0
    no.df.transition(model, cats, trans, df, column)
    no.log(df[column].unique())
    states = df[column].unique()
    assert len(states) == 1 and states[0] == 3

    # ~half of 3->0, so both 0 and 3 should now be present
    trans[3, 0] = 0.5
    trans[3, 3] = 0.5
    no.df.transition(model, cats, trans, df, column)
    remaining = np.sort(df[column].unique())
    assert np.array_equal(remaining, np.array([0, 3]))
def finalise(self) -> None:
    """Gather every process's population on rank 0 and log the count per state."""
    # process 0 assembles all the data; nothing is logged where the gathered
    # result is falsy
    gathered = comm.gather(self.pop, root=0)
    if not gathered:
        return

    combined = pd.concat(gathered)
    per_state = combined["state"].value_counts().to_string()
    neworder.log("State counts (total %d):\n%s" % (len(combined), per_state))
def check(self) -> bool:
    """Validate the population table, then log a summary and plot the pyramid.

    Returns False (after logging the reason) at the first failed check:
    duplicated index values, sex categories other than exactly {1, 2},
    categorical age outside 1..86, or a categorical age exceeding the
    fractional age by 1.0 or more. Returns True otherwise.
    """
    people = self.population

    # index values must be unique
    if people.index.duplicated(keep=False).any():
        neworder.log("Duplicate indices found")
        return False

    # valid SEX and AGE categories
    if not np.array_equal(sorted(people.DC1117EW_C_SEX.unique()), [1, 2]):
        neworder.log("invalid gender value")
        return False

    age_categories = people.DC1117EW_C_AGE.unique().astype(int)
    if min(age_categories) < 1 or max(age_categories) > 86:
        neworder.log("invalid categorical age value")
        return False

    # this difference can go below zero for cat 86+, so only the upper bound is checked
    age_gap = people.DC1117EW_C_AGE - people.Age
    if age_gap.max() >= 1.0:
        neworder.log("invalid fractional age value")
        return False

    summary = "check OK: time={} size={} mean_age={:.2f}, pct_female={:.2f} net_migration={} ({}-{})"
    neworder.log(summary.format(
        self.timeline.time().date(),
        self.size(),
        self.mean_age(),
        100.0 * self.gender_split(),
        self.in_out[0] - self.in_out[1],
        self.in_out[0],
        self.in_out[1],
    ))

    # all checks passed: plot the data
    self.plot_pyramid()
    return True
def write_table(self):
    """Write each area's population to its own CSV file in the cache directory."""
    # the second-to-last timeline entry supplies the year used in the file name
    year = no.timeline[-2]
    for area in self.areas:
        filename = "dm_" + self.file_pattern % (area, year)
        path = os.path.join(self.cache_dir, filename)
        no.log("writing final population: %s" % path)
        self.pop[area].to_csv(path, index=False)
def get_a_life(self, households):
    """Move people aged 24+ who share a household with their mother into new households.

    Each mover gets one id from households.new(); any of the mover's own
    children living in the household being left are moved along with them.
    """
    # link each person to their mother's household: after the merge hh_id_x is
    # the person's own household and hh_id_y is the mother's
    mother_households = self.pp[["id", "hh_id"]].rename(columns={"id": "mother_id"})
    linked = self.pp.merge(mother_households, how="left", on="mother_id")
    leaving = (linked.hh_id_x == linked.hh_id_y) & (linked.age >= 24)
    mover_ids = linked[leaving].id.values

    if len(mover_ids):
        new_hh_ids = households.new(len(mover_ids))
        # children (0 or more) move with their mother, hence the per-mover loop
        nchildren = 0
        for i, mover_id in enumerate(mover_ids):
            new_hh_id = new_hh_ids[i]
            is_mover = self.pp.id == mover_id
            old_hh_id = self.pp.loc[is_mover, "hh_id"].values[0]

            # move the person
            self.pp.loc[is_mover, "hh_id"] = new_hh_id

            # move any of their children still in the household they just left
            is_child = (self.pp.mother_id == mover_id) & (self.pp.hh_id == old_hh_id)
            nchildren = nchildren + len(self.pp.loc[is_child])
            self.pp.loc[is_child, "hh_id"] = new_hh_id

        neworder.log("movers %d + %d children" % (len(mover_ids), nchildren))
def write_table(self):
    """Write the population to a CSV named after the current time and MPI rank/size."""
    filename = "dm_{:.3f}_{}-{}.csv".format(no.time, no.mpi.rank(), no.mpi.size())
    path = os.path.join(self.data_dir, filename)
    no.log("writing final population: %s" % path)
    self.pop.to_csv(path, index=False)
def check(self, expr):
    """Record and log a failed expectation.

    Does nothing when expr is truthy. Otherwise logs the calling source line
    together with its file name and line number, and sets self.any_failed.
    """
    if expr:
        return

    # frame 1 is whoever called check()
    caller = inspect.stack()[1]
    # code_context is None when the caller's source cannot be retrieved;
    # indexing it unconditionally would raise while reporting the failure
    context = caller.code_context
    source = context[0].strip("\n") if context else "<source unavailable>"
    neworder.log("FAIL %s at %s:%d" % (source, caller.filename, caller.lineno))
    self.any_failed = True
def __init__(self, n):
    """Create a population of n people with sampled death and union event times.

    Builds self.population with a time of death, placeholder pregnancy and
    parity columns, start/end times for up to two unions, and a count of the
    unions each person actually enters.
    """
    # initialise population - time of death only; the other columns are placeholders
    self.population = pd.DataFrame(data={
        "TimeOfDeath": neworder.first_arrival(data.mortality_rate, neworder.timestep, n, 0.0),
        "TimeOfPregnancy": np.full(n, neworder.never()),
        "Parity": np.full(n, Parity.CHILDLESS),
        "Unions": np.zeros(n, dtype=int),
    })
    people = self.population

    # Construct a timeline of unions for each person.
    # first union - probabilities start at 15, so the minimum age is added on afterwards
    people["T_Union1Start"] = \
        neworder.first_arrival(data.p_u1f, data.delta_t, len(people)) + data.min_age
    people["T_Union1End"] = neworder.next_arrival(
        people["T_Union1Start"].values, data.r_diss2[0], data.delta_t_u, True, data.min_u1)

    # second union - no minimum time is passed for the start of the 2nd union
    people["T_Union2Start"] = neworder.next_arrival(
        people["T_Union1End"].values, data.r_u2f, data.delta_t, True)
    people["T_Union2End"] = neworder.next_arrival(
        people["T_Union2Start"].values, data.r_diss2[1], data.delta_t_u, True)

    # discard events happening after death
    for event in ("T_Union1Start", "T_Union1End", "T_Union2Start", "T_Union2End"):
        people.loc[people[event] > people["TimeOfDeath"], event] = neworder.never()

    # count unions entered into: one for each union start that still happens
    entered_first = ~neworder.isnever(people["T_Union1Start"].values)
    entered_second = ~neworder.isnever(people["T_Union2Start"].values)
    people["Unions"] = entered_first.astype(int) + entered_second.astype(int)

    neworder.log("RiskPaths initialised")
def compare(self, pv_mc, nsims, option, market):
    """Compare the MC price to the analytic one.

    Logs both prices and the relative error, and returns True when the
    absolute relative error is at most 2/sqrt(nsims).
    """
    ref = self.analytic(option, market)
    err = pv_mc / ref - 1.0
    neworder.log("mc: {:.6f} / ref: {:.6f} err={:.2%}".format(pv_mc, ref, err))

    # relative error should be within O(1/(sqrt(sims))) of the analytic solution
    tolerance = 2.0 / sqrt(nsims)
    return bool(abs(err) <= tolerance)
def check(self):
    """Run the module-level check on self.data, log a summary line, return True."""
    # inside the method body the bare name resolves to the module-level check()
    check(self.data)

    summary = ("check OK: time={:.3f} size={} mean_age={:.2f}, pct_female={:.2f} "
               "net_migration={} ({}-{}+{}-{})")
    neworder.log(summary.format(
        neworder.time,
        self.size(),
        self.mean_age(),
        100.0 * self.gender_split(),
        self.in_out[0] - self.in_out[1] + self.in_out[2] - self.in_out[3],
        self.in_out[0],
        self.in_out[1],
        self.in_out[2],
        self.in_out[3],
    ))
    # this method has no failure path of its own: it always reports success
    return True
def compare(self) -> bool:
    """Compare the stored MC price to the analytic one.

    Logs both prices and the relative error, and returns True when the
    absolute relative error is at most 2/sqrt(self.nsims).
    """
    ref = self.analytic()
    err = self.pv / ref - 1.0
    neworder.log("mc: {:.6f} / ref: {:.6f} err={:.2%}".format(self.pv, ref, err))

    # relative error should be within O(1/(sqrt(sims))) of the analytic solution
    tolerance = 2.0 / np.sqrt(self.nsims)
    return bool(abs(err) <= tolerance)
def calc_life_expectancy(self):
    """Log the mean sampled time of mortality and its difference from 1/hazard."""
    people = self.population

    # mean sampled life expectancy
    total_lifetime = sum(person.time_mortality for person in people)
    sample_le = total_lifetime / len(people)

    # theoretical value is taken from the first person's hazard
    # (presumably everyone shares the same hazard - confirm)
    theoretical_le = 1.0 / people[0].mortality_hazard

    neworder.log("Life expectancy = %.2f years (sampling error=%f)"
                 % (sample_le, sample_le - theoretical_le))
def send_recv(x: Any) -> bool:
    """Send x from rank 0 to rank 1 and, on rank 1, check it arrived unchanged.

    Returns False only on rank 1 when the received value differs from x;
    every other rank returns True.
    """
    rank = no.mpi.rank()
    if rank == 0:
        comm.send(x, dest=1)
    if rank == 1:
        received = comm.recv(source=0)
        no.log("MPI: 0 sent {}={} 1 recd {}={}".format(type(x), x, type(received), received))
        if received != x:
            return False
    return True
def finalise(self) -> None:
    """
    This method (optional, if defined) is run at the end of the timeline.
    It logs a greeting for every row of the population flagged as talkative.
    Arguments: self
    Returns: NoneType
    """
    for index, person in self.population.iterrows():
        if not person.talkative:
            continue
        neworder.log("Hello from %d" % index)
def __init__(self, mortality_hazard: float, n: int) -> None:
    """Set up the base model and create n Person objects sharing one mortality hazard."""
    # no timeline, and a nondeterministic seed: results will vary (slightly) between runs
    timeline = neworder.NoTimeline()
    seeder = neworder.MonteCarlo.nondeterministic_stream
    super().__init__(timeline, seeder)

    # initialise population
    self.population = [Person(mortality_hazard) for _ in range(n)]
    neworder.log("created %d individuals" % n)
def step(self):
    """Sample this timestep's crimes and store them on self.crimes.

    While the timeline index is below the burn-in length the new sample is
    appended to the crimes already held; afterwards it replaces them.
    """
    # imported locally so the block does not depend on a module-level alias
    import pandas as pd

    crimes = self.__sample_crimes().sort_values(by="time")
    no.log("Sampled %d crimes in month beginning %s" % (len(crimes), self.timeline().time()))

    if self.__burn_in > self.timeline().index():
        # append crimes during the burn-in period; DataFrame.append was removed
        # in pandas 2.0, and pd.concat gives the same row-wise result
        self.crimes = pd.concat([self.crimes, crimes])
    else:
        # replace crimes after the burn-in period
        self.crimes = crimes
def init_model(force_area, year, month):
    """Create the global crime model, record its start time, and sample the first month."""
    global model, time

    # monthly open-ended timeline
    model = CrimeMicrosim(force_area, (year, month), agg_mode=False)
    time = model.timeline().time()
    no.log("Initialised crime model in %s at %s" % (force_area, model.timeline().time()))

    # simulate the first month with a loading factor of 1.0
    get_crimes(1.0)
def on_keypress(event):
    """Keyboard handler: "p" and "q" both halt the model, any other key logs a hint."""
    # NOTE: a handler for "r" (re-running the model) was left disabled by the original author
    key = event.key
    if key in ("p", "q"):
        self.halt()
        return
    no.log("%s doesnt do anything. p to pause/resume, q to quit" % key)
def send_recv(x):
    """Send x from rank 0 to rank 1 and, on rank 1, check it arrived unchanged.

    Returns False only on rank 1 when the received value differs from x;
    every other rank returns True.
    """
    rank = neworder.rank()
    if rank == 0:
        neworder.send(x, 1)
    if rank == 1:
        received = neworder.receive(0)
        neworder.log("MPI: 0 sent {}={} 1 recd {}={}".format(
            type(x), x, type(received), received))
        if received != x:
            return False
    return True
def check(self) -> bool:
    """Log total momentum and the energy drift from E0; True while |drift| < 20.2."""
    bodies = self.bodies

    # momentum components: sum of mass times velocity along each axis
    px, py, pz = (np.sum(bodies.m * velocity)
                  for velocity in (bodies.vx, bodies.vy, bodies.vz))

    # total energy is kinetic plus potential, compared against the stored E0
    kinetic = np.sum(bodies.ke)
    potential = np.sum(bodies.pe)
    drift = kinetic + potential - self.E0

    no.log("p=%g,%g,%g" % (px, py, pz))
    no.log("delta E=%f" % drift)
    return np.fabs(drift) < 20.2
def get_crimes(loading):
    """Apply a loading factor, run the global model, and return its crimes as CSV text."""
    global model

    no.log("Setting loading factor to %f" % loading)
    area = model.force_area()
    month_start = model.timeline().time()
    no.log("Sampling crimes in %s for month beginning %s" % (area, month_start))

    model.set_loading(loading)
    no.run(model)

    # serialise the sampled crimes in memory rather than to a file
    with StringIO() as csv_out:
        model.crimes.to_csv(csv_out)
        return csv_out.getvalue()
def __init__(self, countries):
    """Load the country data files and generate a population for each country code.

    For each country the age/gender-specific series are tried first. If any
    value is missing the gender totals are tried (not implemented: raises
    NotImplementedError when they are complete), and finally the overall
    total, which is skipped with a log message when it is missing too.
    """
    country_lookup = (
        pd.read_csv("./examples/world/data/CountryLookup.csv", sep="\t")
        .set_index("Code")["Country"]
        .to_dict()
    )
    self.value_column = "2019 [YR2019]"
    self.countries = countries
    self.pop = pd.DataFrame()

    # ".." marks a missing value in the data file; blank it so to_numeric yields NaN
    alldata = pd.read_csv("./examples/world/data/CountryData.csv").replace("..", "")
    alldata[self.value_column] = pd.to_numeric(alldata[self.value_column])

    def series_for(code, pattern):
        # rows for one country whose series code matches the regular expression
        return alldata[(alldata["Country Code"] == code)
                       & (alldata["Series Code"]).str.match(pattern)]

    def has_gaps(frame):
        # True when any value in the value column is missing
        return frame[self.value_column].isnull().values.any()

    for country in self.countries:
        country_name = country_lookup[country]
        neworder.log("Microsynthesising population for %s" % country_name)
        data = series_for(country, "SP.POP.*(FE|MA)$")

        if not has_gaps(data):
            # split the series code on "." and pivot on its last two parts
            data = pd.concat([data, data["Series Code"].str.split(".", expand=True)], axis=1) \
                .drop(["Country Code", "Series Code", 0, 1], axis=1) \
                .set_index([2, 3]).unstack()
            # get synth pop for the country
            self._generate(data.values, country)
            continue

        # fallback to gender totals if age-specific values not available
        neworder.log("%s: age-gender specific population data not available" % country_name)
        data = series_for(country, "^SP.POP.TOTL.(FE|MA).IN$")
        if not has_gaps(data):
            raise NotImplementedError("microsynth from M/F totals")

        # fallback to overall total if gender-specific values not available
        neworder.log("%s: gender specific population data not available" % country_name)
        data = series_for(country, "^SP.POP.TOTL$")
        assert len(data) == 1
        if np.isnan(data[self.value_column].values):
            neworder.log("%s: total population data not available - skipping" % country)
        else:
            self._generate_from_total(data[self.value_column].values, country)
def finalise(self):
    """Log observed and actual mortality, tidy the summary table, and plot the model.

    "Observed" divides deaths by the cases assumed to be detected; "actual"
    divides deaths by self.npeople. The summary table has its missing values
    zero-filled, is re-indexed from 1, and has its columns renamed to the
    State enum names before plotting.
    """
    deaths = sum(~neworder.time.isnever(self.pop.tDeceased.values))

    # simple measure of test coverage: 100% of severe and above, 25% of mild
    severe_cases = sum(~neworder.time.isnever(self.pop.tSevere.values))
    mild_cases = sum(~neworder.time.isnever(self.pop.tMild.values))
    observed_cases = severe_cases + 0.25 * mild_cases

    # both percentages use two decimals; the original "%.f" rounded the
    # actual rate to a whole number
    neworder.log("Mortality: observed = %.2f%%, actual = %.2f%%"
                 % (100.0 * deaths / observed_cases, 100.0 * deaths / self.npeople))

    self.summary = self.summary.fillna(0)
    self.summary.index = range(1, len(self.summary) + 1)
    # use the string representations of these int enums as column names
    self.summary.rename(columns={s: State(s).name for s in self.summary.columns.values}, inplace=True)

    Graphics().plot(self)
def calc_life_expectancy(self):
    """Return the mean simulated time of death, logging it beside a directly sampled one.

    Adds a TimeOfDeathNHPP column to self.population holding arrival times
    drawn straight from the mortality hazard rates.
    """
    people = self.population

    # ensure all people have died
    assert np.sum(people.Alive) == 0

    # in this case mortality can be computed directly by modelling a
    # non-homogeneous Poisson process (the original author cites the
    # Lewis-Shedler algorithm)
    people["TimeOfDeathNHPP"] = neworder.first_arrival(
        self.mortality_hazard.Rate.values, neworder.timestep, len(people))

    simulated_mean = np.mean(people.TimeOfDeath)
    direct_mean = np.mean(people.TimeOfDeathNHPP)
    neworder.log("%f vs %f" % (simulated_mean, direct_mean))
    return simulated_mean
def greeks(self, pv):
    """Gather the PV from every process and log sensitivities on rank 0.

    The values logged are pvs[0] itself, a central difference of pvs[1] and
    pvs[2] (delta), their second difference about pvs[0] (gamma), and
    pvs[3] - pvs[0] (vega).
    """
    neworder.sync()
    pvs = neworder.gather(pv, 0)

    # only the root process reports
    if neworder.rank() != 0:
        return

    neworder.log("PV=%f" % pvs[0])
    neworder.log("delta=%f" % ((pvs[1] - pvs[2]) / 2))
    neworder.log("gamma=%f" % ((pvs[1] - 2 * pvs[0] + pvs[2])))
    neworder.log("vega 10bp=%f" % (pvs[3] - pvs[0]))
def pop_crimes():
    """Return the crimes in the next timestep window as CSV text and advance the global time.

    The model is run first whenever the current time has reached the latest
    crime time already sampled.
    """
    global model, time, timestep

    # TODO this is inefficient
    if time >= model.crimes.time.max():
        no.log("Sampling crimes in %s for month beginning %s"
               % (model.force_area(), model.timeline().time()))
        no.run(model)

    # half-open window [time, time + timestep)
    window_end = time + timestep
    in_window = (model.crimes.time >= time) & (model.crimes.time < window_end)

    with StringIO() as csv_out:
        model.crimes[in_window].to_csv(csv_out)
        # only advance the clock once the window has been serialised
        time = window_end
        return csv_out.getvalue()
def divorce(self):
    """Sample a divorce flag for everyone from the current year's divorce rates.

    Builds a per-age-group rate table for the current timeline year, merges
    it onto self.pp, and stores the sampled hazard outcome in the
    "__divorce" column. Both the rate table and self.pp are logged.
    """
    # rate columns in self.drate are keyed by the year as a string
    year = str(int(neworder.timeline.time()))
    drate = pd.DataFrame({
        "agegrp": self.drate.agegrp,
        "divorce_rate": self.drate[year].values
    })
    neworder.log(drate)

    # TODO need to map agegrp to age to merge
    # read back the column this merge adds ("divorce_rate"); the original
    # read "fertility_rate", which is not the rate built above
    drates = pd.merge(self.pp, drate, how='left').fillna(0.0)["divorce_rate"].values
    self.pp["__divorce"] = neworder.hazard(drates * neworder.timestep)
    neworder.log(self.pp)
def greeks(self) -> None:
    """Gather the PV from every process and log sensitivities where results arrive.

    The values logged are pvs[0] itself, a central difference of pvs[1] and
    pvs[2] (delta), their second difference about pvs[0] (gamma), and
    pvs[3] - pvs[0] (vega).
    """
    # get all the results
    pvs = comm.gather(self.pv, 0)

    # compute sensitivities on rank 0 only: nothing to do where the result is falsy
    if not pvs:
        return

    neworder.log(f"PV={pvs[0]:.3f}")
    neworder.log(f"delta={(pvs[1] - pvs[2]) / 2:.3f}")
    neworder.log(f"gamma={(pvs[1] - 2 * pvs[0] + pvs[2]):.3f}")
    neworder.log(f"vega 10bp={pvs[3] - pvs[0]:.3f}")
def age(self, dt):
    """Apply the household-type transition to every area and top up to the projection.

    For each area the "LC4408_C_AHTHUK11" column is transitioned in place.
    If the area held fewer households than projected for the current year,
    the shortfall is made up by sampling existing rows and appending them.

    Arguments:
        dt: unused by this method.
    """
    # imported locally so the block does not depend on a module-level alias
    import pandas as pd

    col = "LC4408_C_AHTHUK11"
    for area in self.areas:
        actual = len(self.pop[area])
        # NOTE(review): the projection is looked up by year only, so every
        # area is compared with the same figure - confirm this is intended
        projected = int(self.projection.loc[
            self.projection["PROJECTED_YEAR_NAME"] == int(no.time), "OBS_VALUE"].values[0])

        no.df.transition(self.cat[col], self.t, self.pop[area], col)

        if actual < projected:
            deficit = projected - actual
            no.log("sampling deficit %d households (vs projection)" % deficit)
            # NOTE(review): sample() draws without replacement, so it raises
            # if the deficit exceeds the households currently in the area
            newbuilds = self.pop[area].sample(deficit)
            # store the result back under this area; the original assigned to
            # self.pop itself, replacing every area's data with one table.
            # pd.concat replaces DataFrame.append, removed in pandas 2.0
            self.pop[area] = pd.concat([self.pop[area], newbuilds], ignore_index=True)
def get_crimes(start, end):
    """Return the crimes with start <= time < end as CSV text.

    start and end are strings parsed with TIME_FORMAT. The global model is
    run first when start is at or beyond the model's current time.
    """
    global model, timestep

    window_start = datetime.strptime(start, TIME_FORMAT)
    window_end = datetime.strptime(end, TIME_FORMAT)

    # NB model time is the start of the *next* (as yet unsampled) timestep
    if window_start >= model.timeline().time():
        no.log("%s Sampling crimes in %s for month beginning %s..."
               % (datetime.now(), model.force_area(), model.timeline().time()))
        no.run(model)
        no.log("%s sampling complete" % datetime.now())

    in_window = (model.crimes.time >= window_start) & (model.crimes.time < window_end)
    with StringIO() as csv_out:
        model.crimes[in_window].to_csv(csv_out)
        return csv_out.getvalue()