def age(self, dt):
    """Apply household-type transitions per area, then top up to the projection.

    For each area in ``self.areas`` the area's population DataFrame is passed
    to ``no.transition`` on the ``LC4408_C_AHTHUK11`` column. If the area held
    fewer households than the projected figure for the current year, extra
    rows are sampled from the area's own population and appended to it.

    Args:
        dt: timestep size; not used by this method.
    """
    # Local import keeps this block self-contained; harmless if the module
    # already imports pandas at file level.
    import pandas as pd

    col = "LC4408_C_AHTHUK11"
    for area in self.areas:
        area_pop = self.pop[area]
        # Head-count is taken before the transition, as in the original.
        actual = len(area_pop)
        # NOTE(review): the projection lookup is keyed on year only, so every
        # area is compared against the same figure - confirm this is intended.
        projected = int(self.projection.loc[
            self.projection["PROJECTED_YEAR_NAME"] == int(no.time),
            "OBS_VALUE"].values[0])

        # Return value is ignored, so this is presumably an in-place update.
        no.transition(self.cat[col], self.t, area_pop, col)

        deficit = projected - actual
        if deficit > 0:
            no.log("sampling deficit %d households (vs projection)" % deficit)
            # Sampling without replacement cannot yield more rows than exist,
            # so fall back to replacement only when the deficit requires it.
            newbuilds = area_pop.sample(deficit, replace=deficit > actual)
            # Store the enlarged frame back under its area key. The original
            # assigned it to self.pop, discarding every other area.
            self.pop[area] = pd.concat([area_pop, newbuilds],
                                       ignore_index=True)
def test(self):
    """Move individuals between processes according to their new state.

    ``neworder.transition`` is applied to the ``state`` column. Rows whose
    state equals another process's rank are sent to that process, only rows
    whose state equals this process's rank are kept, and the rows received
    from every other process are added to the local population.
    """
    # Local import keeps this block self-contained; harmless if the module
    # already imports pandas at file level.
    import pandas as pd

    # generate some movement
    neworder.transition(self.s, self.p, self.pop, "state")

    rank = neworder.rank()
    others = [r for r in range(neworder.size()) if r != rank]

    # send migrants
    for dest in others:
        emigrants = self.pop[self.pop.state == dest]
        #neworder.log("sending %d emigrants to %d" % (len(emigrants), dest))
        neworder.send(emigrants, dest)

    # remove the emigrants
    self.pop = self.pop[self.pop.state == rank]

    # receive migrants
    arrivals = []
    for src in others:
        immigrants = neworder.receive(src)
        #neworder.log("received %d immigrants from %d" % (len(immigrants), src))
        arrivals.append(immigrants)

    # DataFrame.append was removed in pandas 2.0. One concat gives the same
    # row order and avoids re-copying the population once per sender.
    if arrivals:
        self.pop = pd.concat([self.pop] + arrivals)
def age(self, dt):
    """Apply household-type transitions, then top up each LAD to its projection.

    ``no.transition`` is applied to the ``LC4408_C_AHTHUK11`` column of the
    whole population. Then, for every distinct value in the ``LAD`` column,
    the household count is compared with the ``OBS_VALUE`` projected for the
    current year and that geography. Any shortfall is made up by sampling
    rows from that LAD and appending them to the population.

    LADs with no projection row for the current year are logged and skipped.

    Args:
        dt: timestep size; not used by this method.
    """
    # Local import keeps this block self-contained; harmless if the module
    # already imports pandas at file level.
    import pandas as pd

    col = "LC4408_C_AHTHUK11"
    no.transition(self.cat[col], self.t, self.pop, col)

    year = int(no.time)
    # ensure area totals match projections
    for lad in self.pop["LAD"].unique():
        lad_pop = self.pop[self.pop["LAD"] == lad]
        actual = len(lad_pop)
        matches = self.projection.loc[
            (self.projection["PROJECTED_YEAR_NAME"] == year)
            & (self.projection["GEOGRAPHY_CODE"] == lad), "OBS_VALUE"]
        if len(matches) == 0:
            # The original passed the arguments as a second parameter instead
            # of formatting them, then fell through to an IndexError below.
            no.log("WARNING %s cannot find household projection data for %d"
                   % (lad, year))
            continue

        projected = int(matches.values[0])
        deficit = projected - actual
        if deficit > 0:
            no.log("sampling deficit %d households (vs projection)" % deficit)
            # Sampling without replacement cannot yield more rows than exist,
            # so fall back to replacement only when the deficit requires it.
            newbuilds = lad_pop.sample(deficit, replace=deficit > actual)
            # DataFrame.append was removed in pandas 2.0; concat is equivalent.
            self.pop = pd.concat([self.pop, newbuilds], ignore_index=True)
def test():
    """Exercise ``no.transition`` on one column of the test DataFrame.

    Three transition matrices are applied in sequence: identity, a forced
    2->3 move, and an even split of 3 between 0 and 3. After each one the
    set of values present in the column is checked.

    Returns:
        True when no check failed.
    """
    column = "DC2101EW_C_ETHPUK11"
    checker = test_.Test()
    frame = pd.read_csv("../../tests/df.csv")

    states = np.array(range(4))
    # an identity matrix leaves every row in its current state
    matrix = np.identity(len(states))

    no.transition(states, matrix, frame, column)
    seen = frame[column].unique()
    checker.check(len(seen) == 1 and seen[0] == 2)

    # NOTE transition matrix interpreted as being COLUMN MAJOR due to pandas
    # DataFrame storing data in column-major order
    # everything in state 2 must move to state 3
    matrix[2, 2] = 0.0
    matrix[3, 2] = 1.0
    no.transition(states, matrix, frame, column)
    seen = frame[column].unique()
    checker.check(len(seen) == 1 and seen[0] == 3)

    # roughly half of state 3 moves to state 0, the rest stay put
    matrix[0, 3] = 0.5
    matrix[3, 3] = 0.5
    no.transition(states, matrix, frame, column)
    present = np.sort(frame[column].unique())
    checker.check(np.array_equal(present, np.array([0, 3])))

    return not checker.any_failed


# Retained scratch code (not executed):
# def todo():
#     # define some global variables describing where the starting population
#     # and the parameters of the dynamics come from
#     initial_population = "examples/households/data/ssm_hh_E09000001_OA11_2011.csv"
#     hh = pd.read_csv(initial_population)
#     print(hh.columns.values)
#     c = hh.LC4408_C_AHTHUK11.unique()
#     print(c)
#     t = np.identity(len(c))
#     # [ 3 5 1 2 -1 4]
#     t = np.array([[0.9, 0.05, 0.05, 0.0, 0.0, 0.0],
#                   [0.05, 0.9, 0.04, 0.01, 0.0, 0.0],
#                   [0.0, 0.05, 0.9, 0.05, 0.0, 0.0],
#                   [0.0, 0.0, 0.05, 0.9, 0.05, 0.0],
#                   [0.1, 0.1, 0.1, 0.1, 0.5, 0.1],
#                   [0.0, 0.0, 0.00, 0.0, 0.2, 0.8]])
#     #print(t[1]) # horz
#     #print(t[:,1]) # vert
#     tc = np.cumsum(t, axis=1)
#     # TODO timing...
#     u = np.random.sample(len(hh))
#     for i in range(len(hh)):
#         current = hh.loc[i, "LC4408_C_AHTHUK11"]
#         hh.loc[i, "LC4408_C_AHTHUK11"] = sample(u[i], tc[current], c)
#     print(hh.LC4408_C_AHTHUK11.head())
#     tc = np.cumsum(t, axis=1)
#     print(np.cumsum(t[1]))
#     #print()
def test():
    """Check the time sentinels, random stream, lazy eval and DataFrame ops.

    Covers the ordering of ``distant_past`` / ``far_future``, the comparison
    behaviour of ``never``, scalar and array ``isnever``, ``ustream``,
    ``lazy_eval``, ``directmod`` and ``transition``.

    Returns:
        True when no check failed.
    """
    column = "DC2101EW_C_ETHPUK11"
    csv_path = "../../tests/df.csv"
    checker = test_.Test()

    # the two bounds must bracket large negative and large positive times
    for x in (-1e10, 1e10):
        checker.check(no.distant_past() < x)
        checker.check(no.far_future() > x)

    # the remaining comparisons use the positive value
    x = 1e10
    # never is not equal to anything, itself included
    checker.check(no.never() != no.never())
    checker.check(not no.never() == x)
    checker.check(no.never() != x)
    # and it does not order against an ordinary value either way
    checker.check(not x < no.never())
    checker.check(not x >= no.never())
    # isnever distinguishes an ordinary value from the sentinel
    checker.check(not no.isnever(x))
    checker.check(no.isnever(no.never()))

    #checker.check(False)

    # stream of 10000 draws: type, length and mean close to one half
    draws = no.ustream(10000)
    checker.check(isinstance(draws, np.ndarray))
    checker.check(len(draws) == 10000)
    checker.check(abs(np.mean(draws) - 0.5) < 0.02)

    deferred = no.lazy_eval("2 + 2")
    checker.check(deferred() == 4)

    # Retained scratch code (not executed):
    # # TODO this overlaps/duplicates tests in op.py - reorganise
    # # test thinning algorithm for non-homogeneous Poisson process
    # h = np.array([0.014] * 10)
    # #l = no.stopping(h)
    # l = no.first_arrival(h, 1.0, 10000)
    # t.check(abs(np.mean(l) * 0.014 - 1.0) < 0.03)
    # # varying timestep should make no difference
    # l = no.first_arrival(h, 0.1, 10000)
    # t.check(abs(np.mean(l) * 0.014 - 1.0) < 0.03)
    # # test a certain(ish) hazard rate
    # h = np.array([0.99, 0.99, 0.01])
    # l = no.first_arrival(h, 1.0, 10000)
    # no.log("TODO NHPP appears broken: %f" % np.mean(l))
    # # test a zero(ish) hazard rate
    # h = np.array([1e-30, 1e-30, 1e-30, .9999])
    # l = no.first_arrival(h, 1.0, 10000)
    # no.log("TODO NHPP appears broken: %f" % np.mean(l))
    # # this also tests a zero hazard rate
    # h = np.array([i/3000 for i in range(100)])
    # #no.log(h)
    # le = no.first_arrival(h, 1.0, 10000)
    # no.log(sum(le)/len(le))
    # # y
    # h = np.array([0.999, 0.1])
    # le = no.first_arrival(h, 1.0, 1000)
    # no.log(sum(le)/len(le))

    # array form of isnever: all False for ordinary values...
    ordinary_flags = no.isnever(np.full(10, 1.0))
    checker.check(np.all(~ordinary_flags))
    # ...and all True for an array filled with the sentinel
    never_flags = no.isnever(np.full(10, no.never()))
    no.log(never_flags)
    checker.check(np.all(never_flags))

    # DataFrame ops
    # directmod is handed the frame itself; afterwards every entry must be 3
    frame = pd.read_csv(csv_path)
    no.directmod(frame, column)
    expected = np.full(len(frame), 3.0)
    checker.check(np.array_equal(frame[column].values, expected))

    # a uniform transition matrix should leave all four states populated
    frame = pd.read_csv(csv_path)
    states = np.array(range(4))
    uniform = np.full((len(states), len(states)), 0.25)
    #no.log(uniform)
    no.transition(states, uniform, frame, column)
    # it's possible this could fail depending on random draw
    present = np.sort(frame[column].unique())
    checker.check(np.array_equal(present, np.arange(4)))

    # df2 = df.copy()
    # df3 = no.append(df,df2)
    # t.check(len(df3) == len(df) + len(df2))

    return not checker.any_failed