def test_neigh_indexing(self):
    """If neighs are np arrays, they index differently than tuples

    Indexes a random grid with the neighbor indices produced by the NGF
    and the GF implementations and checks that both select equal values.
    """
    # Called only to check that it runs on a boolean grid; the result is
    # not inspected. The builtin `bool` is used as dtype because the
    # `np.bool` alias was deprecated in NumPy 1.20 and removed in 1.24.
    NGF.get_eligible_chs(np.zeros((7, 7, 70), dtype=bool), (3, 2))

    somegrid = np.random.uniform(size=(7, 7, 70))

    # Index with the complete "separate" neighbor result
    n1 = somegrid[NGF.neighbors_sep(2, 3, 2, False)]
    n2 = somegrid[GF.neighbors(2, 3, 2, separate=True, include_self=False)]
    assert (n1 == n2).all()

    # Index with only the first element of the non-separate result
    n1 = somegrid[NGF.neighbors(2, 3, 2, False)[0]]
    n2 = somegrid[GF.neighbors(2, 3, 2, include_self=False)[0]]
    assert (n1 == n2).all()
def run(self):
    """Log network outputs for NEW-event afterstates of two fixed cells.

    Instantiates the strategy, then for each of the cells (3, 4) and
    (4, 4): builds the afterstate feature reps (with `range(70)` passed as
    the channel argument), runs them through the strategy's net, and logs
    the result followed by `GF.nom_chs` for that cell. Everything is
    logged at error level.
    """
    from gridfuncs import GF
    from eventgen import CEvent

    strat = self.stratclass(self.pp, logger=self.logger)
    for cell in ((3, 4), (4, 4)):
        freps = GF.afterstate_freps(strat.grid, cell, CEvent.NEW, range(70))
        self.logger.error(strat.net.forward(freps))
        self.logger.error(GF.nom_chs[cell])
def test_neighs(self):
    """Compare NGF and GF neighbor lookups for every cell of a 7x7 grid."""
    cells = [(row, col) for row in range(7) for col in range(7)]
    for row, col in cells:
        # Plain (non-separate) distance-2 neighbors must compare equal
        ngf_neighs = NGF.neighbors(2, row, col, False)
        gf_neighs = GF.neighbors(2, row, col)
        assert (ngf_neighs == gf_neighs)

        # "Separate" results are compared element-wise, first element
        # then second, for several distances
        for dist in [1, 2, 4]:
            ngf_sep = NGF.neighbors_sep(dist, row, col, False)
            gf_sep = GF.neighbors(
                dist, row, col, separate=True, include_self=False)
            for axis in (0, 1):
                assert ((ngf_sep[axis] == gf_sep[axis]).all())
def mark_neighs(self, row, col, c1="#808080", c2="#DCDCDC", c3="#C0C0C0"):
    """Recolor the hexagon at (row, col) and the hexagons around it.

    The cell itself is filled with `c1`, the cells returned by
    `GF.neighbors(2, row, col)` with `c2`, and finally the cells returned
    by `GF.neighbors(1, row, col)` with `c3`. The distance-1 pass runs
    last, so a cell present in both neighbor sets ends up with `c3`.
    """

    def fill(r, c, color):
        hexagon = self.hexagons[r][c]
        self.can.itemconfigure(hexagon.tags, fill=color)

    fill(row, col, c1)
    # Order matters: distance 2 first, then distance 1 paints over it
    for dist, color in ((2, c2), (1, c3)):
        for neigh in GF.neighbors(dist, row, col):
            fill(neigh[0], neigh[1], color)
def update_qval_experience(self, *args, **kwargs):
    """
    Update qval for pp['batch_size'] experience tuples,
    sampled from the experience replay memory.

    Does nothing until the buffer holds at least pp['buffer_size']
    entries. After the backward pass, the sampled entries get new
    priorities: |TD error| + prioritized_replay_eps.
    """
    if len(self.exp_buffer) < self.pp['buffer_size']:
        # Can't backprop before exp store has enough experiences
        return

    batch_size = self.pp['batch_size']
    beta = self.pri_beta_schedule.value(self.i)
    data, weights, batch_idxes = self.exp_buffer.sample(batch_size, beta=beta)

    if self.pp['freps']:
        # Replace raw grids with their feature representations
        freps = GF.feature_reps(data['grids'])
        next_freps = GF.feature_reps(data['next_grids'])
        data.update(freps=freps, next_freps=next_freps, next_chs=None)

    data['weights'] = weights
    td_errs = self.backward(**data, gamma=self.gamma)
    self.exp_buffer.update_priorities(
        batch_idxes, np.abs(td_errs) + self.prioritized_replay_eps)
def test_feature_rep(self):
    """Check `GF.feature_reps` against hand-built expectations.

    Three grids are used: empty, channel 0 in use everywhere, and a single
    channel (ch9) in use at cell (1, 2). Each is verified on its own and
    as part of a stacked batch, and the input grid must not be modified.
    """

    def check_n_free_self(grid, n_free):
        # The last layer of a feature rep is compared against `n_free`
        self.assertTrue((grid[:, :, -1] == n_free).all(),
                        (grid[:, :, -1], n_free))

    def check_n_used_neighs(grid, n_used):
        # All layers but the last are compared against `n_used`
        self.assertTrue((grid[:, :, :-1] == n_used).all())

    rows, cols, n_channels = 7, 7, 70

    # Three grids in one array. They should not affect each other's
    # feature representation
    grid1 = np.zeros((rows, cols, n_channels), dtype=bool)
    grid2 = np.zeros((rows, cols, n_channels), dtype=bool)
    grid3 = np.zeros((rows, cols, n_channels), dtype=bool)
    grid2[:, :, 0] = 1
    grid3[1, 2, 9] = 1
    grid3c = np.copy(grid3)
    fgrid1 = GF.feature_reps(grid1)[0]
    fgrid2 = GF.feature_reps(grid2)[0]
    fgrid3 = GF.feature_reps(grid3)[0]

    # Verify that single- and multi-version works the same
    grids = np.array([grid1, grid2, grid3])
    fgrids = GF.feature_reps(grids)
    assert (grid3 == grid3c).all()
    self.assertTrue((fgrids[0] == fgrid1).all())
    self.assertTrue((fgrids[1] == fgrid2).all())
    self.assertTrue((fgrids[2] == fgrid3).all())

    # Verify Grid #1
    # No cell has any channels in use, i.e. all are free
    check_n_free_self(fgrid1, np.ones((rows, cols)) * n_channels)
    # No cell has a channel in use by any of its neighbors
    check_n_used_neighs(fgrid1, np.zeros((rows, cols, n_channels)))

    # Verify Grid #2
    # All cells have one channel in use
    check_n_free_self(fgrid2, np.ones((rows, cols)) * (n_channels - 1))
    # For channel 0 the expected count in each cell is the number of its
    # distance-4 neighbors plus one ('n_neighs' depends on the cell
    # coordinates; the extra one presumably accounts for the cell itself,
    # matching the include_self=True expectation used for grid #3).
    n_used2 = np.zeros((rows, cols, n_channels))
    for r in range(rows):
        for c in range(cols):
            n_neighs = len(GF.neighbors(4, r, c))
            n_used2[r][c][0] = n_neighs + 1
    check_n_used_neighs(fgrid2, n_used2)

    # Verify Grid #3
    # Only cell (row, col) = (1, 2) has a channel in use (ch9)
    n_free3 = np.ones((rows, cols)) * n_channels
    n_free3[1][2] = n_channels - 1
    # Previously this expectation was built but never asserted
    check_n_free_self(fgrid3, n_free3)
    # The expected count for ch9 is 1 at (1, 2) itself and at each of its
    # distance-4 neighbors (include_self=True), and 0 everywhere else.
    n_used3 = np.zeros((rows, cols, n_channels))
    neighs3 = GF.neighbors(4, 1, 2, separate=True, include_self=True)
    n_used3[(*neighs3, np.repeat(9, len(neighs3[0])))] = 1
    check_n_used_neighs(fgrid3, n_used3)
def optimal_ch(self, ce_type, cell) -> Tuple[int, float]:
    """Pick a channel for the event at `cell` from the output of `self.forward`.

    Candidate channels are `GF.get_eligible_chs(self.grid, cell)` for
    NEW/HOFF events and the nonzero indices of `self.grid[cell]` otherwise.
    With the hard-coded `greedy = True`, the candidate with the lowest
    `a_dist` entry is chosen for END events and the highest for all others.

    Returns:
        `(ch, val)`, where `val` is the second output of `self.forward`,
        or `(None, None)` when there are no candidate channels.

    Raises:
        Exception: if `val` is inf or NaN.
    """
    if ce_type == CEvent.NEW or ce_type == CEvent.HOFF:
        # Calls eligible for assignment
        chs = GF.get_eligible_chs(self.grid, cell)
    else:
        # Calls in progress
        chs = np.nonzero(self.grid[cell])[0]
    if len(chs) == 0:
        # An END event without any channel in use is not expected
        assert ce_type != CEvent.END
        return (None, None)

    a_dist, val = self.forward(cell=cell, ce_type=ce_type)
    # Sampling branches below are only reachable if this is set to False
    greedy = True
    if ce_type == CEvent.END:
        if greedy:
            idx = np.argmin(a_dist[chs])
        else:
            valid_a_dist = softmax(1 - a_dist[chs])
            # np.arange, not the non-existent np.range
            idx = np.random.choice(
                np.arange(len(valid_a_dist)), p=valid_a_dist)
    else:
        if greedy:
            idx = np.argmax(a_dist[chs])
        else:
            valid_a_dist = softmax(a_dist[chs])
            idx = np.random.choice(
                np.arange(len(valid_a_dist)), p=valid_a_dist)
    ch = chs[idx]
    # TODO NOTE verify the above

    # Abort if the value estimate has blown up ('NaN's and 'inf's)
    if np.isinf(val) or np.isnan(val):
        self.logger.error(f"{ce_type}\n{chs}\n{val}\n\n")
        raise Exception
    self.logger.debug(
        f"Optimal ch: {ch} for event {ce_type} of possibilities {chs}")
    return (ch, val)
def test_afterstate_freps(self):
    """
    Test incremental vs naive afterstate feature rep. derivation approach
    """

    def check_frep(frep1, frep2):
        # Single feature rep: compare the last layer first, then the rest
        self.assertTrue(frep1.shape == frep2.shape,
                        (frep1.shape, frep2.shape))
        self.assertTrue(frep2.shape == (7, 7, 71))
        last_same = frep1[:, :, -1] == frep2[:, :, -1]
        last_mismatch = np.where(np.invert(last_same))
        self.assertTrue(
            last_same.all(),
            ("\n", last_mismatch, frep1[:, :, -1], frep2[:, :, -1]))
        rest_same = frep1[:, :, :-1] == frep2[:, :, :-1]
        rest_mismatch = np.where(np.invert(rest_same))
        self.assertTrue(
            rest_same.all(),
            (rest_mismatch, frep1[rest_mismatch], frep2[rest_mismatch]))

    def check_freps(freps1, freps2):
        # Batch of feature reps: compare all-but-last layers first, then
        # the last layer
        self.assertTrue(freps1.shape == freps2.shape,
                        (freps1.shape, freps2.shape))
        self.assertTrue(freps2.shape[1:] == (7, 7, 71))
        rest_same = freps1[:, :, :, :-1] == freps2[:, :, :, :-1]
        rest_mismatch = np.where(np.invert(rest_same))
        self.assertTrue(
            rest_same.all(),
            (rest_mismatch, freps1[rest_mismatch], freps2[rest_mismatch]))
        last_same = freps1[:, :, :, -1] == freps2[:, :, :, -1]
        last_mismatch = np.where(np.invert(last_same))
        self.assertTrue(
            last_same.all(),
            ("\n", last_mismatch, freps1[:, :, :, -1], freps2[:, :, :, -1]))

    def verify_end_event(grid, cell):
        # END event on every channel in use at `cell`: the incremental
        # freps from GF and NGF must both match the naive derivation, and
        # the grid must come out untouched.
        snapshot = np.copy(grid)
        in_use = np.nonzero(grid[cell])[0]
        inc_gf = GF.afterstate_freps(grid, cell, CEvent.END, in_use)
        inc_ngf = NGF.afterstate_freps(grid, cell, CEvent.END, in_use)
        naive = GF.feature_reps(
            GF.afterstates(grid, cell, CEvent.END, in_use))
        check_freps(inc_gf, naive)
        check_freps(inc_ngf, naive)
        assert (grid == snapshot).all()

    rows, cols, n_channels = 7, 7, 70
    shape = (rows, cols, n_channels)

    # Grid 1: No channels in use
    empty = np.zeros(shape, dtype=bool)
    empty_snapshot = np.copy(empty)
    new_cell = (2, 3)
    eligible = GF.get_eligible_chs(empty, new_cell)
    # Check that vanilla gridfuncs (GF) and numba gridfuncs (NGF) work the same
    inc_gf = GF.afterstate_freps(empty, new_cell, CEvent.NEW, eligible)
    inc_ngf = NGF.afterstate_freps(empty, new_cell, CEvent.NEW, eligible)
    # Check that incremental frep function works the same as naive approach
    astates_gf = GF.afterstates(empty, new_cell, CEvent.NEW, eligible)
    astates_ngf = NGF.afterstates(empty, new_cell, CEvent.NEW, eligible)
    naive_gf = GF.feature_reps(astates_gf)  # GF takes multiple grids
    naive_ngf = NGF.feature_rep(
        astates_ngf[0])  # NGF version takes a single grid
    check_frep(naive_gf[0], naive_ngf)
    check_freps(inc_gf, naive_gf)
    check_freps(inc_ngf, naive_gf)
    # Grid should not have been modified by any funcs
    assert (empty == empty_snapshot).all()

    # Grid 2: All channels in use
    full = np.ones(shape, dtype=bool)
    full_cell = (2, 3)
    full[full_cell][4] = 0
    full[(2, 4)][:] = 0
    verify_end_event(full, full_cell)

    # Grid 3: One channel in use
    single = np.zeros(shape, dtype=bool)
    single_cell = (4, 1)
    single[single_cell][4] = 1
    verify_end_event(single, single_cell)

    # Grid 4: incremental NEW-event frep vs. frep of the manually updated grid
    manual = np.zeros(shape, dtype=bool)
    manual[(4, 1)][4] = 1
    manual_cell = (3, 1)
    inc_single = NGF.afterstate_freps(manual, manual_cell, CEvent.NEW,
                                      np.array([3]))[0]
    manual[manual_cell][3] = 1
    check_frep(inc_single, NGF.feature_rep(manual))
def optimal_ch(self, ce_type, cell) -> tuple:
    """
    Select a channel and return selected channel, greedy channel,
    qval and frep for both, in addition to prob of picking
    selected channel, and current frep

    Side effects: appends the mean q-value to `self.qval_means` and, for
    non-END events, decays `self.epsilon`.

    Returns:
        A 3-tuple `(Chs(ch, max_ch, p), Qvals(q_ch, q_max_ch),
        Freps(cur_frep, frep_ch, frep_max_ch))`. When a NEW/HOFF event has
        no eligible channels, every field is None except `p`, which is 1.
    """
    if ce_type == CEvent.NEW or ce_type == CEvent.HOFF:
        chs = NGF.get_eligible_chs(self.grid, cell)
        if len(chs) == 0:
            return Chs(None, None, 1), Qvals(None, None), Freps(None, None, None)
    else:
        # Channels currently in use at the cell
        chs = np.nonzero(self.grid[cell])[0]

    next_event = self.env.eventgen.peek()
    if self.pp['hoff_lookahead'] and next_event[1] == CEvent.HOFF:
        # Lookahead is only expected when the current event is an END
        assert ce_type == CEvent.END
        qvals_dense, cur_frep, freps = self.get_hoff_qvals(
            self.grid, cell, ce_type, chs, next_event[2])
    else:
        qvals_dense, cur_frep, freps = self.get_qvals(
            self.grid, cell, ce_type, chs)
    self.qval_means.append(np.mean(qvals_dense))

    if ce_type == CEvent.END:
        # END events are always handled greedily
        idx = max_idx = np.argmax(qvals_dense)
        ch = max_ch = chs[idx]
        p = 1
        if self.pp['debug']:
            # Compare net outputs for single vs. batched forward passes
            val = qvals_dense[idx]
            _, freps1 = self.afterstate_freps(self.grid, cell, ce_type,
                                              np.array([ch]))
            v1 = self.net.forward(freps1)[0]
            frep2 = GF.afterstate_freps_naive(self.grid, cell, ce_type,
                                              chs)[idx]
            v2 = self.net.forward([frep2])[0]
            v3 = self.net.forward([freps1[0], freps1[0]])[0]
            v4 = self.net.forward([frep2, frep2])[0]
            # val: multi freps, multi tf
            # v1: single frep, single tf
            # v2: multi freps, single tf
            # v3: single freps, multi tf
            # v4: multi freps, multi tf
            # ON CPU:
            # (val == v3 == v4) != (v1 == v2)
            # ON GPU: All the same...
            # CONCLUSION: Running multiple samples at once through TF
            # yields different (higher accuracy) results
            print(val, v1, v2, v3, v4, "\n")
    else:
        ch, idx, p = self.exploration_policy(self.epsilon, chs, qvals_dense,
                                             cell)
        max_idx = np.argmax(qvals_dense)
        max_ch = chs[max_idx]
        # Decay the exploration rate after each exploratory selection
        if self.eps_log_decay:
            self.epsilon = self.epsilon0 / np.sqrt(
                self.t * 60 / self.eps_log_decay)
        else:
            self.epsilon *= self.epsilon_decay

    assert ch is not None, f"ch is none for {ce_type}\n{chs}\n{qvals_dense}\n"
    return Chs(ch, max_ch, p), Qvals(qvals_dense[idx], qvals_dense[max_idx]), Freps(
        cur_frep, freps[idx], freps[max_idx])