def test_neigh_indexing(self):
    """Check that NGF and GF neighbor lookups select the same grid entries.

    If neighs are np arrays, they index differently than tuples, so the
    values picked out of a random grid through each implementation are
    compared element-wise.
    """
    # Call on an all-False grid; the result is not inspected. The builtin
    # ``bool`` is used as dtype because the ``np.bool`` alias was deprecated
    # in NumPy 1.20 and removed in NumPy 1.24.
    NGF.get_eligible_chs(np.zeros((7, 7, 70), dtype=bool), (3, 2))
    somegrid = np.random.uniform(size=(7, 7, 70))

    # All neighbors at once, in "separate" index form.
    n1 = somegrid[NGF.neighbors_sep(2, 3, 2, False)]
    n2 = somegrid[GF.neighbors(2, 3, 2, separate=True, include_self=False)]
    assert (n1 == n2).all()

    # First neighbor only, in the default index form.
    n1 = somegrid[NGF.neighbors(2, 3, 2, False)[0]]
    n2 = somegrid[GF.neighbors(2, 3, 2, include_self=False)[0]]
    assert (n1 == n2).all()
def optimal_ch(self, ce_type, cell) -> int:
    """Select a channel for an event of type 'ce_type' at 'cell'.

    NEW/HOFF events choose among the eligible channels through the
    exploration policy; END events greedily take the in-use channel with
    the highest q-value.

    Note that, despite the annotation, a 5-tuple is returned:
    (ch, q-value of ch, greedy channel, freps, astates), or five 'None's
    when no channel is eligible for a NEW/HOFF event.
    """
    if ce_type in (CEvent.NEW, CEvent.HOFF):
        chs = NGF.get_eligible_chs(self.grid, cell)
        if len(chs) == 0:
            return (None, None, None, None, None)
    else:
        chs = np.nonzero(self.grid[cell])[0]

    qvals_dense, _, freps, astates = self.get_qvals(
        self.grid, cell, ce_type, chs)
    self.qval_means.append(np.mean(qvals_dense))

    if ce_type == CEvent.END:
        # Reassignment on termination is always greedy
        idx = np.argmax(qvals_dense)
        ch = chs[idx]
        max_ch = ch
    else:
        ch, idx, _ = self.exploration_policy(
            self.epsilon, chs, qvals_dense, cell)
        max_ch = chs[np.argmax(qvals_dense)]
        self.epsilon *= self.epsilon_decay

    if ch is None:
        self.logger.error(
            f"ch is none for {ce_type}\n{chs}\n{qvals_dense}\n")
    selected_qval = qvals_dense[idx]
    return ch, selected_qval, max_ch, freps, astates
def optimal_ch_val(self, ce_type, cell) -> int:
    """Select a channel using the value net's estimates of the afterstates.

    NEW/HOFF events choose among the eligible channels through the
    exploration policy; END events greedily take the in-use channel with
    the highest value.

    Note that, despite the annotation, a 5-tuple is returned:
    (ch, value of ch, current frep, frep after taking ch, neglogpac of ch),
    or five 'None's when no channel is eligible for a NEW/HOFF event.
    """
    if ce_type in (CEvent.NEW, CEvent.HOFF):
        chs = NGF.get_eligible_chs(self.grid, cell)
        if len(chs) == 0:
            return (None, None, None, None, None)
    else:
        chs = np.nonzero(self.grid[cell])[0]

    old_frep, freps = self.afterstate_freps(self.grid, cell, ce_type, chs)
    # One value per candidate channel in 'chs'
    qvals_dense = self.net.forward_value(freps).reshape(len(chs))

    if ce_type == CEvent.END:
        idx = np.argmax(qvals_dense)
        ch = chs[idx]
    else:
        ch, idx, _ = self.exploration_policy(
            self.epsilon, chs, qvals_dense, cell)
        self.epsilon *= self.epsilon_decay

    if ch is None:
        self.logger.error(
            f"ch is none for {ce_type}\n{chs}\n{qvals_dense}\n")

    selected_val = qvals_dense[idx]
    selected_frep = freps[idx]
    neglogpac = self.net.get_neglogpac(old_frep, cell, ch)
    return ch, selected_val, old_frep, selected_frep, neglogpac
def optimal_ch(self, ce_type, cell) -> Tuple[int, float, int]:
    """Choose a channel for the event of type 'ce_type' at 'cell'.

    For NEW/HOFF events the channel is picked among the eligible channels
    by the exploration policy. For END events the in-use channel with the
    minimum q-value is picked greedily.

    Returns (ch, max_ch, p) where 'ch' is the selected channel, 'max_ch'
    is the greedy channel and 'p' is the third value handed back by the
    exploration policy (1 for END events). Returns (None, None, 0) if no
    channel is eligible for assignment.
    """
    inuse = np.nonzero(self.grid[cell])[0]
    n_used = len(inuse)

    if ce_type in (CEvent.NEW, CEvent.HOFF):
        chs = NGF.get_eligible_chs(self.grid, cell)
        if len(chs) == 0:
            # Nothing can be assigned
            return (None, None, 0)
    else:
        # Candidates are the channels in use at the cell, including the
        # channel scheduled for termination. The latter is kept because it
        # might be the least valuable one, in which case no reassignment
        # is done on call termination.
        chs = inuse
        assert n_used > 0

    # TODO If 'max_ch' turns out not to be useful, then don't return it and
    # avoid running a forward pass through the net if a random action is
    # selected.
    qvals_dense = self.get_qvals(
        cell=cell, n_used=n_used, ce_type=ce_type, chs=chs)

    if ce_type == CEvent.END:
        # Reassignment is always greedy because no learning is done on
        # the reassignment actions.
        ch = max_ch = chs[np.argmin(qvals_dense)]
        p = 1
    else:
        ch, idx, p = self.exploration_policy(
            self.epsilon, chs, qvals_dense, cell)
        if self.eps_log_decay:
            self.epsilon = self.epsilon0 / np.sqrt(
                self.t * 60 / self.eps_log_decay)
        else:
            self.epsilon *= self.epsilon_decay
        max_ch = chs[np.argmax(qvals_dense)]

    # If qvals blow up ('NaN's and 'inf's), ch becomes none.
    if ch is None:
        self.logger.error(
            f"ch is none for {ce_type}\n{chs}\n{qvals_dense}\n")
        raise Exception

    self.logger.debug(
        f"Optimal ch: {ch} for event {ce_type} of possibilities {chs}")
    return (ch, max_ch, p)
def test_get_free_chs(self):
    """Only the channels cleared at a cell and its neighbors are eligible.

    Starts from a fully occupied grid, frees two channels at the cell and
    at every neighbor returned for distance 2, and expects exactly those
    two channels back from NGF.get_eligible_chs.
    """
    cell = (3, 4)
    expected_chs = [0, 4]
    grid = np.ones((7, 7, 70), dtype=bool)

    grid[cell][expected_chs] = 0
    for neigh in NGF.neighbors(2, *cell, False):
        grid[neigh][expected_chs] = 0

    eligible = NGF.get_eligible_chs(grid, cell)
    self._li_set_eq(eligible, expected_chs)
def get_action(self, next_cevent, *args):
    """Pick a uniformly random eligible channel for an arriving call.

    'next_cevent' is indexed as: [1] event type, [2] cell, [3] channel.
    Returns the event's own channel for END events, a random eligible
    channel for NEW/HOFF events, or None if no channel is eligible.
    """
    ce_type, next_cell = next_cevent[1:3]

    if ce_type == CEvent.END:
        # No rearrangement is done when a call terminates.
        return next_cevent[3]

    if ce_type == CEvent.NEW or ce_type == CEvent.HOFF:
        free = NGF.get_eligible_chs(self.grid, next_cell)
        if len(free) == 0:
            return None
        return np.random.choice(free)

    return None
def get_hoff_qvals(self, cell, n_used, ce_type, chs, h_cell, grid):
    """Look ahead for handoffs.

    For each candidate channel in 'chs', the afterstate of executing the
    event in 'cell' is examined. If the hand-off cell 'h_cell' has eligible
    channels in that afterstate, the candidate's value is the maximum
    q-value over those channels; otherwise it is the negated stored q-value
    of the candidate.

    Returns an array with one value per channel in 'chs'.
    """
    afterstates = NGF.afterstates(grid, cell, ce_type, chs)
    # NOTE(review): counted on self.grid rather than on the 'grid' argument
    # or the afterstate -- confirm that this is intended.
    h_n_used = np.count_nonzero(self.grid[h_cell])
    # Fallback values for candidates that leave no eligible hand-off channel
    qvals_dense = -np.copy(self.qvals[cell][chs])

    for i, astate in enumerate(afterstates):
        h_chs = NGF.get_eligible_chs(astate, h_cell)
        if len(h_chs) == 0:
            continue
        # NOTE(review): three positional arguments are passed here --
        # verify them against the signature of get_qvals.
        h_qvals_dense = self.get_qvals(h_cell, h_n_used, h_chs)
        qvals_dense[i] = np.max(h_qvals_dense)

    return qvals_dense
def optimal_ch_pol(self, ce_type, cell) -> int:
    """Let the policy head of the net pick a channel among the candidates.

    Candidates are the eligible channels for NEW/HOFF events and the
    channels in use at the cell otherwise.

    Note that, despite the annotation, a 5-tuple is returned:
    (ch, value of the resulting frep, current frep, resulting frep,
    neglogpac), or five 'None's when no channel is eligible for a NEW/HOFF
    event.
    """
    if ce_type in (CEvent.NEW, CEvent.HOFF):
        chs = NGF.get_eligible_chs(self.grid, cell)
        if len(chs) == 0:
            return (None, None, None, None, None)
    else:
        chs = np.nonzero(self.grid[cell])[0]

    frep = self.feature_rep(self.grid)
    ch, neglogpac = self.net.forward_action(frep, cell, ce_type, chs)

    # Feature representation after executing the chosen channel, and the
    # value net's output for it
    selected = np.array([ch])
    next_frep = NGF.incremental_freps(
        self.grid, frep, cell, ce_type, selected)
    val = self.net.forward_value(next_frep)
    return ch, val, frep, next_frep, neglogpac
def optimal_ch(self, ce_type, cell):
    """Choose a channel for the event of type 'ce_type' at 'cell'.

    NEW/HOFF events select among the eligible channels through the
    exploration policy. END events select among the channels in use:
    the argmax of the hand-off look-ahead values if 'hoff_lookahead' is
    enabled and the next event is a hand-off, else the argmin of the
    regular q-values.

    Returns (ch, max_ch, p, q-value of ch, q-value of max_ch), or
    (None, None, 0, None, None) if no channel is eligible for assignment.
    """
    inuse = np.nonzero(self.grid[cell])[0]
    n_used = len(inuse)

    if ce_type in (CEvent.NEW, CEvent.HOFF):
        chs = NGF.get_eligible_chs(self.grid, cell)
        if len(chs) == 0:
            return (None, None, 0, None, None)
    else:
        chs = inuse
        assert n_used > 0

    if ce_type == CEvent.END:
        upcoming = self.env.eventgen.peek()
        if self.pp['hoff_lookahead'] and upcoming[1] == CEvent.HOFF:
            qvals_dense = self.get_hoff_qvals(
                cell=cell,
                n_used=n_used,
                ce_type=ce_type,
                chs=chs,
                h_cell=upcoming[2],
                grid=self.grid)
            pick = np.argmax
        else:
            qvals_dense = self.get_qvals(
                cell=cell, n_used=n_used, ce_type=ce_type, chs=chs)
            pick = np.argmin
        # Greedy choice; selected and "max" channel coincide
        idx = amax_idx = pick(qvals_dense)
        ch = max_ch = chs[amax_idx]
        p = 1
    else:
        qvals_dense = self.get_qvals(
            cell=cell, n_used=n_used, ce_type=ce_type, chs=chs)
        ch, idx, p = self.exploration_policy(
            self.epsilon, chs, qvals_dense, cell)
        if self.eps_log_decay:
            self.epsilon = self.epsilon0 / np.sqrt(
                self.t * 60 / self.eps_log_decay)
        else:
            self.epsilon *= self.epsilon_decay
        amax_idx = np.argmax(qvals_dense)
        max_ch = chs[amax_idx]

    assert ch is not None
    return (ch, max_ch, p, qvals_dense[idx], qvals_dense[amax_idx])
def get_hoff_qvals(self, grid, cell, ce_type, chs, h_cell):
    """Look ahead for handoffs.

    For every candidate channel in 'chs', build the afterstate of executing
    the event in 'cell', then every afterstate of a subsequent hand-off
    into 'h_cell'. A candidate's value is the best net output among its
    hand-off afterstates (0 if it has none), adjusted into a 2-step return.
    If no candidate allows a hand-off at all, the net is evaluated on the
    first-step afterstates instead.

    h_cell: target hand-off cell

    Returns (qvals_dense, frep of 'grid', freps of the first-step
    afterstates).
    """
    end_astates = NGF.afterstates(grid, cell, ce_type, chs)

    hoff_astates = []
    # Number of hand-off afterstates contributed by each end afterstate
    hoff_counts = []
    for end_astate in end_astates:
        h_chs = NGF.get_eligible_chs(end_astate, h_cell)
        count = 0
        if len(h_chs) > 0:
            h_astates = NGF.afterstates(
                end_astate, h_cell, CEvent.HOFF, h_chs)
            hoff_astates.extend(h_astates)
            count = len(h_astates)
        hoff_counts.append(count)

    cur_frep = self.feature_rep(grid)
    freps = self.feature_reps(end_astates)

    if not hoff_astates:
        # Not possible to assign HOFF for any reass on END.
        qvals_dense = self.net.forward(freps=freps, grids=end_astates)
        return qvals_dense, cur_frep, freps

    hoff_astates = np.array(hoff_astates)
    hfreps = self.feature_reps(hoff_astates)
    hqvals_dense = self.net.forward(freps=hfreps, grids=hoff_astates)
    assert hqvals_dense.shape == (len(hoff_astates), ), hqvals_dense.shape

    # Best hand-off value per candidate; the flat 'hqvals_dense' is laid
    # out in consecutive runs, one run per end afterstate.
    qvals_dense = np.zeros(len(chs))
    start = 0
    for i, count in enumerate(hoff_counts):
        if count > 0:
            qvals_dense[i] = np.max(hqvals_dense[start:start + count])
            start += count

    # Nasty hack for 2-step returns
    # All relevant hoff astates have same count
    reward = np.count_nonzero(hoff_astates[0])
    if self.pp['target'] == 'avg':
        qvals_dense += reward - self.avg_reward
    else:
        gamma = self.pp['gamma']
        qvals_dense *= gamma**2
        qvals_dense += gamma * reward
    return qvals_dense, cur_frep, freps
def get_action(self, next_cevent, *args):
    """Pick a channel, preferring channels flagged in GF.nom_chs_mask.

    'next_cevent' is indexed as: [1] event type, [2] cell, [3] channel.

    NEW/HOFF: return the first eligible channel flagged in the mask for the
    cell; failing that, a random eligible channel, or None if there is no
    eligible channel at all.
    END: return the first in-use channel flagged in the mask for the cell;
    failing that, the event's own channel.
    """
    ce_type, next_cell = next_cevent[1:3]

    if ce_type == CEvent.END:
        inuse_chs = np.nonzero(self.grid[next_cell])[0]
        for ch in inuse_chs:
            if GF.nom_chs_mask[next_cell][ch]:
                return ch
        return next_cevent[3]

    if ce_type == CEvent.NEW or ce_type == CEvent.HOFF:
        eligible_chs = NGF.get_eligible_chs(self.grid, next_cell)
        # When a call arrives in a cell, an unused pre-assigned channel
        # is taken if there is one.
        for ch in eligible_chs:
            if GF.nom_chs_mask[next_cell][ch]:
                return ch
        if len(eligible_chs) == 0:
            return None
        return np.random.choice(eligible_chs)

    return None
def optimal_ch(self, ce_type, cell):
    """Select a channel from the action distribution produced by 'forward'.

    Candidates are the eligible channels for NEW/HOFF events and the
    channels in use at the cell otherwise. With the (currently hard-coded)
    greedy selection, END events take the candidate with the lowest entry
    in the distribution and other events the one with the highest. The
    non-greedy alternative samples a candidate from a softmax over the
    candidate entries.

    Returns (ch, val) where 'val' is the second output of 'forward', or
    (None, None) if there are no candidate channels.

    Raises Exception if 'val' is 'inf' or 'NaN'.
    """
    if ce_type == CEvent.NEW or ce_type == CEvent.HOFF:
        # Channels eligible for assignment
        chs = NGF.get_eligible_chs(self.grid, cell)
    else:
        # Channels with calls in progress
        chs = np.nonzero(self.grid[cell])[0]
    if len(chs) == 0:
        # A terminating call always occupies a channel, so an empty
        # candidate set is only expected for arrivals.
        assert ce_type != CEvent.END
        return (None, None)

    a_dist, val = self.forward(cell=cell, ce_type=ce_type)
    greedy = True
    if ce_type == CEvent.END:
        if greedy:
            idx = np.argmin(a_dist[chs])
        else:
            valid_a_dist = softmax(1 - a_dist[chs])
            # 'np.range' does not exist; 'np.arange' yields the indices
            idx = np.random.choice(
                np.arange(len(valid_a_dist)), p=valid_a_dist)
    else:
        if greedy:
            idx = np.argmax(a_dist[chs])
        else:
            valid_a_dist = softmax(a_dist[chs])
            idx = np.random.choice(
                np.arange(len(valid_a_dist)), p=valid_a_dist)
    ch = chs[idx]
    # TODO NOTE verify the above

    # Abort if the value has blown up ('NaN's and 'inf's).
    if np.isinf(val) or np.isnan(val):
        self.logger.error(f"{ce_type}\n{chs}\n{val}\n\n")
        raise Exception
    self.logger.debug(
        f"Optimal ch: {ch} for event {ce_type} of possibilities {chs}")
    return (ch, val)
def optimal_ch(self, ce_type, cell) -> int:
    """Select a channel for an event of type 'ce_type' at 'cell'.

    NEW/HOFF events choose among the eligible channels through the
    exploration policy; END events greedily take the in-use channel with
    the lowest q-value.

    Note that, despite the annotation, a 5-tuple is returned:
    (ch, q-value of ch, greedy channel, q-value of greedy channel, p),
    or five 'None's when no channel is eligible for a NEW/HOFF event.
    """
    if ce_type in (CEvent.NEW, CEvent.HOFF):
        chs = NGF.get_eligible_chs(self.grid, cell)
        if len(chs) == 0:
            return (None, None, None, None, None)
    else:
        chs = np.nonzero(self.grid[cell])[0]

    qvals_dense = self.get_qvals(self.grid, cell, ce_type, chs)
    self.qval_means.append(np.mean(qvals_dense))

    if ce_type == CEvent.END:
        # Greedy choice; selected and "max" channel coincide
        idx = max_idx = np.argmin(qvals_dense)
        ch = max_ch = chs[idx]
        p = 1
    else:
        ch, idx, p = self.exploration_policy(
            self.epsilon, chs, qvals_dense, cell)
        max_idx = np.argmax(qvals_dense)
        max_ch = chs[max_idx]
        self.epsilon *= self.epsilon_decay

    assert ch is not None, f"ch is none for {ce_type}\n{chs}\n{qvals_dense}\n"
    selected_qval = qvals_dense[idx]
    greedy_qval = qvals_dense[max_idx]
    return ch, selected_qval, max_ch, greedy_qval, p
def optimal_ch(self, ce_type, cell) -> int:
    """Select a channel for an event of type 'ce_type' at 'cell'.

    Despite the annotation, three records are returned:
    Chs(selected channel, greedy channel, prob of picking the selected
    channel), Qvals(q-value of selected, q-value of greedy) and
    Freps(current frep, frep of selected, frep of greedy). All of their
    fields are None (except a prob of 1) when no channel is eligible for a
    NEW/HOFF event.
    """
    if ce_type in (CEvent.NEW, CEvent.HOFF):
        chs = NGF.get_eligible_chs(self.grid, cell)
        if len(chs) == 0:
            no_chs = Chs(None, None, 1)
            no_qvals = Qvals(None, None)
            no_freps = Freps(None, None, None)
            return no_chs, no_qvals, no_freps
    else:
        chs = np.nonzero(self.grid[cell])[0]

    upcoming = self.env.eventgen.peek()
    if self.pp['hoff_lookahead'] and upcoming[1] == CEvent.HOFF:
        # The look-ahead path is only expected on call termination
        assert ce_type == CEvent.END
        qvals_dense, cur_frep, freps = self.get_hoff_qvals(
            self.grid, cell, ce_type, chs, upcoming[2])
    else:
        qvals_dense, cur_frep, freps = self.get_qvals(
            self.grid, cell, ce_type, chs)
    self.qval_means.append(np.mean(qvals_dense))

    if ce_type == CEvent.END:
        # Greedy choice; selected and "max" channel coincide
        idx = max_idx = np.argmax(qvals_dense)
        ch = max_ch = chs[idx]
        p = 1
        if self.pp['debug']:
            # Evaluate the chosen afterstate through the net in several
            # ways and print the results side by side.
            val = qvals_dense[idx]
            _, freps1 = self.afterstate_freps(
                self.grid, cell, ce_type, np.array([ch]))
            v1 = self.net.forward(freps1)[0]
            frep2 = GF.afterstate_freps_naive(
                self.grid, cell, ce_type, chs)[idx]
            v2 = self.net.forward([frep2])[0]
            v3 = self.net.forward([freps1[0], freps1[0]])[0]
            v4 = self.net.forward([frep2, frep2])[0]
            # val: multi freps, multi tf
            # v1: single frep, single tf
            # v2: multi freps, single tf
            # v3: single freps, multi tf
            # v4: multi freps, multi tf
            # ON CPU:
            # (val == v3 == v4) != (v1 == v2)
            # ON GPU: All the same...
            # CONCLUSION: Running multiple samples at once through TF
            # yields different (higher accuracy) results
            print(val, v1, v2, v3, v4, "\n")
    else:
        ch, idx, p = self.exploration_policy(
            self.epsilon, chs, qvals_dense, cell)
        max_idx = np.argmax(qvals_dense)
        max_ch = chs[max_idx]
        if self.eps_log_decay:
            self.epsilon = self.epsilon0 / np.sqrt(
                self.t * 60 / self.eps_log_decay)
        else:
            self.epsilon *= self.epsilon_decay

    assert ch is not None, f"ch is none for {ce_type}\n{chs}\n{qvals_dense}\n"
    picked_chs = Chs(ch, max_ch, p)
    picked_qvals = Qvals(qvals_dense[idx], qvals_dense[max_idx])
    picked_freps = Freps(cur_frep, freps[idx], freps[max_idx])
    return picked_chs, picked_qvals, picked_freps