예제 #1
0
파일: testgrid.py 프로젝트: namch29/dca
 def test_neigh_indexing(self):
     """Check that NGF and GF neighbor lookups index a grid identically.

     If neighs are np arrays, they index differently than tuples, so both
     the separated form and the first element of the plain form are used
     as indices into the same random array and the results are compared.
     """
     # Smoke call only; the return value is not inspected. The builtin
     # ``bool`` is used as dtype because the ``np.bool`` alias was
     # deprecated in NumPy 1.20 and removed in 1.24.
     NGF.get_eligible_chs(np.zeros((7, 7, 70), dtype=bool), (3, 2))
     somegrid = np.random.uniform(size=(7, 7, 70))
     # Separated neighbor indices must select the same entries.
     n1 = somegrid[NGF.neighbors_sep(2, 3, 2, False)]
     n2 = somegrid[GF.neighbors(2, 3, 2, separate=True, include_self=False)]
     assert (n1 == n2).all()
     # Indexing with the first neighbor entry must agree as well.
     n1 = somegrid[NGF.neighbors(2, 3, 2, False)[0]]
     n2 = somegrid[GF.neighbors(2, 3, 2, include_self=False)[0]]
     assert (n1 == n2).all()
예제 #2
0
파일: runner.py 프로젝트: morning197821/dca
 def run(self):
     """Log afterstate values and ``GF.nom_chs`` entries for two cells.

     A strategy is built from ``self.stratclass``. Then, for each of the
     cells (3, 4) and (4, 4), the afterstate feature representations of a
     NEW event over channels 0..69 are pushed through the strategy's net.
     The net output and ``GF.nom_chs`` for the cell (presumably its
     nominal channels -- verify in gridfuncs) are logged at error level.
     """
     from gridfuncs import GF
     from eventgen import CEvent
     strategy = self.stratclass(self.pp, logger=self.logger)
     for cell in ((3, 4), (4, 4)):
         freps = GF.afterstate_freps(strategy.grid, cell, CEvent.NEW,
                                     range(70))
         self.logger.error(strategy.net.forward(freps))
         self.logger.error(GF.nom_chs[cell])
예제 #3
0
파일: testgrid.py 프로젝트: namch29/dca
 def test_neighs(self):
     """Compare NGF and GF neighbor lookups for every cell of a 7x7 grid.

     For each cell the plain distance-2 lookups must be equal, and for
     distances 1, 2 and 4 both components of the separated lookups must
     match element-wise.
     """
     for r in range(7):
         for c in range(7):
             e1 = NGF.neighbors(2, r, c, False)
             e2 = GF.neighbors(2, r, c)
             assert (e1 == e2)
             for d in [1, 2, 4]:
                 n1 = NGF.neighbors_sep(d, r, c, False)
                 n2 = GF.neighbors(d,
                                   r,
                                   c,
                                   separate=True,
                                   include_self=False)
                 assert ((n1[0] == n2[0]).all())
                 # This check used to sit outside the distance loop, so
                 # the second index component was only verified for the
                 # last distance (d == 4).
                 assert ((n1[1] == n2[1]).all())
예제 #4
0
 def mark_neighs(self, row, col, c1="#808080", c2="#DCDCDC", c3="#C0C0C0"):
     """Color the hexagon at (row, col) and the hexagons around it.

     The hexagon itself is filled with ``c1``. Every cell returned by
     ``GF.neighbors(2, row, col)`` is then filled with ``c2``, and
     afterwards every cell returned by ``GF.neighbors(1, row, col)`` is
     filled with ``c3``, so ``c3`` wins wherever the two sets overlap.
     """
     center = self.hexagons[row][col]
     self.can.itemconfigure(center.tags, fill=c1)
     # Order matters: the distance-2 pass runs first and the distance-1
     # pass repaints on top of it.
     for dist, color in ((2, c2), (1, c3)):
         for neigh in GF.neighbors(dist, row, col):
             hexagon = self.hexagons[neigh[0]][neigh[1]]
             self.can.itemconfigure(hexagon.tags, fill=color)
예제 #5
0
파일: qnet_rl.py 프로젝트: namch29/dca
 def update_qval_experience(self, *args, **kwargs):
     """
     Train on one batch drawn from the experience replay memory.

     Samples pp['batch_size'] experience tuples, runs ``self.backward`` on
     them and stores the absolute TD errors plus
     ``self.prioritized_replay_eps`` as the new priorities of the sampled
     entries. Nothing happens until the buffer holds at least
     pp['buffer_size'] experiences. Positional and keyword arguments are
     accepted but not used.
     """
     if len(self.exp_buffer) < self.pp['buffer_size']:
         # Can't backprop before exp store has enough experiences
         return
     batch_size = self.pp['batch_size']
     beta = self.pri_beta_schedule.value(self.i)
     batch, is_weights, sampled_idxes = self.exp_buffer.sample(batch_size,
                                                               beta=beta)
     if self.pp['freps']:
         # Swap in feature representations of the sampled grids;
         # 'next_chs' is explicitly blanked out in this mode.
         batch.update({
             'freps': GF.feature_reps(batch['grids']),
             'next_freps': GF.feature_reps(batch['next_grids']),
             'next_chs': None,
         })
     batch['weights'] = is_weights
     td_errors = self.backward(**batch, gamma=self.gamma)
     priorities = np.abs(td_errors) + self.prioritized_replay_eps
     self.exp_buffer.update_priorities(sampled_idxes, priorities)
예제 #6
0
파일: testgrid.py 프로젝트: namch29/dca
    def test_feature_rep(self):
        """Check GF.feature_reps against hand-computed expectations.

        Three 7x7x70 grids are used: an empty one, one where every cell
        uses channel 0, and one where only cell (1, 2) uses channel 9.
        The last feature channel is compared with the number of free
        channels in each cell; the remaining channels are compared with
        per-channel usage counts over GF.neighbors(4, ...) plus the cell
        itself. The batched call must also match the one-grid-at-a-time
        results and leave its input unmodified.
        """
        def check_n_free_self(grid, n_free):
            # Last feature channel: free-channel count of the cell itself
            self.assertTrue((grid[:, :, -1] == n_free).all(),
                            (grid[:, :, -1], n_free))

        def check_n_used_neighs(grid, n_used):
            # All other feature channels: per-channel usage counts
            self.assertTrue((grid[:, :, :-1] == n_used).all())

        rows, cols, n_channels = 7, 7, 70
        # Three grids in one array. They should not affect each other's
        # feature representation
        grid1 = np.zeros((rows, cols, n_channels), dtype=bool)
        grid2 = np.zeros((rows, cols, n_channels), dtype=bool)
        grid3 = np.zeros((rows, cols, n_channels), dtype=bool)
        grid2[:, :, 0] = 1
        grid3[1, 2, 9] = 1
        grid3c = np.copy(grid3)
        fgrid1 = GF.feature_reps(grid1)[0]
        fgrid2 = GF.feature_reps(grid2)[0]
        fgrid3 = GF.feature_reps(grid3)[0]

        # Verify that single- and multi-version works the same
        grids = np.array([grid1, grid2, grid3])
        fgrids = GF.feature_reps(grids)
        # The input grid must not have been modified
        assert (grid3 == grid3c).all()
        self.assertTrue((fgrids[0] == fgrid1).all())
        self.assertTrue((fgrids[1] == fgrid2).all())
        self.assertTrue((fgrids[2] == fgrid3).all())

        # Verify Grid #1
        # No cell has any channels in use, i.e. all are free
        check_n_free_self(fgrid1, np.ones((rows, cols)) * n_channels)
        # No cell has a channel in use by any of its neighbors
        check_n_used_neighs(fgrid1, np.zeros((rows, cols, n_channels)))

        # Verify Grid #2
        # All cells have one channel in use
        check_n_free_self(fgrid2, np.ones((rows, cols)) * (n_channels - 1))
        # For every cell, channel 0 is used by each of its distance-4
        # neighbors and by the cell itself, hence 'n_neighs + 1'
        # ('n_neighs' depends on the cell coordinates)
        n_used2 = np.zeros((rows, cols, n_channels))
        for r in range(rows):
            for c in range(cols):
                n_neighs = len(GF.neighbors(4, r, c))
                n_used2[r][c][0] = n_neighs + 1
        check_n_used_neighs(fgrid2, n_used2)

        # Verify Grid #3
        # Only cell (row, col) = (1, 2) has a channel in use (ch9)
        n_free3 = np.ones((rows, cols)) * n_channels
        n_free3[1][2] = n_channels - 1
        # This expectation used to be built but never asserted.
        check_n_free_self(fgrid3, n_free3)
        # Cell (1, 2) and each of its distance-4 neighbors see exactly one
        # cell using ch9, namely cell (1, 2).
        n_used3 = np.zeros((rows, cols, n_channels))
        neighs3 = GF.neighbors(4, 1, 2, separate=True, include_self=True)
        n_used3[(*neighs3, np.repeat(9, len(neighs3[0])))] = 1
        check_n_used_neighs(fgrid3, n_used3)
예제 #7
0
파일: qnet_rl.py 프로젝트: namch29/dca
    def optimal_ch(self, ce_type, cell) -> Tuple[int, float]:
        """Pick a channel for an event of type ``ce_type`` at ``cell``.

        For NEW and HOFF events the candidates are the channels returned
        by ``GF.get_eligible_chs`` for the cell; for any other event they
        are the channels currently set in ``self.grid[cell]``. Selection
        is greedy on the first output of ``self.forward``: the candidate
        with the lowest entry for END events, the highest otherwise.

        Returns:
            ``(ch, val)`` where ``val`` is the second output of
            ``self.forward``, or ``(None, None)`` when there are no
            candidate channels.

        Raises:
            Exception: if ``val`` is inf or NaN.
        """
        if ce_type == CEvent.NEW or ce_type == CEvent.HOFF:
            # Calls eligible for assignment
            chs = GF.get_eligible_chs(self.grid, cell)
        else:
            # Calls in progress
            chs = np.nonzero(self.grid[cell])[0]
        if len(chs) == 0:
            # An END event implies a call in progress, so there must be
            # at least one channel in use at the cell.
            assert ce_type != CEvent.END
            return (None, None)

        a_dist, val = self.forward(cell=cell, ce_type=ce_type)
        # Sampling is currently disabled; flip to draw from a softmax over
        # the candidate entries instead of taking the arg-extreme.
        greedy = True
        if ce_type == CEvent.END:
            if greedy:
                idx = np.argmin(a_dist[chs])
            else:
                valid_a_dist = softmax(1 - a_dist[chs])
                # np.arange, not the non-existent np.range
                idx = np.random.choice(np.arange(len(valid_a_dist)),
                                       p=valid_a_dist)
        else:
            if greedy:
                idx = np.argmax(a_dist[chs])
            else:
                valid_a_dist = softmax(a_dist[chs])
                idx = np.random.choice(np.arange(len(valid_a_dist)),
                                       p=valid_a_dist)
        # idx indexes into the candidate list, not into the channel axis
        ch = chs[idx]
        # TODO NOTE verify the above

        # Abort if the value estimate has blown up (inf or NaN).
        if np.isinf(val) or np.isnan(val):
            self.logger.error(f"{ce_type}\n{chs}\n{val}\n\n")
            raise Exception(f"Non-finite value {val} for event {ce_type}")

        self.logger.debug(
            f"Optimal ch: {ch} for event {ce_type} of possibilities {chs}")
        return (ch, val)
예제 #8
0
파일: testgrid.py 프로젝트: namch29/dca
    def test_afterstate_freps(self):
        """
        Test incremental vs naive afterstate feature rep. derivation approach.

        For several grids, the feature representations produced directly by
        ``afterstate_freps`` (both the GF and the NGF implementation) are
        compared with those obtained by first building the afterstates and
        then running ``feature_reps`` on them. Each input grid is also
        checked to be unmodified afterwards.
        """
        def check_frep(frep1, frep2):
            # Compare two single feature representations of shape (7, 7, 71):
            # the last channel and the first 70 channels are checked
            # separately so that a failure reports which part differs.
            # NOTE(review): the 'n_used'/'n_free' local names look swapped
            # relative to which slice they cover -- confirm against
            # the feature rep layout.
            self.assertTrue(frep1.shape == frep2.shape,
                            (frep1.shape, frep2.shape))
            self.assertTrue(frep2.shape == (7, 7, 71))
            eq_n_used = frep1[:, :, -1] == frep2[:, :, -1]
            diff_n_used = np.where(np.invert(eq_n_used))
            self.assertTrue(
                eq_n_used.all(),
                ("\n", diff_n_used, frep1[:, :, -1], frep2[:, :, -1]))
            eq_n_free = frep1[:, :, :-1] == frep2[:, :, :-1]
            diff_n_free = np.where(np.invert(eq_n_free))
            self.assertTrue(
                eq_n_free.all(),
                (diff_n_free, frep1[diff_n_free], frep2[diff_n_free]))

        def check_freps(freps1, freps2):
            # Batched variant of check_frep: leading axis indexes the
            # afterstate, trailing shape must be (7, 7, 71).
            self.assertTrue(freps1.shape == freps2.shape,
                            (freps1.shape, freps2.shape))
            self.assertTrue(freps2.shape[1:] == (7, 7, 71))
            eq_n_free = freps1[:, :, :, :-1] == freps2[:, :, :, :-1]
            diff_n_free = np.where(np.invert(eq_n_free))
            self.assertTrue(
                eq_n_free.all(),
                (diff_n_free, freps1[diff_n_free], freps2[diff_n_free]))
            eq_n_used = freps1[:, :, :, -1] == freps2[:, :, :, -1]
            diff_n_used = np.where(np.invert(eq_n_used))
            self.assertTrue(
                eq_n_used.all(),
                ("\n", diff_n_used, freps1[:, :, :, -1], freps2[:, :, :, -1]))

        # Grid 1: No channels in use
        rows, cols, n_channels = 7, 7, 70
        grid1 = np.zeros((rows, cols, n_channels), dtype=bool)
        grid1c = np.copy(grid1)  # Snapshot for the no-mutation check below
        cell1 = (2, 3)
        ce_type1 = CEvent.NEW
        chs1 = GF.get_eligible_chs(grid1, cell1)

        # Check that vanilla gridfuncs (GF) and numba gridfuncs (NGF) work the same
        freps_inc1 = GF.afterstate_freps(grid1, cell1, ce_type1, chs1)
        freps_inc1b = NGF.afterstate_freps(grid1, cell1, ce_type1, chs1)
        # Check that incremental frep function works the same as naive approach
        astates1 = GF.afterstates(grid1, cell1, ce_type1, chs1)
        astates1b = NGF.afterstates(grid1, cell1, ce_type1, chs1)
        freps1 = GF.feature_reps(astates1)  # GF takes multiple grids
        frep1b = NGF.feature_rep(
            astates1b[0])  # NGF version takes a single grid
        check_frep(freps1[0], frep1b)
        check_freps(freps_inc1, freps1)
        check_freps(freps_inc1b, freps1)
        assert (grid1 == grid1c
                ).all()  # Grid should not have been modified by any funcs

        # Grid 2: All channels in use, except channel 4 at cell2 and every
        # channel at cell (2, 4)
        grid2 = np.ones((rows, cols, n_channels), dtype=bool)
        cell2 = (2, 3)
        grid2[cell2][4] = 0
        grid2[(2, 4)][:] = 0
        grid2c = np.copy(grid2)
        ce_type2 = CEvent.END
        # END event: candidates are the channels currently in use at cell2
        chs2 = np.nonzero(grid2[cell2])[0]
        freps_inc2 = GF.afterstate_freps(grid2, cell2, ce_type2, chs2)
        freps_inc2b = NGF.afterstate_freps(grid2, cell2, ce_type2, chs2)
        astates2 = GF.afterstates(grid2, cell2, ce_type2, chs2)
        freps2 = GF.feature_reps(astates2)
        check_freps(freps_inc2, freps2)
        check_freps(freps_inc2b, freps2)
        assert (grid2 == grid2c).all()

        # Grid 3: One channel in use
        grid3 = np.zeros((rows, cols, n_channels), dtype=bool)
        cell3 = (4, 1)
        grid3[cell3][4] = 1
        grid3c = np.copy(grid3)
        ce_type3 = CEvent.END
        chs3 = np.nonzero(grid3[cell3])[0]
        freps_inc3 = GF.afterstate_freps(grid3, cell3, ce_type3, chs3)
        freps_inc3b = NGF.afterstate_freps(grid3, cell3, ce_type3, chs3)
        astates3 = GF.afterstates(grid3, cell3, ce_type3, chs3)
        freps3 = GF.feature_reps(astates3)
        check_freps(freps_inc3, freps3)
        check_freps(freps_inc3b, freps3)
        assert (grid3 == grid3c).all()

        # Grid 4: One channel in use at (4, 1); the incremental frep for a
        # NEW call on channel 3 at (3, 1) must equal the frep computed from
        # scratch after applying that assignment to the grid by hand.
        grid4 = np.zeros((rows, cols, n_channels), dtype=bool)
        grid4[(4, 1)][4] = 1
        cell4 = (3, 1)
        frep_inc4 = NGF.afterstate_freps(grid4, cell4, CEvent.NEW,
                                         np.array([3]))[0]
        # Mutate only after the incremental frep has been derived
        grid4[cell4][3] = 1
        frep4 = NGF.feature_rep(grid4)
        check_frep(frep_inc4, frep4)
예제 #9
0
파일: vnet_rl.py 프로젝트: namch29/dca
    def optimal_ch(self, ce_type, cell) -> tuple:
        """Select a channel for an event of type ``ce_type`` at ``cell``.

        Returns a triple ``(Chs, Qvals, Freps)``:

        - ``Chs(ch, max_ch, p)``: the selected channel, the greedy channel
          and the probability of picking the selected channel.
        - ``Qvals(q_selected, q_greedy)``: the q-values of those two channels.
        - ``Freps(cur_frep, frep_selected, frep_greedy)``: the current frep
          and the freps belonging to the selected and greedy channels.

        For NEW/HOFF events without any eligible channel, the triple
        ``Chs(None, None, 1), Qvals(None, None), Freps(None, None, None)``
        is returned instead.

        Side effects: appends the mean q-value to ``self.qval_means`` and,
        for non-END events, decays ``self.epsilon``.
        """
        if ce_type == CEvent.NEW or ce_type == CEvent.HOFF:
            # Candidates: channels eligible for assignment at the cell
            chs = NGF.get_eligible_chs(self.grid, cell)
            if len(chs) == 0:
                return Chs(None, None,
                           1), Qvals(None, None), Freps(None, None, None)
        else:
            # Candidates: channels currently in use at the cell
            chs = np.nonzero(self.grid[cell])[0]

        # NOTE(review): next_event[1] is compared against an event type and
        # next_event[2] is handed to get_hoff_qvals -- presumably the
        # hand-off cell; confirm against the event tuple layout.
        next_event = self.env.eventgen.peek()
        if self.pp['hoff_lookahead'] and next_event[1] == CEvent.HOFF:
            assert ce_type == CEvent.END
            qvals_dense, cur_frep, freps = self.get_hoff_qvals(
                self.grid, cell, ce_type, chs, next_event[2])
        else:
            qvals_dense, cur_frep, freps = self.get_qvals(
                self.grid, cell, ce_type, chs)
        self.qval_means.append(np.mean(qvals_dense))
        if ce_type == CEvent.END:
            # END events are always handled greedily, hence p = 1
            idx = max_idx = np.argmax(qvals_dense)
            ch = max_ch = chs[idx]
            p = 1
            if self.pp['debug']:
                # Debug only: recompute the value of the chosen afterstate
                # along several code paths and print them side by side.
                val = qvals_dense[idx]
                _, freps1 = self.afterstate_freps(self.grid, cell, ce_type,
                                                  np.array([ch]))
                v1 = self.net.forward(freps1)[0]
                # print("\n", qvals_dense, idx)
                frep2 = GF.afterstate_freps_naive(self.grid, cell, ce_type,
                                                  chs)[idx]
                v2 = self.net.forward([frep2])[0]
                v3 = self.net.forward([freps1[0], freps1[0]])[0]
                v4 = self.net.forward([frep2, frep2])[0]
                # val: multi freps, multi tf
                # v1: single frep, single tf
                # v2: multi freps, single tf
                # v3: single freps, multi tf
                # v4: multi freps, multi tf
                # ON CPU:
                # (val == v3 == v4) != (v1 == v2)
                # ON GPU: All the same...
                # CONCLUSION: Running multiple samples at once through TF
                # yields different (higher accuracy) results
                print(val, v1, v2, v3, v4, "\n")
        else:
            # Non-END events go through the exploration policy; the greedy
            # choice is tracked separately for the return value.
            ch, idx, p = self.exploration_policy(self.epsilon, chs,
                                                 qvals_dense, cell)
            max_idx = np.argmax(qvals_dense)
            max_ch = chs[max_idx]
            # Epsilon decay: inverse square-root schedule when
            # eps_log_decay is set, multiplicative decay otherwise.
            if self.eps_log_decay:
                self.epsilon = self.epsilon0 / np.sqrt(
                    self.t * 60 / self.eps_log_decay)
            else:
                self.epsilon *= self.epsilon_decay

        assert ch is not None, f"ch is none for {ce_type}\n{chs}\n{qvals_dense}\n"
        # idx and max_idx index into the candidate list 'chs', which
        # qvals_dense and freps are aligned with.
        return Chs(ch, max_ch,
                   p), Qvals(qvals_dense[idx], qvals_dense[max_idx]), Freps(
                       cur_frep, freps[idx], freps[max_idx])