Exemple #1
0
    def rollOut(self, node, state, depth):
        """Play sampled shots on ``state`` until the end is over or the depth cap.

        Every iteration samples one shot ``(x, y, spin)`` and hands it to
        ``Simulation`` together with ``state``:

        * without a policy network, ``(x, y)`` is drawn from a 2-D Gaussian
          centred on ``(Config.TEE_X, Config.TEE_Y)`` with a standard
          deviation of ``1.83 / 2`` per axis, and the spin is drawn uniformly
          from ``{0, 1}``;
        * with a policy network, an action index is drawn from the
          distribution returned by ``policy_net.predict_p`` and decoded with
          ``actionID_to_xys``.

        The loop stops once ``state.ShotNum`` is 0 after a shot, or once
        ``depth`` reaches ``self.playout_depth``.

        Args:
            node: tree node; a shallow copy of the final state is stored in
                ``node.rollout_state``.
            state: game state handed to ``Simulation`` (expected to be
                advanced in place by it).
            depth: number of shots already played before this rollout.

        Returns:
            The ``state`` object that was passed in.
        """
        finished = False
        while not (finished or depth >= self.playout_depth):
            if self.policy_net is not None:
                # Policy-guided sampling over the flattened action space.
                feats = self.features_generator.extract_features(
                    state.body, state.ShotNum)
                probs = self.policy_net.predict_p([feats])[0]
                n_actions = 2 * Config.IMAGE_WIDTH * Config.IMAGE_HEIGHT
                action_id = np.random.choice(np.arange(n_actions), p=probs)
                x, y, spin = actionID_to_xys(action_id)
            else:
                # Uninformed sampling around the tee.
                # NOTE(review): 1.83 looks like a rink dimension -- confirm.
                centre = [Config.TEE_X, Config.TEE_Y]
                covariance = (1.83 / 2)**2 * np.identity(2)
                x, y = np.random.multivariate_normal(centre, covariance, 1)[0]
                spin = np.random.choice([0, 1])

            _, shot_vec = CreateShot(_ShotPos(x, y, spin))
            _ok, _result = Simulation(state, shot_vec, Config.RAND, -1)

            # ShotNum back at 0 marks the end of the (single) end being played.
            finished = state.ShotNum == 0
            depth += 1

        node.rollout_state = copy(state)
        return state
    def init_table(self):
        """Load the cached collision table, or build it by simulation and cache it.

        The table is indexed as
        ``self.collision_table[spin][h * Config.IMAGE_WIDTH + w]``.  Each entry
        is a list of ``[target_h, target_w]`` grid targets: with a single stone
        placed at grid cell ``(h, w)``, a shot aimed at that target with the
        given ``spin`` changed the stone's x coordinate in the simulation.
        Only targets in rows ``h - 1 .. h + 4`` and columns ``0 .. w + 1``
        (clipped to the upper image bounds) are tried.

        The result is cached in ``table/collision_table.npy``.  Because the
        per-cell lists have different lengths, the file is a NumPy object
        array.  After a cache hit ``self.collision_table`` is that object
        array; after a fresh build it is a nested Python list.  Both support
        the ``[spin][index]`` lookup.
        """
        table_path = os.path.join('table', 'collision_table.npy')
        if os.path.exists(table_path):
            # Object arrays are pickled on disk and np.load rejects them unless
            # allow_pickle=True.  Only load a cache file written by this method:
            # unpickling a file from an untrusted source can run arbitrary code.
            self.collision_table = np.load(table_path, allow_pickle=True)
            return

        height = Config.IMAGE_HEIGHT
        width = Config.IMAGE_WIDTH

        self.collision_table = [[], []]
        for spin in [0, 1]:
            for h in range(height):
                print(h)  # progress indicator; one line per grid row
                for w in range(width):
                    candidates = []

                    # NOTE(review): for h == 0 the first target row is -1; only
                    # the upper bounds are clipped.  Kept as-is -- confirm that
                    # a negative row is intended.
                    for target_h in range(h - 1, min(h + 5, height)):
                        for target_w in range(min(w + 2, width)):
                            game_state = _GameState()
                            game_state.LastEnd = 1  # one-end game
                            game_state.body[0][0] = HtoX(h)
                            game_state.body[0][1] = WtoY(w)
                            game_state.WhiteToMove = not game_state.WhiteToMove
                            game_state.ShotNum += 1

                            # x of the placed stone before the shot
                            prev_x = game_state.body[0][0]

                            _, shot_vec = CreateShot(
                                _ShotPos(HtoX(target_h), WtoY(target_w), spin))
                            # Third argument is 0. here where the search code
                            # passes Config.RAND -- presumably the shot noise.
                            Simulation(game_state, shot_vec, 0., -1)

                            if game_state.body[0][0] != prev_x:
                                candidates.append([target_h, target_w])
                    self.collision_table[spin].append(candidates)

        # Pack the ragged lists into an explicit object array: recent NumPy
        # versions refuse to build an array from ragged nested sequences
        # implicitly, which made np.save fail after the whole simulation run.
        packed = np.empty((2, height * width), dtype=object)
        for spin, cells in enumerate(self.collision_table):
            for index, cell in enumerate(cells):
                packed[spin, index] = cell

        # Create the cache directory so the save cannot fail on a fresh checkout.
        os.makedirs('table', exist_ok=True)
        np.save(table_path, packed)
Exemple #3
0
    def playout(self, state):
        """Run one search iteration from the root and back up its result.

        Starting at ``self.root`` the method walks down the tree, applying the
        shot of every visited edge to ``state``:

        * if the node has fewer than ``self.num_initActions`` children, a child
          is created with ``initChildren`` and the descent stops there;
        * otherwise, if the children's total visit count is at least ten times
          the number of children (progressive widening), a child is added with
          ``expand`` and the descent stops there;
        * otherwise a child is chosen with ``ucb_select`` and the descent
          continues.

        The descent also stops when ``state.ShotNum`` is 0 after a shot or when
        ``self.playout_depth`` shots have been played.  If neither of those two
        limits was hit, ``rollOut`` continues from the reached node.  Finally
        ``self.update`` is called with the reached node and the final state.

        Args:
            state: a copy of the game state; it is passed to ``Simulation``
                for every shot.
        """

        def play_shot(xy, spin):
            """Simulate the shot on ``state``; return True when the end is over."""
            _, shot_vec = CreateShot(_ShotPos(xy[0], xy[1], spin))
            _ok, _result = Simulation(state, shot_vec, Config.RAND, -1)
            return state.ShotNum == 0  # one-end game

        node = self.root
        depth = 0
        terminal = False

        while not terminal and depth < self.playout_depth:
            by_spin = node.children
            n_children = len(by_spin[0]) + len(by_spin[1])

            if n_children < self.num_initActions:
                # Still seeding this node with its initial actions.
                node, xy, spin = self.initChildren(node, state, depth)
                terminal = play_shot(xy, spin)
                depth += 1
                break

            total_visits = 0
            for group in (by_spin[0], by_spin[1]):
                for child in group.values():
                    total_visits += child.n_visits

            # Progressive widening: add a new child once the existing ones
            # have been visited often enough, otherwise descend by UCB.
            widen = total_visits >= 10 * n_children
            if widen:
                node, xy, spin = self.expand(node)
            else:
                node, xy, spin = self.ucb_select(node)

            terminal = play_shot(xy, spin)
            depth += 1

            if widen:
                break

        final_state = state
        if not terminal and depth < self.playout_depth:
            final_state = self.rollOut(node, state, depth)

        self.update(node, final_state)