def rollOut(self, node, state, depth):
    """Play sampled shots on ``state`` until the end is over or the depth cap.

    Every iteration samples one shot ``(x, y, spin)`` and simulates it:

    * without a policy network, ``(x, y)`` is drawn from an isotropic
      Gaussian centred on ``(Config.TEE_X, Config.TEE_Y)`` and the spin is
      picked uniformly from ``{0, 1}``;
    * with a policy network, an action id is drawn from the distribution
      the network predicts for the current state and decoded with
      ``actionID_to_xys``.

    Args:
        node: Tree node the roll-out starts from. A copy of the final
            state (made with ``copy``) is stored in ``node.rollout_state``.
        state: Game state. The loop relies on ``Simulation`` advancing it
            in place (``state.ShotNum`` is re-read after every shot).
        depth: Number of shots already played in the current playout.

    Returns:
        The same ``state`` object that was passed in, after the roll-out.
    """
    finished = False
    while not (finished or depth >= self.playout_depth):
        if self.policy_net is not None:
            feats = self.features_generator.extract_features(
                state.body, state.ShotNum)
            probs = self.policy_net.predict_p([feats])[0]
            action_ids = np.arange(
                2 * Config.IMAGE_WIDTH * Config.IMAGE_HEIGHT)
            action_id = np.random.choice(action_ids, p=probs)
            x, y, spin = actionID_to_xys(action_id)
        else:
            mean = [Config.TEE_X, Config.TEE_Y]
            cov = (1.83 / 2) ** 2 * np.identity(2)
            x, y = np.random.multivariate_normal(mean, cov, 1)[0]
            spin = np.random.choice([0, 1])

        _, shot_vec = CreateShot(_ShotPos(x, y, spin))
        Simulation(state, shot_vec, Config.RAND, -1)

        # ShotNum back at 0 is treated as "the end is finished".
        finished = state.ShotNum == 0
        depth += 1

    node.rollout_state = copy(state)
    return state
def init_table(self):
    """Load the cached collision table, or build it by simulation and cache it.

    The table is stored in ``self.collision_table`` and indexed as
    ``collision_table[spin][h * Config.IMAGE_WIDTH + w]``. Each entry is a
    list of ``[target_h, target_w]`` grid cells: shots aimed at those cells
    with the given spin changed the x coordinate of a single stone placed
    at grid cell ``(h, w)``.

    If ``table/collision_table.npy`` exists it is loaded (the attribute is
    then a NumPy object array); otherwise the table is computed (the
    attribute is then a nested list) and written to that file.
    """
    table_dir = 'table'
    table_path = os.path.join(table_dir, 'collision_table.npy')

    if os.path.exists(table_path):
        # The per-cell candidate lists have different lengths, so the file
        # holds an object array; NumPy refuses to load those unless
        # allow_pickle=True.
        # NOTE: unpickling executes code -- only load a cache file that
        # this program generated itself.
        self.collision_table = np.load(table_path, allow_pickle=True)
        return

    max_h = Config.IMAGE_HEIGHT - 1
    self.collision_table = [[], []]
    for spin in [0, 1]:
        for h in range(Config.IMAGE_HEIGHT):
            print(h)  # progress output; the full sweep is slow
            for w in range(Config.IMAGE_WIDTH):
                candidates = []
                # NOTE(review): target_h is -1 when h == 0; only the upper
                # bound is checked, as in the original -- confirm intended.
                for target_h in range(h - 1, h + 5):
                    if target_h > max_h:
                        continue
                    # Columns 0..w+1, clipped to the image width.
                    for target_w in range(min(w + 2, Config.IMAGE_WIDTH)):
                        game_state = _GameState()
                        game_state.LastEnd = 1  # 1-end game
                        # Put one stone on the sheet at grid cell (h, w).
                        game_state.body[0][0] = HtoX(h)
                        game_state.body[0][1] = WtoY(w)
                        # Presumably marks that stone as already thrown:
                        # hand the turn over and advance the shot counter.
                        game_state.WhiteToMove = not game_state.WhiteToMove
                        game_state.ShotNum += 1

                        prev_x = game_state.body[0][0]
                        _, shot_vec = CreateShot(
                            _ShotPos(HtoX(target_h), WtoY(target_w), spin))
                        # 0. is passed where other call sites pass
                        # Config.RAND -- presumably "no random noise".
                        Simulation(game_state, shot_vec, 0., -1)
                        next_x = game_state.body[0][0]

                        # Only the x coordinate is compared to detect a hit.
                        if prev_x != next_x:
                            candidates.append([target_h, target_w])
                self.collision_table[spin].append(candidates)

    # Create the cache directory if needed and store the ragged table
    # explicitly as an object array (a plain ragged list is rejected by
    # recent NumPy versions).
    os.makedirs(table_dir, exist_ok=True)
    np.save(table_path,
            np.array(self.collision_table, dtype=object),
            allow_pickle=True)
def playout(self, state):
    """Run one search iteration: descend the tree, roll out, back up.

    Starting at ``self.root`` the loop does one of three things per step:

    * the node has fewer than ``self.num_initActions`` children: add an
      initial child via ``initChildren`` and stop descending;
    * progressive widening -- the children's total visit count has reached
      ten times the number of children: add a child via ``expand`` and
      stop descending;
    * otherwise: move to the child chosen by ``ucb_select`` and continue.

    In every case the chosen shot is simulated on ``state``. If the end is
    not over and the depth cap is not reached afterwards, ``rollOut``
    finishes the playout. Finally ``self.update`` is called with the last
    node and the resulting state.

    Args:
        state: A copy of the current game state; it is advanced in place.
    """
    node = self.root
    finished = False
    depth = 0

    while not finished and depth < self.playout_depth:
        groups = (node.children[0], node.children[1])
        num_children = len(groups[0]) + len(groups[1])

        if num_children < self.num_initActions:
            node, action_xy, spin = self.initChildren(node, state, depth)
            keep_descending = False
        else:
            total_visits = sum(
                child.n_visits
                for group in groups
                for child in group.values())
            if total_visits >= 10 * num_children:
                # Visited often enough: widen the node.
                node, action_xy, spin = self.expand(node)
                keep_descending = False
            else:
                node, action_xy, spin = self.ucb_select(node)
                keep_descending = True

        _, shot_vec = CreateShot(_ShotPos(action_xy[0], action_xy[1], spin))
        Simulation(state, shot_vec, Config.RAND, -1)
        finished = state.ShotNum == 0  # one-end game
        depth += 1

        if not keep_descending:
            break

    if not finished and depth < self.playout_depth:
        # rollOut also stores a copy of the final state on the node.
        state = self.rollOut(node, state, depth)

    self.update(node, state)