def search(self, state: np.ndarray, time_limit: float=None, max_states: int=None) -> bool:
    """Search according to the batched, weighted A* algorithm.

    While there is time left, the algorithm pops the best `self.expansions`
    open states (via the priority queue) with the lowest cost according to the
    A* cost heuristic (see `self.cost`). From these, it expands to new open
    states according to `self.expand_batch`.

    :param state: Start state to search from.
    :param time_limit: Time budget, normalised by `self.reset`.
    :param max_states: State budget, normalised by `self.reset`.
    :return: True if `state` is already solved or `self.expand_batch` reports a
        solution (in which case `self.action_queue` holds the actions from the
        start node to the solved state); False if a budget ran out first.
    """
    self.tt.tick()
    time_limit, max_states = self.reset(time_limit, max_states)
    if cube.is_solved(state):
        return True

    # Register the start state as node 1.
    # `tobytes()` replaces the deprecated alias `ndarray.tostring()`; both
    # produce the same bytes, so keys written elsewhere remain compatible.
    self.indices[state.tobytes()], self.states[1], self.G[1] = 1, state, 0
    # Given cost 0: should not matter; just to avoid np.empty weirdness
    heapq.heappush(self.open_queue, (0, 1))

    # Only continue while one more batch of expansions fits within the budget
    while self.tt.tock() < time_limit and len(self) + self.expansions * cube.action_dim <= max_states:
        self.tt.profile("Remove nodes from open priority queue")
        n_remove = min(len(self.open_queue), self.expansions)
        expand_idcs = np.array(
            [heapq.heappop(self.open_queue)[1] for _ in range(n_remove)],
            dtype=int,
        )
        self.tt.end_profile("Remove nodes from open priority queue")

        is_won = self.expand_batch(expand_idcs)
        if is_won:
            # Solution found: walk the parent links from the solved state back
            # to the start node (index 1), prepending each action on the way
            i = self.indices[cube.get_solved().tobytes()]
            while i != 1:
                self.action_queue.appendleft(self.parent_actions[i])
                i = self.parents[i]
            return True
    return False
def search(self, state: np.ndarray, time_limit: float=None, max_states: int=None) -> bool:
    """Return whether a path was found, filling the action queue on the way.

    Implement `_step` for agents that look one step ahead; otherwise override
    this method.

    :param state: Start state to search from.
    :param time_limit: Time budget, normalised by `self.reset`.
    :param max_states: State budget, normalised by `self.reset`.
    :return: True if `state` is already solved or a step reports a solution,
        False if a budget ran out first.
    """
    time_limit, max_states = self.reset(time_limit, max_states)
    self.tt.tick()
    if cube.is_solved(state):
        return True

    found = False
    while self.tt.tock() < time_limit and len(self) < max_states:
        action, state, found = self._step(state)
        self.action_queue.append(action)
        if found:
            break

    # One action is queued per step, so the queue length is recorded as the
    # number of explored states on both exits
    self._explored_states = len(self.action_queue)
    return bool(found)
def search(self, state: np.ndarray, time_limit: float=None, max_states: int=None) -> bool:
    """Search by repeatedly expanding from the current best state.

    Each round calls `self.expand` on the current state. If the expansion
    reports a solution, the path leading to it is queued and the search ends.
    Otherwise the expanded state that `self.net` scores highest becomes the
    new current state, and the path leading to it is queued.

    :param state: Start state to search from.
    :param time_limit: Time budget, normalised by `self.reset`.
    :param max_states: State budget, normalised by `self.reset`.
    :return: True if `state` is already solved or an expansion finds a
        solution, False if a budget ran out first.
    """
    time_limit, max_states = self.reset(time_limit, max_states)
    self.tt.tick()
    if cube.is_solved(state):
        return True

    # Only continue while one more round of expansions fits within the budget
    while self.tt.tock() < time_limit and len(self) + self.workers * self.depth <= max_states:
        paths, states, states_oh, solved = self.expand(state)

        # (-1, -1) is the sentinel for "no solution in this expansion"
        if solved != (-1, -1):
            solved_worker, solved_length = solved[0], solved[1]
            self.action_queue.extend(paths[solved_worker, :solved_length])
            return True

        # Move to the expanded state the network scores highest
        # NOTE(review): presumably `policy=False` yields value estimates - confirm
        values = self.net(states_oh, policy=False)
        values = values.cpu().squeeze()
        best = int(values.argmax())
        state = states[best]

        # The flat index decomposes as worker * depth + step within the path
        worker, step = divmod(best, self.depth)
        self.action_queue.extend(paths[worker, :step + 1])
    return False
def _rotation_tests(self):
    """Check cube rotations against solved-ness and string representations."""

    def apply_and_check(current, moves, expected_solved):
        # Apply each move in turn, asserting the solved flag after every move
        for move, should_be_solved in zip(moves, expected_solved):
            current = cube.rotate(current, *move)
            assert should_be_solved == cube.is_solved(current)
        return current

    # Apply every action in the action space once, in sequence
    scratch = cube.get_solved()
    for face_and_direction in cube.action_space:
        scratch = cube.rotate(scratch, *face_and_direction)

    # Tests that stringify and, by extension, as633 work on the assembled cube
    state = cube.get_solved()
    solved_repr = "\n".join([
        " 2 2 2 ",
        " 2 2 2 ",
        " 2 2 2 ",
        "4 4 4 0 0 0 5 5 5 1 1 1",
        "4 4 4 0 0 0 5 5 5 1 1 1",
        "4 4 4 0 0 0 5 5 5 1 1 1",
        " 3 3 3 ",
        " 3 3 3 ",
        " 3 3 3 ",
    ])
    assert cube.stringify(state) == solved_repr

    # Performs moves and checks whether the cube is assembled as expected
    state = apply_and_check(
        state,
        ((0, 1), (0, 0), (0, 1), (1, 1), (2, 0), (3, 0)),
        (False, True, False, False, False, False),
    )

    # Tests more moves, continuing from the state reached above
    state = apply_and_check(
        state,
        ((3, 1), (2, 1), (1, 0), (0, 0)),
        (False, False, False, True),
    )

    # Performs one move and checks it against the expected string representation
    state = cube.rotate(cube.get_solved(), 0, 1)
    single_move_repr = "\n".join([
        " 2 2 2 ",
        " 2 2 2 ",
        " 5 5 5 ",
        "4 4 2 0 0 0 3 5 5 1 1 1",
        "4 4 2 0 0 0 3 5 5 1 1 1",
        "4 4 2 0 0 0 3 5 5 1 1 1",
        " 4 4 4 ",
        " 3 3 3 ",
        " 3 3 3 ",
    ])
    assert cube.stringify(state) == single_move_repr

    # Performs all moves and checks the result against the theoretical outcome:
    # faces 0-5 with direction 0, then faces 0-5 with direction 1
    all_moves = tuple((face, direction) for direction in (0, 1) for face in range(6))
    never_solved = (False,) * len(all_moves)
    state = apply_and_check(cube.get_solved(), all_moves, never_solved)
    all_moves_repr = "\n".join([
        " 2 0 2 ",
        " 5 2 4 ",
        " 2 1 2 ",
        "4 2 4 0 2 0 5 2 5 1 2 1",
        "4 4 4 0 0 0 5 5 5 1 1 1",
        "4 3 4 0 3 0 5 3 5 1 3 1",
        " 3 1 3 ",
        " 5 3 4 ",
        " 3 0 3 ",
    ])
    assert cube.stringify(state) == all_moves_repr
def test_init(self):
    """Check that the solved state is solved and the solved instance has shape (20,)."""
    solved = cube.get_solved()
    assert cube.is_solved(solved)

    instance_shape = cube.get_solved_instance().shape
    assert instance_shape == (20,)
def _test_agents(self, agent: Agent):
    """Check that an agent's reported result matches replaying its action queue.

    Scrambles a cube, lets `agent` search briefly, applies the queued actions
    to the scrambled state, and asserts that the cube is solved exactly when
    the agent claimed to have found a solution.
    """
    current, _, _ = cube.scramble(4)
    solution_found = agent.search(current, .05)

    # Replay the agent's proposed actions on the scrambled state
    for action_index in agent.action_queue:
        move = cube.action_space[action_index]
        current = cube.rotate(current, *move)

    assert solution_found == cube.is_solved(current)
def _action_queue_test(state, agent, sol_found):
    """Validate an agent's action queue against the expected outcome.

    Asserts that every queued action is a valid index into the action space,
    then replays the queue on `state` and asserts that the resulting solved
    flag equals `sol_found`.
    """
    assert all(0 <= queued < cube.action_dim for queued in agent.action_queue)

    current = state
    for queued in agent.action_queue:
        move = cube.action_space[queued]
        current = cube.rotate(current, *move)

    assert cube.is_solved(current) == sol_found
def _step(self, state: np.ndarray) -> (int, np.ndarray, bool):
    """Take one uniformly random action.

    :param state: Current state.
    :return: The chosen action index, the state after applying it, and whether
        that state is solved.
    """
    chosen = np.random.randint(cube.action_dim)
    move = cube.action_space[chosen]
    next_state = cube.rotate(state, *move)
    return chosen, next_state, cube.is_solved(next_state)
def _step(self, state: np.ndarray) -> (int, np.ndarray, bool):
    """Take one action chosen from the network's softmaxed output.

    The action is sampled from the resulting distribution when
    `self.sample_policy` is set; otherwise the most probable action is taken.

    :param state: Current state.
    :return: The chosen action index, the state after applying it, and whether
        that state is solved.
    """
    logits = self.net(cube.as_oh(state), value=False).cpu()
    probabilities = torch.nn.functional.softmax(logits, dim=1)
    policy = probabilities.numpy().squeeze()

    if self.sample_policy:
        action = np.random.choice(cube.action_dim, p=policy)
    else:
        action = policy.argmax()

    move = cube.action_space[action]
    state = cube.rotate(state, *move)
    return action, state, cube.is_solved(state)