def test_scramble(self):
    """Check that scrambled states are unsolved and that replaying a scramble backwards solves the cube."""
    np.random.seed(42)

    # State returned by cube.scramble(1) must not be solved
    state = cube.get_solved()
    state, faces, dirs = cube.scramble(1)
    assert not cube.is_solved(state)

    # Same check for cube.scramble(20)
    state = cube.get_solved()
    state, faces, dirs = cube.scramble(20)
    assert not cube.is_solved(state)

    # Undo the scramble: walk the moves back to front with every direction flipped
    flipped_dirs = [int(not direction) for direction in dirs]
    for face, direction in zip(reversed(faces), reversed(flipped_dirs)):
        state = cube.rotate(state, face, direction)
    assert cube.is_solved(state)
def test_as633(self):
    """Check that `cube.as633` of the solved state has face `i` filled entirely with the value `i`."""
    solved_633 = cube.as633(cube.get_solved())
    # One constant 3x3 face per face index 0..5
    expected_633 = np.array([np.ones((3, 3)) * face for face in range(6)])
    assert (solved_633 == expected_633).all()
def load(load_dir: str, logger=NullLogger(), load_best=False):
    """
    Load a model from a configuration directory

    :param str load_dir: Directory holding `config.json` and the saved weights
    :param logger: Logger passed on to `Model.create`
    :param bool load_best: If True, `model-best.pt` is tried first; `model.pt` is used if that file is not found
    :return: The model with loaded weights, moved to `gpu` and put back into training mode
    """
    weights_name = "model-best.pt" if load_best else "model.pt"
    model_path = os.path.join(load_dir, weights_name)
    conf_path = os.path.join(load_dir, "config.json")

    with open(conf_path, encoding="utf-8") as conf:
        try:
            state_dict = torch.load(model_path, map_location=gpu)
        except FileNotFoundError:
            # Requested weights are missing: fall back to the regular model file
            model_path = os.path.join(load_dir, "model.pt")
            state_dict = torch.load(model_path, map_location=gpu)
        config = ModelConfig.from_json_dict(json.load(conf))

    model = Model.create(config, logger)
    model.load_state_dict(state_dict)
    model.to(gpu)

    # First time the net is loaded, a feedforward is performed, as the first time is slow
    # This avoids skewing evaluation results
    with torch.no_grad():
        model.eval()
        model(cube.as_oh(cube.get_solved()))
        model.train()
    return model
def search(self, state: np.ndarray, time_limit: float=None, max_states: int=None) -> bool:
    """Searches according to the batched, weighted A* algorithm

    While there is time left, the algorithm finds the best `expansions` open states
    (using priority queue) with lowest cost according to the A* cost heuristic (see `self.cost`).
    From these, it expands to new open states according to `self.expand_batch`.

    :param np.ndarray state: State to start the search from
    :param float time_limit: Time budget; the effective value is the one returned by `self.reset`
    :param int max_states: Cap on `len(self)`; the effective value is the one returned by `self.reset`
    :return: True if `state` is already solved or a solution was found (in the latter case the
        actions are left in `self.action_queue`), False if the time or state budget ran out first
    """
    self.tt.tick()
    time_limit, max_states = self.reset(time_limit, max_states)
    if cube.is_solved(state):
        return True

    # First node
    # `tobytes` returns exactly the bytes of the deprecated `tostring` alias,
    # so keys written elsewhere with either method still match
    self.indices[state.tobytes()], self.states[1], self.G[1] = 1, state, 0
    # Given cost 0: Should not matter; just to avoid np.empty weirdness
    heapq.heappush(self.open_queue, (0, 1))

    while self.tt.tock() < time_limit and len(self) + self.expansions * cube.action_dim <= max_states:
        self.tt.profile("Remove nodes from open priority queue")
        n_remove = min(len(self.open_queue), self.expansions)
        expand_idcs = np.array([heapq.heappop(self.open_queue)[1] for _ in range(n_remove)], dtype=int)
        self.tt.end_profile("Remove nodes from open priority queue")

        is_won = self.expand_batch(expand_idcs)
        if is_won:
            # Walk the parent links from the solved state back to the first node (index 1)
            i = self.indices[cube.get_solved().tobytes()]
            # Build action queue
            while i != 1:
                self.action_queue.appendleft(self.parent_actions[i])
                i = self.parents[i]
            return True
    return False
def __init__(self, evaluations: np.ndarray, games: int, depth: int, extra_evals: int, reward_method: str, logger: Logger = NullLogger()):
    """Initialize containers mostly

    :param np.ndarray evaluations: array of the evaluations performed on the model. Used for the more intensive analysis
    :param int games: Stored as `self.games`
    :param int depth: Rollout depth
    :param int extra_evals: If != 0, extra evaluations are added for the first `extra_evals` rollouts
    :param str reward_method: Stored as `self.reward_method`
    :param Logger logger: Logger that receives the status message
    """
    self.games = games
    self.depth = depth
    self.depths = np.arange(depth)

    # Wont add evals in the future (or if no evals are needed)
    last_evaluation = evaluations[-1] if len(evaluations) else 0
    self.extra_evals = min(last_evaluation, extra_evals)
    merged_evaluations = np.append(evaluations, range(self.extra_evals))
    self.evaluations = np.unique(merged_evaluations)
    self.reward_method = reward_method

    self.orig_params = None
    self.params = None

    # The solved state followed by the states produced by the actions in cube.iter_actions()
    self.first_states = np.stack((
        cube.get_solved(),
        *cube.multi_rotate(
            cube.repeat_state(cube.get_solved(), cube.action_dim),
            *cube.iter_actions(),
        ),
    ))
    self.first_states = cube.as_oh(self.first_states)

    # Containers filled during the analysis
    self.first_state_values = []
    self.substate_val_stds = []
    self.avg_value_targets = []
    self.param_changes = []
    self.param_total_changes = []
    self.policy_entropies = []
    self.rollout_policy = []

    self.log = logger
    self.log.verbose(f"Analysis of this training was enabled. Extra analysis is done for evaluations and for first {extra_evals} rollouts")
def _multi_rotate_test(self):
    """Check that `cube.multi_rotate` gives the same result as `cube.rotate` applied state by state.

    Five states start out solved; over ten rounds each state gets a random face and
    direction, and the batched result is compared with the one-at-a-time result.
    """
    n_states = 5
    states = np.array([cube.get_solved()] * n_states)
    for _ in range(10):
        faces = np.random.randint(0, 6, n_states)
        # randint's upper bound is exclusive: 2 is required to draw both directions (0 and 1)
        dirs = np.random.randint(0, 2, n_states)
        states_classic = np.array([
            cube.rotate(state, face, d)
            for state, face, d in zip(states, faces, dirs)
        ])
        states = cube.multi_rotate(states, faces, dirs)
        assert (states_classic == states).all()
def _get_states(shape: tuple):
    """Build an array of states in which every row is a random rotation of the row before it.

    :param tuple shape: Leading dimensions of the result; if it has at most one element,
        an extra leading axis of length 1 is added. `cube.shape()` is appended in both cases
    :return: Array of dtype `cube.dtype` whose first row is the solved state repeated along the second axis
    """
    if len(shape) > 1:
        full_shape = (*shape, *cube.shape())
    else:
        full_shape = (1, *shape, *cube.shape())
    n_states = full_shape[1]

    states = np.empty(full_shape, dtype=cube.dtype)
    states[0] = cube.repeat_state(cube.get_solved(), n_states)
    for row in range(1, len(states)):
        faces = np.random.randint(0, 6, n_states)
        dirs = np.random.randint(0, 2, n_states)
        states[row] = cube.multi_rotate(states[row - 1], faces, dirs)
    return states
def rotate(self, n: int):
    """Benchmark `n` random single rotations applied one after another, starting from the solved state.

    :param int n: Number of rotations to time
    """
    self.log.section(f"Benchmarking {TickTock.thousand_seps(n)} single rotations, {_repstr()}")
    faces = np.random.randint(0, 6, n)
    dirs = np.random.randint(0, 2, n)
    state = cube.get_solved()
    pname = f"Single rotation, {_repstr()}"
    for face, direction in zip(faces, dirs):
        # Only the rotation itself sits between profile start and end
        self.tt.profile(pname)
        state = cube.rotate(state, face, direction)
        self.tt.end_profile()
    self._log_method_results("Average rotation time", pname)
def multi_rotate(self, n: int, n_states: int):
    """Benchmark `n` calls to `cube.multi_rotate`, each rotating `n_states` states at once.

    :param int n: Number of batched rotation calls to time
    :param int n_states: Number of states rotated per call
    """
    self.log.section(
        f"Benchmarking {TickTock.thousand_seps(n)} multi rotations of "
        f"{TickTock.thousand_seps(n_states)} states each, {_repstr()}"
    )
    states = cube.repeat_state(cube.get_solved(), n_states)
    # One row of faces/directions per timed call
    faces = np.random.randint(0, 6, (n, n_states))
    dirs = np.random.randint(0, 2, (n, n_states))
    pname = f"{TickTock.thousand_seps(n_states)} rotations, {_repstr()}"
    for face_row, dir_row in zip(faces, dirs):
        self.tt.profile(pname)
        states = cube.multi_rotate(states, face_row, dir_row)
        self.tt.end_profile()
    self._log_method_results("Average rotation time", pname, n_states)
def _get_correctness(self):
    """Check `cube.as_correct` against a hand-written table for the solved state after two rotations."""
    state = cube.rotate(cube.get_solved(), 0, True)
    state = cube.rotate(state, 5, False)

    # Expected output for the single-state batch built below
    expected = torch.tensor(
        [
            [1, 1, 1, 1, -1, -1, -1, 1],
            [-1, 1, 1, 1, 1, 1, -1, -1],
            [-1, -1, -1, -1, -1, 1, 1, 1],
            [-1, -1, -1, -1, -1, 1, 1, 1],
            [-1, 1, 1, 1, 1, 1, -1, -1],
            [1, 1, -1, -1, -1, 1, 1, 1],
        ],
        device=gpu,
    )
    # as_correct is given a batch of one state, hence the unsqueeze
    actual = cube.as_correct(torch.from_numpy(state).unsqueeze(0))
    assert torch.all(expected == actual)
def test_as_oh(self):
    """Check `cube.as_oh` of the solved state against a one-hot tensor built from `SimpleState`."""
    state = cube.get_solved()
    oh = cube.as_oh(state)

    # 20 rows of 24 entries: rows 0-7 are set from the corners, rows 8-19 from the sides
    supposed_state = torch.zeros(20, 24, device=gpu)

    corners = list(map(
        get_corner_pos,
        SimpleState.corners.tolist(),
        SimpleState.corner_orientations.tolist(),
    ))
    supposed_state[torch.arange(8), corners] = 1

    sides = list(map(
        get_side_pos,
        SimpleState.sides.tolist(),
        SimpleState.side_orientations.tolist(),
    ))
    supposed_state[torch.arange(8, 20), sides] = 1

    assert (supposed_state.flatten() == oh).all()
def _rotation_tests(self):
    """Exercise `cube.rotate` and compare the results with `cube.is_solved` and `cube.stringify`."""

    def grid(*rows):
        # Expected stringify output, written one row per argument
        return "\n".join(rows)

    def rotate_and_check(current, moves, solved_flags):
        # Apply each (face, direction) move and compare is_solved with the expected flag
        for (face, direction), expect_solved in zip(moves, solved_flags):
            current = cube.rotate(current, face, direction)
            assert expect_solved == cube.is_solved(current)
        return current

    # Run every action in cube.action_space once; the resulting state is discarded below
    state = cube.get_solved()
    for action in cube.action_space:
        state = cube.rotate(state, *action)

    # Tests that stringify, and by extension as633, works on the assembled cube
    state = cube.get_solved()
    assert cube.stringify(state) == grid(
        " 2 2 2 ",
        " 2 2 2 ",
        " 2 2 2 ",
        "4 4 4 0 0 0 5 5 5 1 1 1",
        "4 4 4 0 0 0 5 5 5 1 1 1",
        "4 4 4 0 0 0 5 5 5 1 1 1",
        " 3 3 3 ",
        " 3 3 3 ",
        " 3 3 3 ",
    )

    # Performs moves and checks if are assembled/not assembled as expected
    state = rotate_and_check(
        state,
        ((0, 1), (0, 0), (0, 1), (1, 1), (2, 0), (3, 0)),
        (False, True, False, False, False, False),
    )

    # Tests more moves
    state = rotate_and_check(
        state,
        ((3, 1), (2, 1), (1, 0), (0, 0)),
        (False, False, False, True),
    )

    # Performs move and checks if it fits with how the string representation would look
    state = cube.rotate(cube.get_solved(), 0, 1)
    assert cube.stringify(state) == grid(
        " 2 2 2 ",
        " 2 2 2 ",
        " 5 5 5 ",
        "4 4 2 0 0 0 3 5 5 1 1 1",
        "4 4 2 0 0 0 3 5 5 1 1 1",
        "4 4 2 0 0 0 3 5 5 1 1 1",
        " 4 4 4 ",
        " 3 3 3 ",
        " 3 3 3 ",
    )

    # Performs all moves and checks if result fits with how it theoretically should look
    all_moves = (
        (0, 0), (1, 0), (2, 0), (3, 0), (4, 0), (5, 0),
        (0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (5, 1),
    )
    state = rotate_and_check(cube.get_solved(), all_moves, (False,) * 12)
    assert cube.stringify(state) == grid(
        " 2 0 2 ",
        " 5 2 4 ",
        " 2 1 2 ",
        "4 2 4 0 2 0 5 2 5 1 2 1",
        "4 4 4 0 0 0 5 5 5 1 1 1",
        "4 3 4 0 3 0 5 3 5 1 3 1",
        " 3 1 3 ",
        " 5 3 4 ",
        " 3 0 3 ",
    )
def test_init(self):
    """Check that the solved state reports as solved and that the solved instance has shape (20,)."""
    solved = cube.get_solved()
    assert cube.is_solved(solved)

    instance_shape = cube.get_solved_instance().shape
    assert instance_shape == (20,)
def solve(depth: int, c: float, time_limit: float):
    """Scramble a cube, search for a solution with MCTS and sanity-check the outcome.

    :param int depth: Passed as first argument to `cube.scramble`
    :param float c: Passed as `c` to the `MCTS` searcher
    :param float time_limit: Time limit handed to `searcher.search`
    :return: Tuple of the search result and the number of entries in `searcher.indices`
    """
    # Only the scrambled state is needed; the applied faces and directions are discarded
    state, _, _ = cube.scramble(depth, True)
    searcher = MCTS(net, c=c, search_graph=False)
    is_solved = searcher.search(state, time_limit)
    # A reported solution must coincide with the solved state having been indexed.
    # `tobytes` returns the same bytes as the deprecated `tostring` alias, so the lookup is unchanged
    assert is_solved == (cube.get_solved().tobytes() in searcher.indices)
    return is_solved, len(searcher.indices)