class PuzzleCube:
    """
    An instance of a PuzzleCube.

    The interface treats each instance of this class as immutable: every
    operation that would change the cube (``scramble``, ``move``) is applied
    to a copy of the wrapped cube object and a new ``PuzzleCube`` is returned.
    """

    def __init__(self, _inner: Optional["BatchCube"] = None):
        """
        Create a puzzle cube.

        :param _inner: Existing cube object to wrap (internal use). When
            omitted, a new ``BatchCube()`` is created, which the original
            documentation of this class describes as a solved cube.
        """
        if _inner is None:
            self._inner_cube = BatchCube()
        else:
            self._inner_cube = _inner

    def copy(self) -> "PuzzleCube":
        """
        :return: A new PuzzleCube wrapping a copy of the inner cube.
        """
        return PuzzleCube(_inner=self._inner_cube.copy())

    def scramble(self, distance: int) -> "PuzzleCube":
        """
        Scrambles a copy of the cube a set number of random moves.

        :param distance: Number of random moves to scramble (must be >= 0).
        :return: A copy of the cube scrambled.
        :raises AssertionError: If ``distance`` is negative.
        """
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped under ``python -O``; the exception type is unchanged.
        if not (distance >= 0):
            raise AssertionError("distance must be >= 0, got %r" % (distance,))

        inner = self._inner_cube.copy()
        inner.randomize(distance)
        return PuzzleCube(_inner=inner)

    def move(self, action: str) -> "PuzzleCube":
        """
        Perform action on a copy of the cube.

        :param action: One of "L", "L'", "R", "R'", "U", "U'", "D", "D'",
            "F", "F'", "B", "B'"
        :return: A copy of the cube with one action performed.
        :raises AssertionError: If ``action`` is not in ``valid_moves``.
        """
        # Single lookup: the position in ``valid_moves`` is the move index
        # handed to the inner cube's ``step``.
        try:
            move_index = valid_moves.index(action)
        except ValueError:
            raise AssertionError("invalid action %r" % (action,)) from None

        inner = self._inner_cube.copy()
        inner.step(move_index)
        return PuzzleCube(_inner=inner)

    def is_solved(self) -> bool:
        """
        :return: Whether or not the cube is solved.
        """
        # ``done()`` is indexed, so it returns a sequence/array; coerce the
        # first element so callers get a real ``bool`` as annotated.
        return bool(self._inner_cube.done()[0])

    def __str__(self) -> str:
        """
        :return: A flat string representation of the cube.
        """
        return str(self._inner_cube)

    def __repr__(self) -> str:
        """
        :return: The same string as ``__str__``.
        """
        return str(self._inner_cube)
class State:
    """
    Application-specific search state wrapping a cube object.
    """

    def __init__(self, internal_state=None):
        """
        :param internal_state: Cube object to wrap. When ``None``, a new
            ``BatchCube(1)`` is created.
        """
        self.internal_state = (
            BatchCube(1) if internal_state is None else internal_state
        )

    def reset_and_randomize(self, depth):
        """
        Replace the wrapped cube with a new ``BatchCube(1)`` and call its
        ``randomize(depth)``. Mutates this state in place.
        """
        cube = self.internal_state = BatchCube(1)
        cube.randomize(depth)

    def next(self, action):
        """
        :param action: Passed unchanged to the cube's ``step``.
        :return: A new State whose cube is a stepped copy of this one; this
            state's own cube is not stepped.
        """
        cube_after = self.internal_state.copy()
        cube_after.step(action)
        return State(cube_after)

    def input_array(self):
        """
        :return: The cube's bit array reshaped to ``(1, 6 * 54)``.
        """
        flat_shape = (1, 6 * 54)
        bits = self.internal_state.bit_array()
        return bits.reshape(flat_shape)

    def calculate_priors_and_value(self, model):
        """
        Ask the model for priors; the value is a fixed constant.

        :param model: Object with a ``predict`` method that accepts
            ``input_array()``.
        :return: ``(prior, value)`` where ``prior`` is the prediction
            reshaped to ``(12,)`` and ``value`` is always ``.01``.
        """
        prediction = model.predict(self.input_array())
        return prediction.reshape((12,)), .01

    def key(self):
        """
        :return: The raw bytes of the cube's bit array, usable as a
            dictionary key.
        """
        bits = self.internal_state.bit_array()
        return bits.tobytes()

    def done(self):
        """
        :return: The first element of the cube's ``done()`` result.
        """
        flags = self.internal_state.done()
        return flags[0]

    def __str__(self):
        """
        :return: The string form of the wrapped cube.
        """
        return str(self.internal_state)
class State:
    """
    Wrapper that exposes a cube object through the small interface used by
    the application (successor states, model input, lookup key, done flag).
    """

    def __init__(self, internal_state=None):
        """
        Wrap ``internal_state``; a new ``BatchCube(1)`` is used when it is
        ``None``.
        """
        if internal_state is not None:
            self.internal_state = internal_state
        else:
            self.internal_state = BatchCube(1)

    def reset_and_randomize(self, depth):
        """
        Start over in place: install a new ``BatchCube(1)`` and then call
        ``randomize(depth)`` on it.
        """
        self.internal_state = BatchCube(1)
        replacement = self.internal_state
        replacement.randomize(depth)

    def next(self, action):
        """
        Return the successor State for ``action``.

        The wrapped cube is copied first and ``step(action)`` is applied to
        the copy, so this state is left as it was.
        """
        stepped = self.internal_state.copy()
        stepped.step(action)
        successor = State(stepped)
        return successor

    def input_array(self):
        """
        Return the cube's bit array as a single row of ``6 * 54`` entries.
        """
        row_shape = (1, 6 * 54)
        return self.internal_state.bit_array().reshape(row_shape)

    def calculate_priors_and_value(self, model):
        """
        Return ``(prior, value)`` for this state.

        ``prior`` is ``model.predict(self.input_array())`` reshaped to
        ``(12,)``; ``value`` is the constant ``.01`` and does not depend on
        the model.
        """
        value = .01
        features = self.input_array()
        prior = model.predict(features).reshape((12,))
        return prior, value

    def key(self):
        """
        Return the bytes of the cube's bit array (hashable lookup key).
        """
        return self.internal_state.bit_array().tobytes()

    def done(self):
        """
        Return element 0 of the wrapped cube's ``done()`` result.
        """
        done_flags = self.internal_state.done()
        return done_flags[0]

    def __str__(self):
        """
        Return ``str()`` of the wrapped cube.
        """
        wrapped = self.internal_state
        return str(wrapped)
class BatchState:
    """
    Application-specific state wrapping a cube object.

    ``copy`` and ``next`` return ``BatchState`` instances, so the result
    keeps the full interface of this class (including ``copy`` and
    ``import_bit_array``).
    """

    def __init__(self, internal_state=None):
        """
        :param internal_state: Cube object to wrap. When ``None``, a new
            ``BatchCube(1)`` is created.
        """
        if internal_state is None:
            internal_state = BatchCube(1)

        self.internal_state = internal_state

    def copy(self):
        """
        :return: A new BatchState wrapping a copy of the cube.
        """
        return BatchState(self.internal_state.copy())

    def import_bit_array(self, bit_array):
        """
        Replace the wrapped cube with one rebuilt from ``bit_array``.

        ``bit_array`` is reshaped to ``(1, 54, 6)``; for each of the 54
        positions the largest index (0-5) whose bit is set is taken, and the
        resulting ``(1, 54)`` array is passed as ``cube_array`` to
        ``BatchCube``.

        :param bit_array: Array with ``54 * 6`` entries (presumably a one-hot
            color encoding per position - confirm against ``bit_array()``).
        """
        # Index along the last axis (0..5), broadcast over every position.
        color_idx = np.indices((1, 54, 6))[2]
        array = (color_idx * bit_array.reshape((1, 54, 6))).max(axis=2)
        self.internal_state = BatchCube(cube_array=array)

    def reset_and_randomize(self, depth):
        """
        Replace the wrapped cube with a new ``BatchCube(1)`` and call its
        ``randomize(depth)``. Mutates this state in place.
        """
        self.internal_state = BatchCube(1)
        self.internal_state.randomize(depth)

    def next(self, action):
        """
        :param action: Passed unchanged to the cube's ``step``.
        :return: A new BatchState whose cube is a stepped copy of this one;
            this state's own cube is not stepped.
        """
        next_internal_state = self.internal_state.copy()
        next_internal_state.step(action)
        return BatchState(next_internal_state)

    def input_array(self):
        """
        :return: The cube's bit array reshaped to ``(1, 6 * 54)``.
        """
        return self.internal_state.bit_array().reshape((1, 6 * 54))

    def key(self):
        """
        :return: The raw bytes of the cube's bit array, usable as a
            dictionary key.
        """
        return self.internal_state.bit_array().tobytes()

    def done(self):
        """
        :return: The first element of the cube's ``done()`` result.
        """
        return self.internal_state.done()[0]

    def __str__(self):
        """
        :return: The string form of the wrapped cube.
        """
        return str(self.internal_state)
import numpy as np

# Rebuild dictionary: each state is keyed by the raw bytes of its bit array,
# so a cube can be looked up directly via ``bit_array().tobytes()``.
# Values are (bit array, best-action array, distance as int).
state_dict = {
    b.tobytes(): (b, a, int(d))
    for b, a, d in zip(bits, best_actions, distances)
}

print("Testing data...")

# Test data types (spot check on the first entry only).
for entry_bits, entry_actions, _ in state_dict.values():
    assert entry_bits.dtype == bool
    assert entry_actions.dtype == bool
    break

# Test data: scramble a cube, then repeatedly take a random action among the
# stored best actions. The cube must not be done before any of the stored
# ``distance`` steps and must be done after the last one.
for i in range(1000):
    test_cube = BatchCube(1)
    test_cube.randomize(1 + (i % MAX_DISTANCE))

    # Distinct names on purpose: the lookup results must not rebind the
    # module-level ``best_actions`` array the dictionary was built from.
    _, action_mask, steps_to_solve = state_dict[test_cube.bit_array().tobytes()]

    for _ in range(steps_to_solve):
        assert not test_cube.done()[0]
        # Normalizing the boolean array gives a uniform distribution over
        # the actions marked as best.
        action = np.random.choice(12, p=action_mask / np.sum(action_mask))
        test_cube.step([action])
        _, action_mask, _ = state_dict[test_cube.bit_array().tobytes()]

    assert test_cube.done()[0]

print("Passed all tests")
import numpy as np

print("Testing data...")

# Test data types (spot check on the first entry only). Each value of
# ``state_dict`` is unpacked as (bit array, best-action array, distance).
for entry_bits, entry_actions, _ in state_dict.values():
    assert entry_bits.dtype == bool
    assert entry_actions.dtype == bool
    break

# Test data: scramble a cube, then repeatedly take a random action among the
# stored best actions. The cube must not be done before any of the stored
# ``distance`` steps and must be done after the last one.
for i in range(1000):
    test_cube = BatchCube(1)
    test_cube.randomize(1 + (i % MAX_DISTANCE))

    # Distinct names on purpose: the lookup results must not rebind
    # module-level names such as ``best_actions`` that other parts of the
    # script may still use.
    _, action_mask, steps_to_solve = state_dict[test_cube.bit_array().tobytes()]

    for _ in range(steps_to_solve):
        assert not test_cube.done()[0]
        # Normalizing the boolean array gives a uniform distribution over
        # the actions marked as best.
        action = np.random.choice(12, p=action_mask / np.sum(action_mask))
        test_cube.step([action])
        _, action_mask, _ = state_dict[test_cube.bit_array().tobytes()]

    assert test_cube.done()[0]

print("Passed all tests")