class PuzzleCube:
    """
    A single puzzle cube backed by a ``BatchCube``.

    The public interface treats instances as immutable: ``scramble`` and
    ``move`` never modify the cube they are called on.  They copy the
    wrapped ``BatchCube``, change the copy, and return it inside a new
    ``PuzzleCube``.
    """

    def __init__(self, _inner: Optional["BatchCube"] = None):
        """
        Create a new solved puzzle cube.

        :param _inner: Internal use only.  An existing ``BatchCube`` to wrap;
            it is stored as given (not copied).  When omitted, a fresh
            ``BatchCube()`` is created.
        """
        if _inner is None:
            self._inner_cube = BatchCube()
        else:
            self._inner_cube = _inner

    def copy(self) -> "PuzzleCube":
        """
        :return: A new PuzzleCube wrapping a copy of the inner cube.
        """
        return PuzzleCube(_inner=self._inner_cube.copy())

    def scramble(self, distance: int) -> "PuzzleCube":
        """
        Scrambles a copy of the cube a set number of random moves.

        :param distance: Number of random moves to scramble (must be >= 0).
        :return: A copy of the cube scrambled.
        :raises ValueError: If ``distance`` is negative.
        """
        # Explicit check instead of ``assert`` so that the validation is
        # still performed when Python runs with ``-O``.
        if distance < 0:
            raise ValueError(
                "distance must be non-negative, got {!r}".format(distance))

        inner = self._inner_cube.copy()
        inner.randomize(distance)
        return PuzzleCube(_inner=inner)

    def move(self, action: str) -> "PuzzleCube":
        """
        Perform action on a copy of the cube.

        :param action: One of "L", "L'", "R", "R'", "U", "U'", "D", "D'",
            "F", "F'", "B", "B'"
        :return: A copy of the cube with one action performed.
        :raises ValueError: If ``action`` is not one of the valid moves.
        """
        # A single lookup both validates the action and yields its index.
        try:
            move_index = valid_moves.index(action)
        except ValueError:
            raise ValueError(
                "invalid action {!r}; expected one of {}".format(
                    action, list(valid_moves))) from None

        inner = self._inner_cube.copy()
        inner.step(move_index)
        return PuzzleCube(_inner=inner)

    def is_solved(self) -> bool:
        """
        :return: Whether or not the cube is solved.
        """
        # ``done()`` reports one flag per cube in the batch; this wrapper
        # reads the first entry.  Coerce it so callers get a real ``bool``,
        # as the annotation promises.
        return bool(self._inner_cube.done()[0])

    def __str__(self) -> str:
        """
        :return: A flat string representation of the cube.
        """
        return str(self._inner_cube)

    def __repr__(self) -> str:
        """
        :return: The same text as ``__str__``.
        """
        return str(self._inner_cube)
def process_training_data(self, inputs, policies, values, augment=True):
    """
    Turn raw training data into arrays shaped for the model input.

    Deprecated entry point: a warning is emitted on every call.

    Steps:
      1. Optionally augment the data via ``augment_data``.
      2. With ``self.history == 1`` the inputs are simply reshaped to
         ``(-1, 54, 6)``.
      3. Otherwise each state is stacked with its ``self.history - 1``
         predecessors and flattened to ``(54, self.history * 6)``.

    :param inputs: Array of cube states, iterable state by state.
    :param policies: One policy per state; its argmax is taken as the
        move that was played.
    :param values: Returned as-is (apart from augmentation).
    :param augment: Whether to run ``augment_data`` first.
    :return: Tuple ``(inputs, policies, values)``.
    """
    warnings.warn(
        "'BaseModel.process_training_data' should not be used. The 'process_single_input' method should be reimplemented",
        stacklevel=2)

    # augment with all 48 color rotations
    if augment:
        inputs, policies, values = augment_data(inputs, policies, values)

    # No history to attach: a plain reshape is all that is needed.
    if self.history == 1:
        return inputs.reshape((-1, 54, 6)), policies, values

    # The samples are in play order, so the history can be rebuilt on the
    # fly.  A sample belongs to the same game as its predecessor exactly
    # when it equals the predecessor cube after the predecessor's argmax
    # action has been applied.
    expected_cube = None   # cube we would see next if the game continues
    window = None          # newest-first stack of the last `history` states
    stacked = []
    for bits, policy in zip(inputs, policies):
        current = BatchCube()
        current.load_bit_array(bits)

        older = (
            # new game (or first sample): start from a blank history
            np.zeros((self.history - 1, 54, 6), dtype=bool)
            if expected_cube is None or current != expected_cube
            # same game: drop the oldest frame of the previous window
            else window[:-1]
        )

        window = np.concatenate([bits.reshape((1, 54, 6)), older], axis=0)

        # (history, 54, 6) -> (54, history, 6) -> (54, history * 6)
        per_sticker = np.swapaxes(window, 0, 1)
        stacked.append(per_sticker.reshape((54, self.history * 6)))

        expected_cube = current.copy()
        expected_cube.step([np.argmax(policy)])

    return np.array(stacked), policies, values
class State:
    """
    Application-specific state object wrapping a ``BatchCube``.

    ``next`` leaves this object untouched and returns a new ``State``;
    ``reset_and_randomize`` is the only method that rebinds the wrapped cube.
    """

    def __init__(self, internal_state=None):
        """
        :param internal_state: Cube object to wrap.  When omitted, a fresh
            ``BatchCube(1)`` is created.
        """
        self.internal_state = (
            BatchCube(1) if internal_state is None else internal_state
        )

    def reset_and_randomize(self, depth):
        """Replace the wrapped cube with a new ``BatchCube(1)`` and call
        ``randomize(depth)`` on it."""
        fresh_cube = BatchCube(1)
        self.internal_state = fresh_cube
        fresh_cube.randomize(depth)

    def next(self, action):
        """Return a new ``State`` obtained by applying ``action`` to a copy
        of the wrapped cube."""
        successor = self.internal_state.copy()
        successor.step(action)
        return State(successor)

    def input_array(self):
        """Return the cube's bit array reshaped to ``(1, 6 * 54)``."""
        bits = self.internal_state.bit_array()
        return bits.reshape((1, 6 * 54))

    def calculate_priors_and_value(self, model):
        """
        Query ``model`` for the action priors of this state.

        The prior is ``model.predict`` applied to ``input_array()``,
        reshaped to ``(12,)``.  The value is the fixed constant ``.01``;
        the model is not consulted for it.

        :return: Tuple ``(prior, value)``.
        """
        prediction = model.predict(self.input_array())
        return prediction.reshape((12,)), .01

    def key(self):
        """Return the raw bytes of the cube's bit array."""
        bits = self.internal_state.bit_array()
        return bits.tobytes()

    def done(self):
        """Return the first entry of the wrapped cube's ``done()`` result."""
        finished = self.internal_state.done()
        return finished[0]

    def __str__(self):
        return str(self.internal_state)
class State:
    """
    Application-specific wrapper that exposes a ``BatchCube`` as a state.

    Stepping with ``next`` produces a separate ``State`` built from a copy
    of the cube, so the object it is called on is not changed.
    """

    def __init__(self, internal_state=None):
        """
        :param internal_state: Cube to wrap; defaults to a newly created
            ``BatchCube(1)`` when not given.
        """
        if internal_state is not None:
            self.internal_state = internal_state
        else:
            self.internal_state = BatchCube(1)

    def reset_and_randomize(self, depth):
        """Rebind the wrapped cube to a new ``BatchCube(1)`` and randomize
        it with ``depth``."""
        self.internal_state = new_cube = BatchCube(1)
        new_cube.randomize(depth)

    def next(self, action):
        """Apply ``action`` to a copy of the cube and wrap the result in a
        new ``State``."""
        cube_after = self.internal_state.copy()
        cube_after.step(action)
        return State(cube_after)

    def input_array(self):
        """Return the cube's bit array as a single row of ``6 * 54``
        entries."""
        row_shape = (1, 6 * 54)
        return self.internal_state.bit_array().reshape(row_shape)

    def calculate_priors_and_value(self, model):
        """
        Compute the priors for this state from ``model``.

        ``model.predict`` is called on ``input_array()`` and its output is
        reshaped to ``(12,)``.  The returned value is always ``.01``.

        :return: ``(prior, value)``
        """
        value = .01
        raw_output = model.predict(self.input_array())
        prior = raw_output.reshape((12, ))
        return prior, value

    def key(self):
        """Return the bytes of the cube's bit array."""
        return self.internal_state.bit_array().tobytes()

    def done(self):
        """Return element 0 of the wrapped cube's ``done()`` result."""
        flags = self.internal_state.done()
        return flags[0]

    def __str__(self):
        return str(self.internal_state)
class BatchState:
    """
    Application-specific state object wrapping a ``BatchCube``.

    NOTE(review): ``copy`` and ``next`` construct ``State`` objects, not
    ``BatchState`` objects.  Confirm with callers whether that is intended
    before changing it.
    """

    def __init__(self, internal_state=None):
        """
        :param internal_state: Cube object to wrap.  When omitted, a fresh
            ``BatchCube(1)`` is created.
        """
        self.internal_state = (
            BatchCube(1) if internal_state is None else internal_state
        )

    def copy(self):
        """Wrap a copy of the cube in a ``State``.

        NOTE(review): returns ``State`` rather than ``BatchState`` --
        confirm this is deliberate.
        """
        duplicate = self.internal_state.copy()
        return State(duplicate)

    def import_bit_array(self, bit_array):
        """
        Replace the wrapped cube with one built from ``bit_array``.

        ``bit_array`` is viewed as ``(1, 54, 6)``.  For each of the 54
        positions the largest index along the last axis whose entry is set
        is taken (0 when none is set), and the resulting ``(1, 54)`` array
        is passed to ``BatchCube(cube_array=...)``.
        """
        bits = bit_array.reshape((1, 54, 6))
        # 0..5 along the last axis, broadcast against every position.
        channel_index = np.arange(6)
        cube_array = (channel_index * bits).max(axis=2)
        self.internal_state = BatchCube(cube_array=cube_array)

    def reset_and_randomize(self, depth):
        """Replace the wrapped cube with a new ``BatchCube(1)`` and call
        ``randomize(depth)`` on it."""
        fresh_cube = BatchCube(1)
        self.internal_state = fresh_cube
        fresh_cube.randomize(depth)

    def next(self, action):
        """Apply ``action`` to a copy of the cube and wrap it in a ``State``.

        NOTE(review): returns ``State`` rather than ``BatchState`` --
        confirm this is deliberate.
        """
        successor = self.internal_state.copy()
        successor.step(action)
        return State(successor)

    def input_array(self):
        """Return the cube's bit array reshaped to ``(1, 6 * 54)``."""
        bits = self.internal_state.bit_array()
        return bits.reshape((1, 6 * 54))

    def key(self):
        """Return the raw bytes of the cube's bit array."""
        bits = self.internal_state.bit_array()
        return bits.tobytes()

    def done(self):
        """Return the first entry of the wrapped cube's ``done()`` result."""
        finished = self.internal_state.done()
        return finished[0]

    def __str__(self):
        return str(self.internal_state)
) # allows one to see the whole array even if it is big pprint(np.array(position_permutations)) print() print("opp_action_permutations = \\") pprint(np.array(opp_action_permutations)) print() print("action_permutations = \\") pprint(np.array(action_permutations)) # test opposites for i in range(48): bc0 = BatchCube() bc0.randomize(100) bc = bc0.copy() bc._cube_array[0] = bc._cube_array[0][position_permutations[i]] bc._cube_array[0] = bc._cube_array[0][opp_position_permutations[i]] assert bc == bc0 bc._cube_array[0] = color_permutations[i][bc._cube_array[0]] bc._cube_array[0] = opp_color_permutations[i][bc._cube_array[0]] assert bc == bc0 policy0 = np.random.uniform(size=12) policy1 = policy0.copy() policy1 = policy1[action_permutations[i]] policy1 = policy1[opp_action_permutations[i]] assert np.array_equal(policy0, policy1)