def validate_data(inputs, policies, values, gamma=.95):
    """
    Validate the input, policy, value data to make sure it is of good quality.
    It must be in order and not shuffled.

    For every (state, policy, value) triple the state is loaded into a
    BatchCube and the action ``argmax(policy)`` is applied to it.  Unless the
    chain was reset, the following triple must then hold exactly the resulting
    state, and its value must correspond to the same number of discount steps
    as ``value / gamma`` (compared via rounded logarithms in base ``gamma``).

    The chain is reset (no expectation for the following triple) whenever
    ``value == 0`` or ``value == gamma``.

    :param inputs: iterable of states; each must be accepted by
        ``BatchCube.load_bit_array`` and match the shape ``(54, 6)`` of the
        predicted state.
    :param policies: iterable of policies; the argmax is the action taken.
    :param values: iterable of values.
    :param gamma: discount factor relating consecutive values.  It is also the
        logarithm base of the value check, so it must be positive and not 1.
    :raises AssertionError: if a state or a value does not follow from the
        previous triple.
    """
    from batch_cube import BatchCube
    import math

    next_state = None
    next_value = None

    for state, policy, value in zip(inputs, policies, values):
        cube = BatchCube()
        cube.load_bit_array(state)

        if next_state is not None:
            assert next_state.shape == state.shape
            assert np.array_equal(next_state, state), \
                "\nstate:\n" + str(state) + "\nnext_state:\n" + str(next_state)
        if next_value is not None:
            # Compare in units of discount steps.  The base has to be the
            # caller's gamma (it used to be hard-coded to .95) so that the
            # rounding tolerance stays one step for any discount factor.
            assert round(math.log(next_value, gamma)) == round(math.log(value, gamma)), \
                "next_value:" + str(next_value) + " value:" + str(value)

        action = np.argmax(policy)
        cube.step([action])

        if value == 0 or value == gamma:
            # No prediction is carried over to the following triple.
            next_value = None
            next_state = None
        else:
            next_value = value / gamma
            next_state = cube.bit_array().reshape((54, 6))
def __init__(self, _inner: Optional[BatchCube] = None):
    """
    Create the wrapper around a BatchCube.

    :param _inner: an existing BatchCube to wrap.  When omitted, a freshly
        constructed ``BatchCube()`` is used instead (a new solved puzzle
        cube, per the original documentation).
    """
    self._inner_cube = BatchCube() if _inner is None else _inner
def __init__(self, history=1, random_depth=None, _internal_state=None):
    """
    Set up ``self._internal_state``.

    :param history: length of the state tuple that is built; the first entry
        is the cube and the remaining ``history - 1`` entries are ``None``.
    :param random_depth: when given, the new cube is randomized with this
        depth before being stored.
    :param _internal_state: when given, it is stored as is and the other
        arguments are ignored.
    """
    # A ready-made state short-circuits everything else.
    if _internal_state is not None:
        self._internal_state = _internal_state
        return

    empty_slots = (None,) * (history - 1)

    start_cube = BatchCube(1)
    if random_depth is not None:
        start_cube.randomize(random_depth)

    self._internal_state = (start_cube,) + empty_slots
def process_training_data(self, inputs, policies, values, augment=True):
    """
    Turn raw training data into arrays shaped for the model input.

    Steps: optionally augment the data, then either reshape the inputs to
    ``(-1, 54, 6)`` (``self.history == 1``) or rebuild each input as a
    ``(54, self.history * 6)`` array that carries the preceding states of the
    same game.  Policies and values are returned as they are after
    augmentation.

    Emits a warning on every call: this method should not be used.
    """
    warnings.warn(
        "'BaseModel.process_training_data' should not be used. The 'process_single_input' method should be reimplemented",
        stacklevel=2)

    # augment with all 48 color rotations
    if augment:
        inputs, policies, values = augment_data(inputs, policies, values)

    # Without history a single reshape is all that is needed.
    if self.history == 1:
        return inputs.reshape((-1, 54, 6)), policies, values

    # The inputs are in order, so the history can be attached on the fly.
    # A state that is not the result of the previous state/action pair marks
    # the start of a new game.
    expected_cube = None
    window = None
    stacked_inputs = []
    for state, policy in zip(inputs, policies):
        cube = BatchCube()
        cube.load_bit_array(state)

        if expected_cube is not None and not cube != expected_cube:
            # same game: keep everything but the oldest entry
            older_states = window[:-1]
        else:
            # new game: blank history
            older_states = np.zeros((self.history - 1, 54, 6), dtype=bool)

        window = np.concatenate([state.reshape((1, 54, 6)), older_states],
                                axis=0)

        # (history, 54, 6) -> (54, history, 6) -> (54, history * 6)
        stacked_inputs.append(
            window.transpose(1, 0, 2).reshape((54, self.history * 6)))

        # The cube the next entry must show if it continues this game.
        action = np.argmax(policy)
        expected_cube = cube.copy()
        expected_cube.step([action])

    return np.array(stacked_inputs), policies, values
# Load BatchCube import sys sys.path.append('..') # add parent directory to path from batch_cube import BatchCube MAX_DISTANCE = 6 eye12 = np.eye(12, dtype=bool) state_dict = {} # value = (best_actions, distance) print("Generating data...") # start with solved cube cubes = BatchCube(1) solved_cube = cubes._cube_array[0] key = cubes.bit_array().tobytes() best_actions = np.zeros(12) distance = 0 state_dict[key] = (cubes.bit_array()[0], best_actions, distance) size = 1 for distance in range(1, MAX_DISTANCE + 1): print("Distance:", distance) # go to neighbors cubes.step_independent(np.arange(12)) # record last move taken last_action = np.tile(np.arange(12), size)
# NOTE(review): x, y, z and neighbors are defined earlier in this script,
# outside the part shown here.
pprint(y)
print()
print("z3d = \\")
pprint(z)
print()
np.set_printoptions(
    threshold=np.inf
)  # allows one to see the whole array even if it is big
print("neighbors = \\")
pprint(neighbors)
print()
np.set_printoptions(
    threshold=1000)  # back to a finite threshold so big arrays are summarized again

# Place the squares of a new cube into a 5x5x5 grid; cells that are not
# addressed by (x, y, z) keep the filler value -1.
bc = BatchCube(1)
grid = np.full((5, 5, 5), -1, dtype=int)
grid[x, y, z] = bc._cube_array[0]
pprint(grid)

# Same layout for a cube randomized with depth 100.
bc = BatchCube(1)
bc.randomize(100)
grid = np.full((5, 5, 5), -1, dtype=int)
grid[x, y, z] = bc._cube_array[0]
print()
print(bc)
# Seven entries: six color letters plus a blank.  The filler value -1 indexes
# the last entry, so unused cells are shown as a space.
c = np.array(list("rygwob "))
pprint(c[grid])

# Batched variant: one 5x5x5 grid per cube in the batch.
bc = BatchCube(10)
grid = np.full((len(bc), 5, 5, 5), -1, dtype=int)
def reset_and_randomize(self, depth):
    """
    Replace the internal state with a new ``BatchCube(1)`` and randomize it.

    :param depth: passed on to ``BatchCube.randomize``.
    """
    fresh_cube = BatchCube(1)
    self.internal_state = fresh_cube
    fresh_cube.randomize(depth)
def import_bit_array(self, bit_array):
    """
    Replace the internal state with the cube described by ``bit_array``.

    :param bit_array: array that can be reshaped to ``(1, 54, 6)``.  For each
        of the 54 positions, the largest index along the last axis that holds
        a nonzero entry becomes the stored color (0 if there is none).
    """
    bits = bit_array.reshape((1, 54, 6))

    # Position along the last axis, repeated for every square: 0..5.
    last_axis_index = np.indices(bits.shape)[2]

    # Zero entries are masked out, so the maximum picks the index of the set bit.
    weighted = last_axis_index * bits
    colors = weighted.max(axis=2)

    self.internal_state = BatchCube(cube_array=colors)
def next_state(self, node_idx, actions):
    """
    Return the cube array obtained by stepping a stored state.

    :param node_idx: index into ``self.states`` selecting the starting state.
    :param actions: passed on to ``BatchCube.step``.
    :return: the ``_cube_array`` of the stepped cube.
    """
    cube = BatchCube(cube_array=self.states[node_idx])
    cube.step(actions)
    return cube._cube_array
def __init__(self, internal_state=None):
    """
    Store the internal state.

    :param internal_state: the state to use; when omitted, a new
        ``BatchCube(1)`` is created.
    """
    self.internal_state = (
        BatchCube(1) if internal_state is None else internal_state
    )
(c, r): action_from_color[c] if r else opp_actions[action_from_color[c]] for c in range(6) for r in [False, True] } color_rotation_from_action = { a: pair for pair, a in action_from_color_rotation.items() } color_from_action = [color_rotation_from_action[a][0] for a in range(12)] rotation_from_action = [color_rotation_from_action[a][1] for a in range(12)] """ Every square position on the cube can be descriped by its starting color and the set of actions which preserve it (or equivalently those which move it). This is also the same as decribing a position by its starting color and the adjacent starting colors. """ color_encoding = [] bc = BatchCube(1) starting_colors = list(bc._cube_array[0]) # replace colors with positions bc._cube_array[0] = np.arange(54) neighbor_colors = [set() for _ in range(54)] for c in range(6): a = action_from_color[c] bc.step(a) c_adjacent = [ i for i in range(54) if bc._cube_array[0][i] != i and starting_colors[i] != c ] for i in range(54): if i in c_adjacent: