def validate_data(inputs, policies, values, gamma=.95):
    """
    Validate ordered (input, policy, value) training records.

    The records must be in their original order (not shuffled).  For each
    record the state is loaded into a cube and the highest-scoring action of
    the policy is applied.  Unless the record's value is 0 or `gamma`, the
    resulting cube state and `value / gamma` are carried forward as the
    expectation for the next record, which is then checked against them.

    Parameters
    ----------
    inputs : iterable of bit arrays
        Cube states.  A state that follows a record carrying an expectation
        must have shape (54, 6) to pass the shape check.
    policies : iterable of array-like
        Per-record action scores; `np.argmax` selects the action to apply.
    values : iterable of numbers
        Per-record values, compared on a log-`gamma` scale.
    gamma : float
        Discount factor relating the values of consecutive records.

    Raises
    ------
    AssertionError
        If a record does not match the state or value expected from the
        previous record.
    """
    from batch_cube import BatchCube
    import math

    next_state = None
    next_value = None

    for state, policy, value in zip(inputs, policies, values):
        cube = BatchCube()
        cube.load_bit_array(state)

        if next_state is not None:
            assert next_state.shape == state.shape
            assert np.array_equal(next_state, state), \
                "\nstate:\n" + str(state) + "\nnext_state:\n" + str(next_state)
        if next_value is not None:
            value_msg = "next_value:" + str(next_value) + " value:" + str(value)
            # A non-positive value has no logarithm; report it as a validation
            # failure instead of letting math.log raise ValueError.
            assert value > 0, value_msg
            # Compare exponents of gamma (rounded to absorb float error).  The
            # base must be the caller's gamma, not a hard-coded constant.
            assert round(math.log(next_value, gamma)) == round(math.log(value, gamma)), value_msg

        # Follow the action the policy rates highest.
        action = np.argmax(policy)
        cube.step([action])

        if value == 0 or value == gamma:
            # These values end a chain: nothing is expected of the next record.
            next_value = None
            next_state = None
        else:
            next_value = value / gamma
            next_state = cube.bit_array().reshape((54, 6))
class State():
    """ Application-specific search state wrapping a single-cube BatchCube. """

    def __init__(self, internal_state=None):
        # Fall back to a freshly constructed BatchCube(1) when none is given.
        self.internal_state = BatchCube(1) if internal_state is None else internal_state

    def reset_and_randomize(self, depth):
        """ Replace the wrapped cube with a new one and randomize it with `depth`. """
        fresh_cube = BatchCube(1)
        self.internal_state = fresh_cube
        fresh_cube.randomize(depth)

    def next(self, action):
        """ Return a new State for a copy of the cube after stepping with `action`. """
        successor = self.internal_state.copy()
        successor.step(action)
        return State(successor)

    def input_array(self):
        """ Return the cube's bit array reshaped to (1, 6*54). """
        bits = self.internal_state.bit_array()
        return bits.reshape((1, 6 * 54))

    def calculate_priors_and_value(self, model):
        """
        Return `(prior, value)`: the model's prediction for this state
        reshaped to (12,), and the constant value .01.
        """
        prediction = model.predict(self.input_array())
        return prediction.reshape((12,)), .01

    def key(self):
        """ Return the bytes of the cube's bit array. """
        bits = self.internal_state.bit_array()
        return bits.tobytes()

    def done(self):
        """ Return the first entry of the wrapped cube's `done()` result. """
        finished = self.internal_state.done()
        return finished[0]

    def __str__(self):
        return str(self.internal_state)
class State():
    """ Application-specific state: a thin wrapper around a one-cube BatchCube. """

    def __init__(self, internal_state=None):
        if internal_state is not None:
            self.internal_state = internal_state
        else:
            # No cube supplied: start from a newly constructed BatchCube(1).
            self.internal_state = BatchCube(1)

    def reset_and_randomize(self, depth):
        """ Swap in a new cube, then randomize it using `depth`. """
        new_cube = BatchCube(1)
        self.internal_state = new_cube
        new_cube.randomize(depth)

    def next(self, action):
        """ Step a copy of the cube with `action` and wrap it in a new State. """
        stepped = self.internal_state.copy()
        stepped.step(action)
        return State(stepped)

    def input_array(self):
        """ Bit array of the cube, reshaped to (1, 6 * 54). """
        target_shape = (1, 6 * 54)
        return self.internal_state.bit_array().reshape(target_shape)

    def calculate_priors_and_value(self, model):
        """
        Ask `model` for a prediction on this state's input array.

        Returns the prediction reshaped to (12,) as the prior, paired with
        a fixed value of .01.
        """
        value = .01
        prior = model.predict(self.input_array()).reshape((12, ))
        return prior, value

    def key(self):
        """ Bytes of the cube's bit array. """
        return self.internal_state.bit_array().tobytes()

    def done(self):
        """ First element of the wrapped cube's `done()` result. """
        return self.internal_state.done()[0]

    def __str__(self):
        cube = self.internal_state
        return str(cube)
class BatchState():
    """ Application-specific state wrapping a BatchCube. """

    def __init__(self, internal_state=None):
        if internal_state is None:
            internal_state = BatchCube(1)
        self.internal_state = internal_state

    def copy(self):
        """ Return a new BatchState wrapping a copy of the internal cube. """
        # Build the same class so the copy keeps copy()/import_bit_array().
        return BatchState(self.internal_state.copy())

    def import_bit_array(self, bit_array):
        """
        Replace the internal cube with one built from `bit_array`.

        `bit_array` is reshaped to (1, 54, 6).  Each entry is multiplied by
        its index along the last axis and the maximum over that axis is
        taken, giving a (1, 54) array that is passed to BatchCube as
        `cube_array`.
        """
        color_idx = np.indices((1, 54, 6))[2]
        array = (color_idx * bit_array.reshape((1, 54, 6))).max(axis=2)
        self.internal_state = BatchCube(cube_array=array)

    def reset_and_randomize(self, depth):
        """ Replace the internal cube with a new one and randomize it with `depth`. """
        self.internal_state = BatchCube(1)
        self.internal_state.randomize(depth)

    def next(self, action):
        """ Return a new BatchState for a copy of the cube stepped with `action`. """
        next_internal_state = self.internal_state.copy()
        next_internal_state.step(action)
        return BatchState(next_internal_state)

    def input_array(self):
        """ Return the cube's bit array reshaped to (1, 6*54). """
        return self.internal_state.bit_array().reshape((1, 6 * 54))

    def key(self):
        """ Return the bytes of the cube's bit array. """
        return self.internal_state.bit_array().tobytes()

    def done(self):
        """ Return the first entry of the internal cube's `done()` result. """
        return self.internal_state.done()[0]

    def __str__(self):
        return str(self.internal_state)
import sys
sys.path.append('..')  # add parent directory to path
from batch_cube import BatchCube

# Largest distance handled by the loop below (it runs for 1..MAX_DISTANCE).
MAX_DISTANCE = 6

# 12x12 boolean identity matrix (presumably one row per action -- confirm
# against its use further down).
eye12 = np.eye(12, dtype=bool)

# Keyed by the bytes of a cube's bit array;
# value = (bit_array, best_actions, distance)
state_dict = {}

print("Generating data...")

# start with solved cube
cubes = BatchCube(1)
solved_cube = cubes._cube_array[0]
key = cubes.bit_array().tobytes()
best_actions = np.zeros(12)
distance = 0
state_dict[key] = (cubes.bit_array()[0], best_actions, distance)

# NOTE(review): presumably the number of cubes currently held in `cubes`;
# confirm where it is updated.
size = 1
for distance in range(1, MAX_DISTANCE + 1):
    print("Distance:", distance)

    # go to neighbors
    cubes.step_independent(np.arange(12))

    # record last move taken (0..11 repeated `size` times)
    last_action = np.tile(np.arange(12), size)

    # find inverse of that move (using ^)
sys.path.append('..') # add parent directory to path from batch_cube import BatchCube MAX_DISTANCE = 6 eye12 = np.eye(12, dtype=bool) state_dict = {} # value = (best_actions, distance) print("Generating data...") # start with solved cube cubes = BatchCube(1) solved_cube = cubes._cube_array[0] key = cubes.bit_array().tobytes() best_actions = np.zeros(12) distance = 0 state_dict[key] = (cubes.bit_array()[0], best_actions, distance) size = 1 for distance in range(1, MAX_DISTANCE+1): print("Distance:", distance) # go to neighbors cubes.step_independent(np.arange(12)) # record last move taken last_action = np.tile(np.arange(12), size) # find inverse of that move (using ^)
# test solved cube under permuations for i in range(48): bc0 = BatchCube() bc = bc0.copy() bc._cube_array[0] = bc._cube_array[0][position_permutations[i]] bc._cube_array[0] = opp_color_permutations[i][bc._cube_array[0]] assert bc == bc0 # test solved cube under permuations (bit array) for i in range(48): bc0 = BatchCube() bc = bc0.copy() bit_array = bc.bit_array() idx = np.indices(bc.bit_array().shape)[0] pos_perm = position_permutations[i][np.newaxis, :, np.newaxis] col_perm = color_permutations[i][np.newaxis, np.newaxis] bit_array = bit_array[idx, pos_perm, col_perm] bc.load_bit_array(bit_array) assert bc == bc0 # test swap functions for i in range(48): bc1 = BatchCube(10) bc1.randomize(100) bc2 = bc1.copy() swap_colors_1(bc1, i) swap_colors_2(bc2, i)