def validate_data(inputs, policies, values, gamma=.95):
    """
    Validate the input, policy, value data to make sure it is of good quality.
    It must be in order and not shuffled.

    Each sample is replayed on a fresh BatchCube: the state is loaded, the
    highest-scoring action of its policy is applied, and the resulting state
    and value become the expectation for the following sample.

    Args:
        inputs: sequence of states accepted by ``BatchCube.load_bit_array``.
            A state following a non-terminal sample is compared against an
            array of shape (54, 6), so it must have that shape.
        policies: sequence of per-action scores; ``np.argmax`` selects the
            action that is replayed.
        values: sequence of numeric values, expected to grow by a factor of
            ``1 / gamma`` from one sample to the next within a sequence.
        gamma: discount factor linking consecutive values.

    Raises:
        AssertionError: if a sample does not match the state or value
            predicted from the previous sample.
        ValueError: from ``math.log`` if a compared value is not positive.
    """
    from batch_cube import BatchCube
    import math

    next_state = None
    next_value = None

    for state, policy, value in zip(inputs, policies, values):
        cube = BatchCube()
        cube.load_bit_array(state)

        if next_state is not None:
            assert next_state.shape == state.shape
            assert np.array_equal(next_state, state), "\nstate:\n" + str(state) + "\nnext_state:\n" + str(next_state)
        if next_value is not None:
            # Compare the rounded exponents (value ~ gamma ** k) so small
            # floating point drift does not trip the check.  The base must
            # be `gamma` itself; a hard-coded .95 is only right for the
            # default discount.
            assert round(math.log(next_value, gamma)) == round(math.log(value, gamma)), "next_value:" + str(next_value) + " value:" + str(value)

        action = np.argmax(policy)
        cube.step([action])

        if value == 0 or value == gamma:
            # NOTE(review): presumably marks the last sample of a sequence;
            # no expectation is carried over to the next sample.
            next_value = None
            next_state = None
        else:
            next_value = value / gamma
            next_state = cube.bit_array().reshape((54, 6))
def process_training_data(self, inputs, policies, values, augment=True):
    """
    Turn raw training data into arrays shaped for the model input.

    Emits a warning on every call (this method is flagged as one that should
    not be used), optionally augments the data through ``augment_data``, and
    then reshapes the inputs:

    * ``self.history == 1``: inputs are reshaped to (-1, 54, 6).
    * otherwise: every state is stacked with the states preceding it in the
      same game, giving one (54, self.history * 6) array per sample.  This
      relies on the samples being in order.

    Returns the tuple ``(inputs, policies, values)``.
    """
    warnings.warn("'BaseModel.process_training_data' should not be used. The 'process_single_input' method should be reimplemented", stacklevel=2)

    if augment:
        # per the original note: augment with all 48 color rotations
        inputs, policies, values = augment_data(inputs, policies, values)

    depth = self.history

    # No history to attach: a plain reshape now saves time during training.
    if depth == 1:
        return inputs.reshape((-1, 54, 6)), policies, values

    # The inputs are in order, so the history can be attached sample by
    # sample.  A new game is detected when the current state is not the one
    # reached by applying the previous sample's best action.
    stacked_samples = []
    window = None
    expected_cube = None
    for state, policy in zip(inputs, policies):
        cube = BatchCube()
        cube.load_bit_array(state)

        if expected_cube is None or cube != expected_cube:
            # start of a game: blank history
            older_frames = np.zeros((depth - 1, 54, 6), dtype=bool)
        else:
            # same game: drop the oldest frame of the previous window
            older_frames = window[:-1]

        window = np.concatenate(
            [state.reshape((1, 54, 6)), older_frames], axis=0)

        # (history, 54, 6) -> (54, history, 6) -> (54, history * 6)
        per_position = np.rollaxis(window, 1, 0)
        stacked_samples.append(per_position.reshape((54, depth * 6)))

        # state expected for the next sample if the game continues
        expected_cube = cube.copy()
        expected_cube.step([np.argmax(policy)])

    return np.array(stacked_samples), policies, values
bc._cube_array[0] = bc._cube_array[0][position_permutations[i]] bc._cube_array[0] = opp_color_permutations[i][bc._cube_array[0]] assert bc == bc0 # test solved cube under permuations (bit array) for i in range(48): bc0 = BatchCube() bc = bc0.copy() bit_array = bc.bit_array() idx = np.indices(bc.bit_array().shape)[0] pos_perm = position_permutations[i][np.newaxis, :, np.newaxis] col_perm = color_permutations[i][np.newaxis, np.newaxis] bit_array = bit_array[idx, pos_perm, col_perm] bc.load_bit_array(bit_array) assert bc == bc0 # test swap functions for i in range(48): bc1 = BatchCube(10) bc1.randomize(100) bc2 = bc1.copy() swap_colors_1(bc1, i) swap_colors_2(bc2, i) assert bc1 == bc2, "\n" + str(bc1) + "\n" + str(bc2) """ # test action permuations for i in range(48):