Esempio n. 1
0
    def validate_data(inputs, policies, values, gamma=.95):
        """
        Validate the input, policy, value data to make sure it is of good quality.
        It must be in order and not shuffled.

        For every (state, policy, value) triple the action with the largest
        policy entry is applied to the state.  Unless the value is 0 or equal
        to `gamma` (which ends the current chain), the following triple is
        expected to hold the resulting state and a value that is the same
        whole power of `gamma` as `value / gamma`.

        :param inputs: iterable of states accepted by `BatchCube.load_bit_array`.
        :param policies: iterable of policies; `np.argmax` selects the action.
        :param values: iterable of numeric values.
        :param gamma: discount factor linking consecutive values.
        :raises AssertionError: if a state or value does not match the one
            predicted from the previous triple.
        """
        from batch_cube import BatchCube
        import math

        # what the upcoming triple should look like (None: nothing to check)
        next_state = None
        next_value = None

        for state, policy, value in zip(inputs, policies, values):
            cube = BatchCube()
            cube.load_bit_array(state)

            if next_state is not None:
                assert next_state.shape == state.shape
                assert np.array_equal(next_state, state), "\nstate:\n" + str(state) + "\nnext_state:\n" + str(next_state)
            if next_value is not None:
                # compare exponents of gamma (not raw floats) so that rounding
                # noise is tolerated; the base has to be the caller's gamma,
                # not a hard-coded constant
                assert round(math.log(next_value, gamma)) == round(math.log(value, gamma)), "next_value:" + str(next_value) + "   value:" + str(value)

            action = np.argmax(policy)
            cube.step([action])

            if value == 0 or value == gamma:
                # end of a chain: the next triple is not constrained
                next_value = None
                next_state = None
            else:
                next_value = value / gamma
                next_state = cube.bit_array().reshape((54, 6))
Esempio n. 2
0
    def process_training_data(self, inputs, policies, values, augment=True):
        """
        Prepare training data so that the inputs fit the model.

        Emits a warning on every call, because this method should not be used.
        Optionally augments the data, then reshapes the inputs: with
        `self.history == 1` they simply become (-1, 54, 6); otherwise every
        state is stacked with the preceding states of the same game and
        flattened to (54, self.history * 6).  The inputs must be in game
        order for the history to be attached correctly.

        Returns the processed inputs together with the (possibly augmented)
        policies and values.
        """
        warnings.warn(
            "'BaseModel.process_training_data' should not be used.  The 'process_single_input' method should be reimplemented",
            stacklevel=2)

        if augment:
            # add all 48 color rotations
            inputs, policies, values = augment_data(inputs, policies, values)

        # arrays are processed up front to save time during training
        if self.history == 1:
            return inputs.reshape((-1, 54, 6)), policies, values

        # The data is ordered, so the history can be carried from one sample
        # to the next.  A new game is detected when the state differs from
        # the one obtained by applying the previous policy's best action.
        expected_cube = None
        stacked = None
        processed = []
        for state, policy in zip(inputs, policies):
            current = BatchCube()
            current.load_bit_array(state)

            if expected_cube is not None and not (current != expected_cube):
                # same game: keep the previous stack minus its oldest entry
                older = stacked[:-1]
            else:
                # new game: blank history
                older = np.zeros((self.history - 1, 54, 6), dtype=bool)

            newest = state.reshape((1, 54, 6))
            stacked = np.concatenate([newest, older], axis=0)

            # (history, 54, 6) -> (54, history, 6) -> (54, history * 6)
            per_position = np.rollaxis(stacked, 1, 0)
            processed.append(per_position.reshape((54, self.history * 6)))

            # predict the state that should come next in this game
            best_action = np.argmax(policy)
            expected_cube = current.copy()
            expected_cube.step([best_action])

        return np.array(processed), policies, values
        bc._cube_array[0] = bc._cube_array[0][position_permutations[i]]
        bc._cube_array[0] = opp_color_permutations[i][bc._cube_array[0]]
        assert bc == bc0

    # test solved cube under permuations (bit array)
    for i in range(48):
        bc0 = BatchCube()
        bc = bc0.copy()

        bit_array = bc.bit_array()
        idx = np.indices(bc.bit_array().shape)[0]
        pos_perm = position_permutations[i][np.newaxis, :, np.newaxis]
        col_perm = color_permutations[i][np.newaxis, np.newaxis]
        bit_array = bit_array[idx, pos_perm, col_perm]
        bc.load_bit_array(bit_array)
        assert bc == bc0

    # test swap functions
    for i in range(48):
        bc1 = BatchCube(10)
        bc1.randomize(100)
        bc2 = bc1.copy()

        swap_colors_1(bc1, i)
        swap_colors_2(bc2, i)

        assert bc1 == bc2, "\n" + str(bc1) + "\n" + str(bc2)
    """
    # test action permuations
    for i in range(48):