예제 #1
0
class PuzzleCube:
    """
    A single puzzle cube backed by a ``BatchCube``.

    The interface treats each instance as immutable: ``scramble`` and ``move`` operate on a copy
    of the wrapped ``BatchCube`` and return a new ``PuzzleCube`` instead of modifying ``self``.
    """
    def __init__(self, _inner: Optional["BatchCube"] = None):
        """
        :param _inner: Existing ``BatchCube`` to wrap.  It is stored as-is (not copied) and is meant
            for internal use by the methods of this class.  When omitted, a new ``BatchCube()`` is
            created, i.e. a new solved puzzle cube.
        """
        self._inner_cube = BatchCube() if _inner is None else _inner

    def copy(self) -> "PuzzleCube":
        """
        :return: A new PuzzleCube wrapping a copy of the underlying ``BatchCube``.
        """
        return PuzzleCube(_inner=self._inner_cube.copy())

    def scramble(self, distance: int) -> "PuzzleCube":
        """
        Scrambles a copy of the cube a set number of random moves.
        :param distance: Number of random moves to scramble (must be >= 0)
        :return: A copy of the cube scrambled.
        """
        assert distance >= 0, "distance must be non-negative"

        inner = self._inner_cube.copy()
        inner.randomize(distance)
        return PuzzleCube(_inner=inner)

    def move(self, action: str) -> "PuzzleCube":
        """
        Perform action on a copy of the cube.
        :param action: One of "L", "L'", "R", "R'", "U", "U'", "D", "D'", "F", "F'", "B", "B'"
        :return: A copy of the cube with one action performed.
        """
        assert action in valid_moves, "unknown action: %r" % (action,)

        # The inner cube is driven by the position of the action within valid_moves.
        move_index = valid_moves.index(action)

        inner = self._inner_cube.copy()
        inner.step(move_index)
        return PuzzleCube(_inner=inner)

    def is_solved(self) -> bool:
        """
        :return: Whether or not the cube is solved.
        """
        # done() reports one flag per cube in the batch; this wrapper reads the first one.
        # Coerce to a builtin bool so the annotated return type holds even when the flag is an
        # array scalar (presumably numpy -- confirm against BatchCube.done).
        return bool(self._inner_cube.done()[0])

    def __str__(self) -> str:
        """
        :return: A flat string representation of the cube.
        """
        return str(self._inner_cube)

    def __repr__(self) -> str:
        """
        :return: Same text as ``str(self)``.
        """
        return str(self._inner_cube)
예제 #2
0
    def process_training_data(self, inputs, policies, values, augment=True):
        """
        Convert training data to arrays shaped for the model input.

        Emits a warning on every call: this method is flagged as one that should not be used
        (see the warning text).

        :param inputs: Sequence of cube states.  Each state must be reshapeable to (54, 6);
            when ``self.history == 1`` the whole ``inputs`` object must support ``reshape``.
        :param policies: One policy vector per input.  Its argmax is taken as the action that
            was played, which is how consecutive states of the same game are recognised.
        :param values: Passed through (after optional augmentation) untouched.
        :param augment: When true, ``augment_data`` is applied to all three arrays first.
        :return: ``(inputs, policies, values)`` where ``inputs`` has shape ``(N, 54, 6)`` for
            ``history == 1`` and ``(N, 54, history * 6)`` otherwise.
        """
        warnings.warn(
            "'BaseModel.process_training_data' should not be used.  The 'process_single_input' method should be reimplemented",
            stacklevel=2)
        # augment with all 48 color rotations
        if augment:
            inputs, policies, values = augment_data(inputs, policies, values)

        # process arrays now to save time during training
        if self.history == 1:
            return inputs.reshape((-1, 54, 6)), policies, values

        # Use that the inputs are in order to attach the history, and use the policy/input
        # match to determine when we reached a new game.
        expected_cube = None  # cube we would see next if the current game continues
        frames = None         # (history, 54, 6) stack for the previous sample, newest first
        samples = []
        for state, policy in zip(inputs, policies):
            cube = BatchCube()
            cube.load_bit_array(state)

            if expected_cube is None or cube != expected_cube:
                # New game: start from a blank history.
                older_frames = np.zeros((self.history - 1, 54, 6), dtype=bool)
            else:
                # Same game: drop the oldest frame of the previous stack.
                older_frames = frames[:-1]

            frames = np.concatenate(
                [state.reshape((1, 54, 6)), older_frames], axis=0)

            # (history, 54, 6) -> (54, history, 6) -> (54, history * 6)
            sample = np.moveaxis(frames, 1, 0).reshape((54, self.history * 6))
            samples.append(sample)

            # Predict the follow-up position by applying the most likely action.
            expected_cube = cube.copy()
            expected_cube.step([np.argmax(policy)])

        # The explicit reshape keeps the documented shape even when there are no samples
        # (np.array([]) alone would collapse to shape (0,)).
        inputs = np.array(samples).reshape((-1, 54, self.history * 6))

        return inputs, policies, values
class State():
    """ Application-specific search state wrapping a BatchCube. """
    def __init__(self, internal_state=None):
        # Without an explicit cube, start from a newly constructed BatchCube(1).
        self.internal_state = BatchCube(1) if internal_state is None else internal_state

    def reset_and_randomize(self, depth):
        """ Swap in a new BatchCube(1) and randomize it with `depth`. """
        self.internal_state = fresh_cube = BatchCube(1)
        fresh_cube.randomize(depth)

    def next(self, action):
        """ Return a new State holding a copy of this cube with `action` applied. """
        successor = State(self.internal_state.copy())
        successor.internal_state.step(action)
        return successor

    def input_array(self):
        """ Return the cube's bit array reshaped to a single row of 54 * 6 entries. """
        bits = self.internal_state.bit_array()
        return bits.reshape((1, 54 * 6))

    def calculate_priors_and_value(self, model):
        """
        Ask `model` for the priors of this state and pair them with a fixed value.

        The priors are `model.predict(self.input_array())` reshaped to (12,).
        The value is always the constant 0.01; the model is not consulted for it.
        """
        prediction = model.predict(self.input_array())
        return prediction.reshape((12,)), .01

    def key(self):
        """ Return the raw bytes of the cube's bit array. """
        bits = self.internal_state.bit_array()
        return bits.tobytes()

    def done(self):
        """ Return the first entry of the wrapped cube's done() result. """
        flags = self.internal_state.done()
        return flags[0]

    def __str__(self):
        """ Delegate to the string form of the wrapped cube. """
        return str(self.internal_state)
class State():
    """ Application-specific node state; a thin wrapper around one BatchCube. """
    def __init__(self, internal_state=None):
        if internal_state is not None:
            self.internal_state = internal_state
        else:
            # Nothing supplied: build a new BatchCube(1).
            self.internal_state = BatchCube(1)

    def reset_and_randomize(self, depth):
        """ Discard the current cube, create BatchCube(1) and randomize it with `depth`. """
        replacement = BatchCube(1)
        self.internal_state = replacement
        replacement.randomize(depth)

    def next(self, action):
        """ Apply `action` to a copy of the cube and wrap the result in a new State. """
        stepped = self.internal_state.copy()
        stepped.step(action)
        return State(internal_state=stepped)

    def input_array(self):
        """ Bit array of the cube as one row with 6 * 54 columns. """
        row_shape = (1, 6 * 54)
        return self.internal_state.bit_array().reshape(row_shape)

    def calculate_priors_and_value(self, model):
        """
        Return `(prior, value)` for this state.

        `prior` is the output of `model.predict` on `input_array()`, reshaped to (12,).
        `value` is the fixed constant 0.01 and does not depend on the model.
        """
        features = self.input_array()
        prior = model.predict(features).reshape((12, ))
        return prior, .01

    def key(self):
        """ Byte string of the cube's bit array. """
        return self.internal_state.bit_array().tobytes()

    def done(self):
        """ First element of the wrapped cube's done() result. """
        first_flag = self.internal_state.done()[0]
        return first_flag

    def __str__(self):
        """ String form of the wrapped cube. """
        return str(self.internal_state)
예제 #5
0
class BatchState():
    """ Application-specific state wrapping a BatchCube. """
    def __init__(self, internal_state=None):
        # Without an explicit cube, start from a newly constructed BatchCube(1).
        if internal_state is None:
            internal_state = BatchCube(1)

        self.internal_state = internal_state

    def copy(self):
        """ Return a new state of the same class holding a copy of the wrapped cube. """
        # Build the receiver's own class so the copy keeps the full interface
        # (copy, import_bit_array, ...).
        return type(self)(self.internal_state.copy())

    def import_bit_array(self, bit_array):
        """
        Replace the wrapped cube with one decoded from `bit_array`.

        `bit_array` is reshaped to (1, 54, 6); for each of the 54 positions the largest index
        whose bit is set becomes the entry of the cube array (0 when no bit is set).
        """
        bits = bit_array.reshape((1, 54, 6))
        # Weight every bit by its index along the last axis, then keep the largest per position.
        array = (np.arange(6) * bits).max(axis=2)
        self.internal_state = BatchCube(cube_array=array)

    def reset_and_randomize(self, depth):
        """ Replace the wrapped cube with a new BatchCube(1) randomized with `depth`. """
        self.internal_state = BatchCube(1)
        self.internal_state.randomize(depth)

    def next(self, action):
        """ Return a new state of the same class with `action` applied to a copy of the cube. """
        next_internal_state = self.internal_state.copy()
        next_internal_state.step(action)
        return type(self)(next_internal_state)

    def input_array(self):
        """ Return the cube's bit array reshaped to a single row of 6 * 54 entries. """
        return self.internal_state.bit_array().reshape((1, 6*54))

    def key(self):
        """ Return the raw bytes of the cube's bit array. """
        return self.internal_state.bit_array().tobytes()

    def done(self):
        """ Return the first entry of the wrapped cube's done() result. """
        return self.internal_state.done()[0]

    def __str__(self):
        """ Delegate to the string form of the wrapped cube. """
        return str(self.internal_state)
    )  # allows one to see the whole array even if it is big
    pprint(np.array(position_permutations))
    print()

    print("opp_action_permutations = \\")
    pprint(np.array(opp_action_permutations))
    print()

    print("action_permutations = \\")
    pprint(np.array(action_permutations))

    # test opposites
    for i in range(48):
        bc0 = BatchCube()
        bc0.randomize(100)
        bc = bc0.copy()

        bc._cube_array[0] = bc._cube_array[0][position_permutations[i]]
        bc._cube_array[0] = bc._cube_array[0][opp_position_permutations[i]]
        assert bc == bc0

        bc._cube_array[0] = color_permutations[i][bc._cube_array[0]]
        bc._cube_array[0] = opp_color_permutations[i][bc._cube_array[0]]
        assert bc == bc0

        policy0 = np.random.uniform(size=12)
        policy1 = policy0.copy()
        policy1 = policy1[action_permutations[i]]
        policy1 = policy1[opp_action_permutations[i]]
        assert np.array_equal(policy0, policy1)