Exemple #1
0
    def validate_data(inputs, policies, values, gamma=.95):
        """
        Validate that (input, policy, value) samples form consistent episodes.

        The samples must be in their original order (not shuffled).  For each
        sample the state is loaded into a ``BatchCube`` and the action with
        the highest policy weight is applied.  Unless the sample ends an
        episode, the following sample is then required to

        * have a state equal to the resulting bit array (reshaped to
          ``(54, 6)``), and
        * have a value that matches ``value / gamma`` when both are expressed
          as a rounded exponent of the discount factor.

        A sample whose value is ``0`` or equal to ``gamma`` ends the episode,
        so nothing is checked against the sample that follows it.

        :param inputs: iterable of state bit arrays, in episode order.
        :param policies: iterable of policy vectors aligned with ``inputs``.
        :param values: iterable of scalar values aligned with ``inputs``.
        :param gamma: discount factor relating consecutive values.  It is
            also used as the logarithm base of the value check, so it must be
            positive.
        :raises AssertionError: if a state or value does not match what the
            previous sample predicts.
        """
        from batch_cube import BatchCube
        import math

        # The discount exponent has to be measured in base ``gamma``; the
        # previous hard-coded base of .95 was only right for the default.
        # A base of 1 is not a valid logarithm base, and with gamma == 1 the
        # values are not discounted at all, so keep the historical .95 there.
        log_base = gamma if gamma != 1 else .95

        next_state = None
        next_value = None

        for state, policy, value in zip(inputs, policies, values):
            cube = BatchCube()
            cube.load_bit_array(state)

            if next_state is not None:
                assert next_state.shape == state.shape
                assert np.array_equal(next_state, state), (
                    "\nstate:\n" + str(state) + "\nnext_state:\n" + str(next_state)
                )
            if next_value is not None:
                # Rounding the exponents absorbs floating point noise that
                # comes from the repeated division by gamma.
                expected_steps = round(math.log(next_value, log_base))
                actual_steps = round(math.log(value, log_base))
                assert expected_steps == actual_steps, (
                    "next_value:" + str(next_value) + "   value:" + str(value)
                )

            # Follow the most heavily weighted action of the policy.
            action = np.argmax(policy)
            cube.step([action])

            if value == 0 or value == gamma:
                # End of an episode: the next sample starts a new one.
                next_value = None
                next_state = None
            else:
                next_value = value / gamma
                next_state = cube.bit_array().reshape((54, 6))
class State:
    """
    Application-specific search state wrapping a ``BatchCube``.

    The wrapped cube object is kept in ``internal_state``; every method
    delegates to it.
    """

    def __init__(self, internal_state=None):
        """Wrap ``internal_state``, or a new ``BatchCube(1)`` when it is None."""
        self.internal_state = (
            BatchCube(1) if internal_state is None else internal_state
        )

    def reset_and_randomize(self, depth):
        """Replace the wrapped cube with a new ``BatchCube(1)`` randomized with ``depth``."""
        self.internal_state = BatchCube(1)
        self.internal_state.randomize(depth)

    def next(self, action):
        """Return a new ``State`` with ``action`` applied to a copy of the cube."""
        successor = State(self.internal_state.copy())
        successor.internal_state.step(action)
        return successor

    def input_array(self):
        """Return the cube's bit array reshaped to ``(1, 6*54)``."""
        bits = self.internal_state.bit_array()
        return bits.reshape((1, 6*54))

    def calculate_priors_and_value(self, model):
        """
        Return ``(prior, value)`` for this state.

        The prior is ``model.predict`` applied to ``input_array()``, reshaped
        to ``(12,)``.  The value is not predicted: it is the constant ``.01``.
        """
        prediction = model.predict(self.input_array())
        return prediction.reshape((12,)), .01

    def key(self):
        """Return the raw bytes of the cube's bit array (usable as a dict key)."""
        bits = self.internal_state.bit_array()
        return bits.tobytes()

    def done(self):
        """Return the first entry of the wrapped cube's ``done()`` result."""
        flags = self.internal_state.done()
        return flags[0]

    def __str__(self):
        """Return the string form of the wrapped cube."""
        return str(self.internal_state)
class State:
    """
    Application-specific state object built around a ``BatchCube``.

    All behaviour is forwarded to the cube stored in ``internal_state``.
    """

    def __init__(self, internal_state=None):
        """Store ``internal_state``; default to ``BatchCube(1)`` if None is given."""
        if internal_state is not None:
            self.internal_state = internal_state
        else:
            self.internal_state = BatchCube(1)

    def reset_and_randomize(self, depth):
        """Start over from a new ``BatchCube(1)`` and call ``randomize(depth)`` on it."""
        self.internal_state = BatchCube(1)
        self.internal_state.randomize(depth)

    def next(self, action):
        """Apply ``action`` to a copy of the cube and wrap it in a new ``State``."""
        cube_after = self.internal_state.copy()
        cube_after.step(action)
        return State(internal_state=cube_after)

    def input_array(self):
        """Return the bit array of the cube with shape ``(1, 6 * 54)``."""
        target_shape = (1, 6 * 54)
        return self.internal_state.bit_array().reshape(target_shape)

    def calculate_priors_and_value(self, model):
        """
        Compute the action priors and a placeholder value.

        The priors are the output of ``model.predict`` on ``input_array()``,
        reshaped to ``(12,)``; the value is always the constant ``.01``.
        """
        value = .01
        prior = model.predict(self.input_array()).reshape((12, ))
        return prior, value

    def key(self):
        """Return the bit array serialized with ``tobytes()``."""
        return self.internal_state.bit_array().tobytes()

    def done(self):
        """Return element 0 of the cube's ``done()`` result."""
        done_flags = self.internal_state.done()
        return done_flags[0]

    def __str__(self):
        """Delegate to ``str()`` of the wrapped cube."""
        return str(self.internal_state)
Exemple #4
0
class BatchState():
    """
    Application-specific state wrapping a ``BatchCube``.

    The wrapped cube object lives in ``internal_state`` and every method
    delegates to it.
    """

    def __init__(self, internal_state=None):
        """Wrap ``internal_state``, or a new ``BatchCube(1)`` when it is None."""
        if internal_state is None:
            internal_state = BatchCube(1)

        self.internal_state = internal_state

    def copy(self):
        """Return a new ``BatchState`` wrapping a copy of the cube."""
        # Return the same type as self: a different class would lose
        # ``copy`` and ``import_bit_array`` on the result.
        return BatchState(self.internal_state.copy())

    def import_bit_array(self, bit_array):
        """
        Replace the wrapped cube with one built from ``bit_array``.

        ``bit_array`` is reshaped to ``(1, 54, 6)``.  For each of the 54
        entries the largest index along the last axis whose bit is set is
        taken as the entry's value (0 when no bit is set), and the resulting
        ``(1, 54)`` array is passed to ``BatchCube(cube_array=...)``.
        """
        # color_idx[0, p, c] == c, so multiplying by the bits and taking the
        # maximum picks out the index of the set bit.
        color_idx = np.indices((1, 54, 6))[2]
        array = (color_idx * bit_array.reshape((1, 54, 6))).max(axis=2)
        self.internal_state = BatchCube(cube_array=array)

    def reset_and_randomize(self, depth):
        """Replace the wrapped cube with a new ``BatchCube(1)`` randomized with ``depth``."""
        self.internal_state = BatchCube(1)
        self.internal_state.randomize(depth)

    def next(self, action):
        """Return a new ``BatchState`` with ``action`` applied to a copy of the cube."""
        next_internal_state = self.internal_state.copy()
        next_internal_state.step(action)
        return BatchState(next_internal_state)

    def input_array(self):
        """Return the cube's bit array reshaped to ``(1, 6*54)``."""
        return self.internal_state.bit_array().reshape((1, 6*54))

    def key(self):
        """Return the raw bytes of the cube's bit array (usable as a dict key)."""
        return self.internal_state.bit_array().tobytes()

    def done(self):
        """Return the first entry of the wrapped cube's ``done()`` result."""
        return self.internal_state.done()[0]

    def __str__(self):
        """Return the string form of the wrapped cube."""
        return str(self.internal_state)
Exemple #5
0
import sys

sys.path.append('..')  # add parent directory to path
from batch_cube import BatchCube

# Largest value of `distance` visited by the loop below.
MAX_DISTANCE = 6

# 12x12 boolean identity matrix; it is not used in the visible part of the
# script.
eye12 = np.eye(12, dtype=bool)

# key: bytes of a cube's bit array
# value: (bit array of the cube, best_actions, distance)
state_dict = {}

print("Generating data...")
# start with solved cube
# NOTE(review): assumes a freshly constructed BatchCube(1) holds one solved
# cube -- confirm against batch_cube.
cubes = BatchCube(1)
solved_cube = cubes._cube_array[0]
key = cubes.bit_array().tobytes()
# The starting cube gets an all-zero vector of length 12 and distance 0.
best_actions = np.zeros(12)
distance = 0
state_dict[key] = (cubes.bit_array()[0], best_actions, distance)

# Number of times the action list 0..11 is repeated when labelling the
# cubes produced in an iteration (see np.tile below).
size = 1
for distance in range(1, MAX_DISTANCE + 1):
    print("Distance:", distance)

    # go to neighbors
    # NOTE(review): presumably applies each of the 12 actions to every cube
    # in the batch -- confirm against BatchCube.step_independent.
    cubes.step_independent(np.arange(12))

    # record last move taken
    # (the actions 0..11, repeated `size` times)
    last_action = np.tile(np.arange(12), size)

    # find inverse of that move (using ^)
sys.path.append('..') # add parent directory to path
from batch_cube import BatchCube


# Upper bound (inclusive) of the `distance` loop below.
MAX_DISTANCE = 6

# Boolean 12x12 identity matrix; the visible part of the script does not
# use it.
eye12 = np.eye(12, dtype=bool)

# Maps the bytes of a cube's bit array to the tuple
# (bit array of the cube, best_actions, distance).
state_dict = {}


print("Generating data...")
# start with solved cube
# NOTE(review): relies on BatchCube(1) creating a single solved cube --
# confirm against batch_cube.
cubes = BatchCube(1)
solved_cube = cubes._cube_array[0]
key = cubes.bit_array().tobytes()
# Entry for the starting cube: a length-12 zero vector and distance 0.
best_actions = np.zeros(12)
distance = 0
state_dict[key] = (cubes.bit_array()[0], best_actions, distance)

# Repeat count handed to np.tile when labelling cubes with their last action.
size = 1
for distance in range(1, MAX_DISTANCE+1):
    print("Distance:", distance)
    
    # go to neighbors
    # NOTE(review): presumably steps every cube in the batch by each of the
    # 12 actions -- confirm against BatchCube.step_independent.
    cubes.step_independent(np.arange(12))

    # record last move taken
    # (0..11 repeated `size` times)
    last_action = np.tile(np.arange(12), size)

    # find inverse of that move (using ^)
    # test solved cube under permutations
    for i in range(48):
        bc0 = BatchCube()
        bc = bc0.copy()

        bc._cube_array[0] = bc._cube_array[0][position_permutations[i]]
        bc._cube_array[0] = opp_color_permutations[i][bc._cube_array[0]]
        assert bc == bc0

    # test solved cube under permutations (bit array)
    for i in range(48):
        bc0 = BatchCube()
        bc = bc0.copy()

        bit_array = bc.bit_array()
        idx = np.indices(bc.bit_array().shape)[0]
        pos_perm = position_permutations[i][np.newaxis, :, np.newaxis]
        col_perm = color_permutations[i][np.newaxis, np.newaxis]
        bit_array = bit_array[idx, pos_perm, col_perm]
        bc.load_bit_array(bit_array)
        assert bc == bc0

    # test swap functions
    for i in range(48):
        bc1 = BatchCube(10)
        bc1.randomize(100)
        bc2 = bc1.copy()

        swap_colors_1(bc1, i)
        swap_colors_2(bc2, i)