Example #1
def test_cube_shuffle():
    random.seed(5)
    c = Cube()
    c.shuffle(4)
    shuffled_state = np.array([[[0, 1, 2], [3, 4, 5], [8, 17, 20]],
                               [[9, 10, 11], [12, 13, 14], [7, 16, 23]],
                               [[24, 21, 18], [25, 22, 19], [6, 15, 26]]])
    assert (c.state == shuffled_state).all()
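The shuffle method itself is not shown in this listing. The test only passes because shuffle draws its moves through Python's global random module, which the call to random.seed(5) makes deterministic. A minimal sketch of such a method, assuming func_list holds the cube's twelve move functions; the body is an illustration, not the project's actual code:

import random

# method of Cube (sketch)
def shuffle(self, num_shuffles):
    # apply num_shuffles moves chosen uniformly at random;
    # seeding the random module beforehand makes the result reproducible
    for _ in range(num_shuffles):
        move = random.choice(self.func_list)
        move()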
Example #2
def test_node_init():
    model = CNN()
    c = Cube()
    state = c.state
    node = Node(state, model, .4, .1)
    assert (node.cube_moves[2] == 'right') \
        & (len(node.P.keys()) == 12)
Example #3
def __init__(self, state, model, c, v, parent=None):
    self.parent = parent
    self.state = copy.deepcopy(state)
    self.action_taken_string = None
    self.cube_moves = [action.__name__ for action in Cube().func_list]
    self.model = model
    self.c = c
    self.v = v
    self.children = self._init_children()
    self.N = self._init_N()
    self.W = self._init_W()
    self.L = self._init_L()
    self.P = self._init_P()
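The _init_* helpers and the get_Q_st/get_U_st selection values are not included in this listing. Below is a minimal sketch of what they might look like, inferred from how mcts_solve uses the node (N: visit counts, W: best value seen, L: virtual loss, P: move priors from the model) and from a DeepCube-style U + Q selection rule. Every helper body and formula here is an assumption, not the project's confirmed implementation (np and tf imported as usual):

def _init_children(self):
    # one slot per move; None marks an unexpanded child
    return {move: None for move in self.cube_moves}

def _init_N(self):
    # per-move visit counts
    return {move: 0 for move in self.cube_moves}

def _init_W(self):
    # best value observed beneath each move
    return {move: 0. for move in self.cube_moves}

def _init_L(self):
    # per-move virtual loss
    return {move: 0. for move in self.cube_moves}

def _init_P(self):
    # move priors from the model's output for this state
    state_tensor = tf.expand_dims(tf.convert_to_tensor(self.state), 0)
    probs = tf.nn.softmax(self.model(state_tensor, training=False)).numpy()[0]
    return dict(zip(self.cube_moves, probs))

def get_Q_st(self):
    # exploitation term: best value seen minus virtual loss
    return np.array([self.W[m] - self.L[m] for m in self.cube_moves])

def get_U_st(self):
    # exploration term: prior-weighted UCT bonus
    total_visits = sum(self.N.values())
    return np.array([
        self.c * self.P[m] * np.sqrt(total_visits) / (1 + self.N[m])
        for m in self.cube_moves])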
Example #4
def get_validation_cubes(val_num_shuffles=1, validation_count=100):
    '''
    Get a set of validation cubes that remains fixed over the training period

    Parameters:
    ------------
    val_num_shuffles : int
        number of times validation cube is shuffled
    validation_count : int
        number of validation cubes

    Returns:
    ---------
    validation_cubes : list
        list of rubiks_cube.environment.cube.Cube() objects
    '''
    validation_cubes = []
    for i in range(validation_count):
        val_cube = Cube()
        val_cube.shuffle(val_num_shuffles)
        validation_cubes.append(val_cube)
    return validation_cubes
Example #5
def get_val_acc(model,
                validation_cubes,
                val_max_time_steps=5,
                val_solve_method='greedy',
                mcts_c=.1,
                mcts_v=.1,
                mcts_num_search=10):
    '''
    Assess training progress on ability to solve validation cubes

    Parameters:
    -------------
    model : tf.keras.Model
    validation_cubes : list
        list of rubiks_cube.environment.cube.Cube() objects
    val_max_time_steps : int
    val_solve_method : str
        'greedy' or 'mcts'
    mcts_c : float
    mcts_v : float
    mcts_num_search : int

    Returns:
    ----------
    val_acc : float
    '''
    assert val_solve_method in ['greedy', 'mcts']
    solve_count = 0
    for val_cube in validation_cubes:
        val_cube_trial = Cube()
        val_cube_trial.state = np.copy(val_cube.state)
        if val_solve_method == 'greedy':
            solved, _, _ = greedy_solve(model, val_cube_trial,
                                        val_max_time_steps)
            solve_count += solved
        elif val_solve_method == 'mcts':
            solved, _ = mcts_solve(model, val_cube_trial, mcts_c, mcts_v,
                                   mcts_num_search)
            solve_count += solved
    return solve_count / len(validation_cubes)
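Together, the two helpers support a quick evaluation loop during training. A hypothetical usage sketch, assuming model is a CNN instance (the print format is illustrative):

# build a fixed validation set once, then score the model on it periodically
validation_cubes = get_validation_cubes(val_num_shuffles=1, validation_count=100)
val_acc = get_val_acc(model, validation_cubes, val_solve_method='greedy')
print(f"validation solve rate: {val_acc:.2%}")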
Example #6
def test_greedy_solve():
    model = CNN()
    c = Cube()
    c.shuffle(5)
    solved, solved_cube, _ = greedy_solve(model, c, 5, verbose=False)
    if solved:
        assert solved_cube == Cube()
    else: 
        assert solved_cube != Cube()
Example #7
def greedy_solve(model, shuffled_cube, max_time_steps, verbose=False):
    '''
    Attempt to solve the cube greedily by taking the action
    with the highest Q value in each state

    Parameters:
    -----------
    model : tf.keras.Model
        Q function approximator
    shuffled_cube : rubiks_cube.environment.cube.Cube()
        Rubik's cube object to be solved
    max_time_steps : int
        maximum number of time steps allowed to solve cube
    verbose : boolean
        whether to print steps taken to solve

    Returns:
    --------
    solved : boolean
    cube : rubiks_cube.environment.cube.Cube()
    solver_steps : list of action function names
    '''
    # initialize solution conditions
    solved_cube = Cube()
    solved = False
    solver_steps = []

    s0 = copy.deepcopy(shuffled_cube.state)
    st = tf.expand_dims(tf.convert_to_tensor(s0), 0) # (1, 3, 3, 3)
    # at each step take argmax_a Q(s_t, a)
    for t in range(max_time_steps):
        at_index = tf.math.argmax(model(st, training=False), 1).numpy()[0]
        at = shuffled_cube.func_list[at_index]
        solver_steps.append(at.__name__)
        if verbose:
            # print action taken
            print(at)
        st1 = at()
        if shuffled_cube == solved_cube:
            # break on solve
            solved = True
            break
        st = tf.expand_dims(tf.convert_to_tensor(st1), 0)
    return solved, shuffled_cube, solver_steps
Example #8
def test_mcts_solve_call():
    model = CNN()
    shuffled_cube = Cube()
    shuffled_cube.shuffle(2)
    solved, solved_cube = mcts_solve(model,
                                     shuffled_cube,
                                     c=.1,
                                     v=.1,
                                     num_searches=100,
                                     verbose=False)
    if solved:
        assert solved_cube == Cube()
    else:
        assert solved_cube != Cube()
Example #9
def test_cube_init():
    c = Cube()
    assert (c.state == np.arange(0, 27).reshape(3, 3, 3)).all()
Example #10
def test_cube_equal():
    c1 = Cube()
    c2 = Cube()
    c2.back()
    c2.back_p()
    assert c1 == c2
Example #11
def test_cube_rotation():
    c = Cube()
    # presumably func_list pairs each move with its inverse (cf. back and
    # back_p above), so applying them all in order restores the solved state
    for rotation in c.func_list:
        rotation()
    assert (c.state == np.arange(0, 27).reshape(3, 3, 3)).all()
Example #12
def mcts_solve(model,
               shuffled_cube,
               c=.1,
               v=.1,
               num_searches=100,
               verbose=False):
    '''
    Attempt to solve cube via Monte Carlo tree search

    Parameters:
    -----------
    model : tf.keras.Model
        Q function approximator
    shuffled_cube : rubiks_cube.environment.cube.Cube()
        Rubik's cube object to be solved
    c : float
        exploration hyperparameter
    v : float
        virtual loss hyperparameter
    num_searches : int
        number of search iterations
    verbose : boolean
        whether to print progress while solving

    Returns:
    --------
    solved : boolean
    shuffled_cube : rubiks_cube.environment.cube.Cube()
    '''
    # initial conditions
    solved = False
    solved_cube = Cube()
    cube_state = copy.deepcopy(shuffled_cube.state)
    root = Node(cube_state, model, c, v, parent=None)
    # perform search
    for i in range(num_searches):
        # 1) Selection
        if verbose:
            print("Selection")
        # start search at initial state
        current_node = root
        # traverse the search tree until a leaf node is encountered:
        # every simulation starts from the root node and iteratively selects
        # actions by following a tree policy until an unexpanded leaf node,
        # s_tau, is reached
        has_children = all(child is not None
                           for child in current_node.children.values())
        while has_children:
            # calculate selection values for the current node
            Q_st = current_node.get_Q_st()
            U_st = current_node.get_U_st()
            # select the "best" action to perform
            A_st = np.argmax(U_st + Q_st)
            A_st_string = current_node.cube_moves[A_st]
            if verbose:
                print(f"Enter Selection: {A_st_string}")
            # save the action taken
            current_node.action_taken_string = A_st_string
            # move to the next node
            current_node = current_node.children[A_st_string]
            has_children = all(child is not None
                               for child in current_node.children.values())

        # check if the cube has been solved
        if (current_node.state == solved_cube.state).all():
            if verbose:
                print("Cube is solved")
            solved = True
            shuffled_cube.set_state(current_node.state)
            break

        # 2) Expansion
        if verbose:
            print("Expansion")
        # once a leaf node, s_tau, is reached, the state is expanded
        # by adding the children of s_tau
        for move in shuffled_cube.func_list:
            shuffled_cube.set_state(current_node.state)
            move()
            new_state = copy.deepcopy(shuffled_cube.state)
            new_node = Node(new_state, model, c, v, parent=current_node)
            # add the resulting states to the current node's children
            current_node.children[move.__name__] = new_node

        # 3) Simulation
        if verbose:
            print("Simulation")
        # make copy of current state for simulation
        current_state = copy.deepcopy(current_node.state)
        # convert current state to tensor
        current_state = tf.expand_dims(tf.convert_to_tensor(current_state), 0)
        # find the max Q value for the current state
        q_current_state = model(current_state, training=False).numpy()[0].max()

        # 4) Backpropagation
        if verbose:
            print("Backpropagation")
        # update nodes with results of simulation
        current_node.update_memory(q_current_state)
        # traverse tree
        while current_node.parent is not None:
            # update all past parents with q value from current state
            current_node = current_node.parent
            current_node.update_memory(q_current_state)

        if verbose:
            if i == num_searches - 1:
                print("Time Out")
            else:
                print('--------------')

    return solved, shuffled_cube
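update_memory, called during backpropagation above, is not shown in the listing either. A plausible sketch under the same DeepCube-style assumptions as the Node sketch above (visit count incremented, best value kept, virtual loss unwound for the action that was taken); this is hypothetical, not the project's confirmed code, and it assumes virtual loss is accrued elsewhere during selection:

def update_memory(self, q):
    # freshly expanded leaf nodes have not taken an action yet
    if self.action_taken_string is None:
        return
    a = self.action_taken_string
    self.N[a] += 1                            # count the visit through a
    self.W[a] = max(self.W[a], q)             # keep the best value seen below a
    self.L[a] = max(self.L[a] - self.v, 0.)   # unwind any virtual loss on a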
Example #13
if __name__ == "__main__":

    model = CNN()
    shuffled_cube = Cube()
    shuffled_cube.shuffle(2)
    solved, solved_cube = mcts_solve(model,
                                     shuffled_cube,
                                     c=.1,
                                     v=.1,
                                     num_searches=100,
                                     verbose=True)
Example #14
def play_autodidactic_episode(model,
                              loss_object,
                              optimizer,
                              replay_buffer,
                              num_shuffles=5,
                              max_time_steps=10,
                              exploration_rate=.1,
                              end_state_reward=1.0,
                              batch_size=16,
                              discount_factor=.9,
                              training=True):
    '''
    In a single episode the cube is shuffled num_shuffles times; after each
    shuffle the agent attempts to solve the cube, with a budget of
    2 * shuffle_step + 1 time steps (shuffle_step counting from zero)

    Parameters:
    ------------
    model : tf.keras.Model
    loss_object : tf.keras.losses
    optimizer : tf.keras.optimizer
    replay_buffer : rubiks_cube.agent.replay_buffer.ReplayBuffer
    num_shuffles : int (>= 0)
    max_time_steps : int (>= 1)
    exploration_rate : float [0, 1]
    end_state_reward: float
    batch_size : int (>= 1)
    discount_factor: float
    training : boolean
    Returns:
    ---------
    episode_cube : rubiks_cube.environment.cube.Cube()
        the episode cube after all shuffles have been applied
    episode_loss_result : tf.Tensor
        mean loss over all shuffle steps in the episode
    '''
    # Initialize episode cube
    episode_cube = Cube()
    # Initialize episode loss
    episode_loss = tf.keras.metrics.Mean()
    # Initialize solved cube
    solved_cube = Cube()
    for shuffle_step in range(num_shuffles):
        # Initialize shuffle step cube state
        episode_cube.shuffle(1)
        shuffle_step_cube = Cube()
        shuffle_step_cube.state = copy.deepcopy(episode_cube.state)
        # Set up shuffle_step training loss
        shuffle_step_loss = tf.keras.metrics.Mean()
        # regular training loop
        s0 = shuffle_step_cube.state
        # convert cube state into tensor to feed into model
        st = tf.expand_dims(tf.convert_to_tensor(s0), 0)  # (1, 3, 3, 3)
        # Play shuffle_step until solved or shuffle_max_time_steps is reached
        shuffle_max_time_steps = 2 * shuffle_step + 1
        for t in range(shuffle_max_time_steps):
            # with some probability select a random action a_t
            if np.random.rand() < exploration_rate:
                # WARNING: 12 is the number of possible rotations
                at_index = np.random.randint(0, 12)
            # otherwise select a_t = argmax_a Q(s_t, a)
            else:
                at_index = tf.math.argmax(model(st), 1).numpy()[0]
            # Execute action a_t and observe state s_t+1 and reward r_t
            at = shuffle_step_cube.func_list[at_index]
            st1 = at()
            if shuffle_step_cube == solved_cube:
                rt = end_state_reward
            else:
                rt = 0.
            # Store transition in replay buffer, convert state to numpy for convenience
            st_numpy = st.numpy()[0]  # (3, 3, 3)
            transition = (st_numpy, at_index, rt, st1)  # (np.array, int, float, np.array)
            replay_buffer.add(transition)
            # if training is enabled, update the Q function
            if training:
                loss = update_q_function(model, loss_object, optimizer,
                                         replay_buffer, end_state_reward,
                                         batch_size, discount_factor)
            else:
                loss = 0
            shuffle_step_loss(loss)
            # if the reward state has been reached, stop the shuffle_step early
            if (rt == end_state_reward):
                break
            # convert next cube state into tensor to feed into model
            st = tf.expand_dims(tf.convert_to_tensor(st1), 0)  # (1, 3, 3, 3)
        shuffle_step_loss_result = shuffle_step_loss.result()
        episode_loss(shuffle_step_loss_result)
        shuffle_step_loss.reset_states()
    episode_loss_result = episode_loss.result()
    episode_loss.reset_states()
    return episode_cube, episode_loss_result
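update_q_function, used in the training step above, is not included in this listing. Below is a minimal sketch of a one-step Q-learning update over a replay-buffer minibatch, assuming replay_buffer.sample(batch_size) returns a list of (state, action, reward, next_state) transitions as stored above; the sampling API and every detail of the body are assumptions, not the project's confirmed code:

import numpy as np
import tensorflow as tf

def update_q_function(model, loss_object, optimizer, replay_buffer,
                      end_state_reward, batch_size, discount_factor):
    # sample a minibatch of (s_t, a_t, r_t, s_t+1) transitions
    batch = replay_buffer.sample(batch_size)
    states = tf.convert_to_tensor(np.stack([t[0] for t in batch]))
    actions = np.array([t[1] for t in batch])
    rewards = np.array([t[2] for t in batch], dtype=np.float32)
    next_states = tf.convert_to_tensor(np.stack([t[3] for t in batch]))

    # one-step target: r_t for terminal transitions,
    # else r_t + gamma * max_a Q(s_t+1, a)
    next_q = tf.reduce_max(model(next_states, training=False), axis=1).numpy()
    not_terminal = (rewards != end_state_reward).astype(np.float32)
    targets = tf.convert_to_tensor(
        rewards + discount_factor * next_q * not_terminal)

    with tf.GradientTape() as tape:
        q_values = model(states, training=True)               # (batch, 12)
        action_mask = tf.one_hot(actions, q_values.shape[1])  # (batch, 12)
        # Q value of the action actually taken in each transition
        q_taken = tf.reduce_sum(q_values * action_mask, axis=1)
        loss = loss_object(targets, q_taken)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss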
Example #15
    # initialize exploration rate scheduler
    exploration_rate_scheduler = ExplorationRateSchedule(
        **config['exploration_rate']['params'])

    try:
        # Train the model
        train_via_experience_replay(model,
                                    loss_object,
                                    optimizer,
                                    exploration_rate_scheduler,
                                    logging=True,
                                    train_log_dir=train_log_dir,
                                    **config['training_loop']['params'])
    except KeyboardInterrupt:
        print("Training got interrupted")
        # save the model as a fail-safe; the forward pass below builds the
        # subclassed model so it can be serialized
        save_state = Cube().state
        save_state_tensor = tf.expand_dims(tf.convert_to_tensor(save_state), 0)
        model.predict(save_state_tensor)
        model.save(model_weights_dir)

    # save the trained model; again, a forward pass builds the
    # subclassed model before saving
    save_state = Cube().state
    save_state_tensor = tf.expand_dims(tf.convert_to_tensor(save_state), 0)
    model.predict(save_state_tensor)
    model.save(model_weights_dir)

    # TODO: module MCTS
    # TODO: GPU / container
Example #16
def test_cnn_call_shape():
    model = CNN()
    x = tf.constant(np.stack([Cube().state, Cube().state]))
    assert model(x).shape == tf.TensorShape([2,12])
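The CNN model itself is not part of this listing. From the tests it must accept a batch of (3, 3, 3) integer cube states and return one Q value per move, i.e. an output of shape (batch, 12). A minimal subclassed-model sketch consistent with that contract; the layer choices and sizes are arbitrary assumptions, not the project's architecture:

import numpy as np
import tensorflow as tf


class CNN(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.conv = tf.keras.layers.Conv3D(32, kernel_size=2, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.hidden = tf.keras.layers.Dense(64, activation='relu')
        self.out = tf.keras.layers.Dense(12)  # one Q value per move

    def call(self, x, training=False):
        # (batch, 3, 3, 3) integer states -> add a channel axis for Conv3D
        x = tf.expand_dims(tf.cast(x, tf.float32), -1)
        x = self.conv(x)      # (batch, 2, 2, 2, 32)
        x = self.flatten(x)
        x = self.hidden(x)
        return self.out(x)    # (batch, 12)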