Example no. 1
def revealed_q():
    width = 3
    height = 3
    num_mines = 1
    sim = Minesweeper(num_mines, width, height)
    #features = sim.reset()
    Q = np.ones((sim.num_states, sim.num_actions))

    policy, Q, steps, reward, r, num_revealed = q_learning(
        sim, gamma=gamma, alpha=alpha, epsilon=epsilon,
        num_episodes=1, initial_Q=Q)
    board_covered = num_revealed / (width * height)
    return board_covered
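The call above reads gamma, alpha, and epsilon from module scope rather than taking them as arguments. A minimal sketch of that surrounding setup and one way to call revealed_q, using illustrative values only (the actual hyperparameters of the original module are not shown in these examples):

import numpy as np

# Assumed module-level hyperparameters (illustrative values, not from the source)
gamma = 0.9     # discount factor
alpha = 0.1     # learning rate
epsilon = 0.1   # exploration rate for the epsilon-greedy behaviour policy

# Average board coverage over repeated single-episode Q-learning runs
coverage = np.mean([revealed_q() for _ in range(100)])
print("mean board coverage after one Q-learning episode:", coverage)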
Example no. 2
def revealed_mc():
    width = 3
    height = 3
    num_mines = 1
    sim = Minesweeper(num_mines, width, height)
    #features = sim.reset()
    Q = np.ones((sim.num_states, sim.num_actions)) 
    policy, Q, steps, reward, num_rev = monte_carlo_iterative_optimisation(
        sim, gamma, epsilon, alpha, num_episodes=1, max_steps=20,
        initial_Q=Q, default_value=0)
    #print(r, reward)
    board_covered = num_rev / (width * height)
    return board_covered 
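Note that the return signature differs from q_learning in Example no. 1: monte_carlo_iterative_optimisation yields five values (there is no separate r), so the unpacking must match whichever learner is called.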
Example no. 3
def run_q():
    width = 3
    height = 3
    num_mines = 1
    sim = Minesweeper(num_mines, width, height)
    # initial Q table
    Q = np.ones((sim.num_states, sim.num_actions))

    # running simulation with Q-learning
    policy, Q, steps, reward, r, num_revealed = q_learning(sim,
                                                           gamma=gamma,
                                                           alpha=alpha,
                                                           epsilon=epsilon,
                                                           num_episodes=1,
                                                           initial_Q=Q)
    return reward
Example no. 4
def steps_sarsa():
    width = 3
    height = 3
    num_mines = 1
    sim = Minesweeper(num_mines, width, height)

    # initial Q table
    Q = np.ones((sim.num_states, sim.num_actions))

    # running simulation with SARSA
    policy, Q, steps, reward, r, num = sarsa(sim,
                                             gamma=gamma,
                                             alpha=alpha,
                                             epsilon=epsilon,
                                             num_episodes=1,
                                             initial_Q=Q)
    return steps
Example no. 5
def steps_mc():
    width = 3
    height = 3
    num_mines = 1
    sim = Minesweeper(num_mines, width, height)
    #features = sim.reset()
    Q = np.ones((sim.num_states, sim.num_actions))

    policy, Q, steps, reward, num_rev = monte_carlo_iterative_optimisation(
        sim,
        gamma,
        epsilon,
        alpha,
        num_episodes=1,
        max_steps=40,
        initial_Q=Q,
        default_value=0)
    return steps
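The three helpers run_q, steps_sarsa, and steps_mc each train for a single episode and return one diagnostic. A minimal driver sketch for printing them side by side (an illustrative harness, not from the source; it assumes the example functions and their module-level hyperparameters are in scope):

# Illustrative driver: one-episode diagnostics from each learner
if __name__ == "__main__":
    print("Q-learning total reward :", run_q())
    print("SARSA steps taken       :", steps_sarsa())
    print("Monte Carlo steps taken :", steps_mc())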
Example no. 6
def main():
    width = 3
    height = 3
    num_mines = 1

    sim = Minesweeper(num_mines, width, height)
    features = sim.reset()

    # feature weights (set up here but unused by the random rollout below)
    qweights = np.zeros((2, 80))
    qweights[1] = 1

    # pick one action at random and repeat it until the episode terminates
    action = np.random.choice([0, 1])
    while not sim.is_terminal():
        features, reward, num_revealed = sim.next(action)
    return reward
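The next example follows the same random-rollout structure; it differs only in that it converts num_revealed into a board-coverage fraction instead of returning the final reward.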
Example no. 7
def random():
    width = 3
    height = 3
    num_mines = 1
    
    sim = Minesweeper(num_mines, width, height)
    features = sim.reset()

    # feature weights (set up here but unused by the random rollout below)
    qweights = np.zeros((2, 80))
    qweights[1] = 1

    # pick one action at random and repeat it until the episode terminates
    action = np.random.choice([0, 1])
    while not sim.is_terminal():
        features, reward, num_revealed = sim.next(action)

    board_covered = num_revealed / (width * height)
    return board_covered
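For context, the random rollout above can be compared against the learning agents' coverage. A minimal sketch, assuming revealed_q, revealed_mc, and random from the earlier examples are in scope; the run count and reporting format are assumptions (note also that naming a function random shadows Python's random module if that module is ever imported into the same namespace):

import numpy as np

# Crude baseline comparison: average board coverage over repeated runs (illustrative only)
n_runs = 100
print("random action baseline  :", np.mean([random() for _ in range(n_runs)]))
print("Q-learning (1 episode)  :", np.mean([revealed_q() for _ in range(n_runs)]))
print("Monte Carlo (1 episode) :", np.mean([revealed_mc() for _ in range(n_runs)]))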