Example #1
0
def sarsa_lambda_gradient_descent():
    """Run gradient-descent SARSA(lambda) on the mountain-car game.

    Builds the mountain-car tilings state adapter (5 tilings, 9 tiles per
    row), wraps it so its output is a NumPy array, and creates a
    ``q.SARSALambdaGradientDescent`` learner whose weight vector starts at
    all ones.  The learner is then handed to ``q.Teacher`` twice: first
    ``teach(1)`` with the mountain-car visualizer, then ``teach(30)`` with
    the no-op visualizer (the argument is presumably the number of
    episodes -- confirm against ``q.Teacher``).

    Takes no arguments and returns ``None``.
    """
    import matplotlib.pyplot as plt
    plt.ion()  # interactive mode, so drawing does not block the run
    import q_learning as q
    import numpy as np

    # ``reload`` is a builtin only on Python 2; on Python 3 it lives in
    # ``importlib``.  Resolve whichever one exists so the example runs on both.
    try:
        reload_module = reload
    except NameError:
        from importlib import reload as reload_module

    # Pick up edits made to q_learning since it was first imported.
    reload_module(q)
    game = q.MountainCarGame

    tile_in_row = 9
    n_tilings = 5

    # Keyword arguments keep the two integer settings from being swapped
    # silently; these are the same keyword names the SARSALambda example uses.
    tilings_adapter = q.mountain_car_game_tilings_state_adapter(
        n_tilings=n_tilings, tile_in_row=tile_in_row)

    def array_state_adapter(state):
        """Return the tilings adapter's output for *state* as a NumPy array."""
        return np.array(tilings_adapter(state))

    # One weight per tile: tile_in_row * tile_in_row tiles in each tiling.
    # NOTE(review): the original built an integer array of ones; a float
    # array is used so fractional updates (alpha = 0.1) are representable --
    # confirm SARSALambdaGradientDescent does not rely on an integer theta.
    initial_theta = np.ones(tile_in_row * tile_in_row * n_tilings)

    # Two separate game instances are created here, one to read the action
    # set and one to read the initial state, exactly as in the original.
    q_algo1 = q.SARSALambdaGradientDescent(game().get_actions(),
                                           game().get_state(),
                                           initial_q=0,
                                           initial_theta=initial_theta,
                                           state_adapter=array_state_adapter)

    # Hyper-parameters are plain attributes on the learner.
    q_algo1.epsilon = 0.02
    q_algo1.lmbda = 0.5
    q_algo1.gamma = 0.9
    q_algo1.alpha = 0.1

    # First pass with the mountain-car visualizer attached.
    teacher = q.Teacher(game, q_algo1, q.MountainCarGameVisualizer(q_algo1))
    teacher.teach(1)

    # Second, longer pass with the no-op visualizer.
    teacher = q.Teacher(game, q_algo1, q.GameNoVisualizer())
    teacher.teach(30)
Example #2
0
def sarsa_lambda_on_mountain_car_game():
    """Run tabular-style ``q.SARSALambda`` on the mountain-car game.

    Builds the mountain-car tilings state adapter (9 tiles per row,
    5 tilings) and a ``q.SARSALambda`` learner with ``memory_size=40``, sets
    its exploration and trace parameters, then hands it to ``q.Teacher``
    twice: ``teach(1)`` with the mountain-car visualizer followed by
    ``teach(30)`` with the no-op visualizer (the argument is presumably the
    number of episodes -- confirm against ``q.Teacher``).

    Takes no arguments and returns ``None``.
    """
    import q_learning as q

    # ``reload`` is a builtin only on Python 2; on Python 3 it lives in
    # ``importlib``.  Resolve whichever one exists so the example runs on both.
    try:
        reload_module = reload
    except NameError:
        from importlib import reload as reload_module

    # Pick up edits made to q_learning since it was first imported.
    reload_module(q)
    game = q.MountainCarGame

    state_adapter = q.mountain_car_game_tilings_state_adapter(tile_in_row=9,
                                                              n_tilings=5)

    # Two separate game instances are created here, one to read the action
    # set and one to read the initial state, exactly as in the original.
    # The third positional argument (0) is presumably the initial Q-value --
    # confirm against q.SARSALambda's signature.
    q_algo1 = q.SARSALambda(game().get_actions(),
                            game().get_state(),
                            0,
                            memory_size=40,
                            state_adapter=state_adapter)

    # Hyper-parameters are plain attributes on the learner.
    q_algo1.epsilon = 0.2
    q_algo1.lmbda = 0.9
    q_algo1.gamma = 0.5

    # First pass with the mountain-car visualizer attached.
    visualizer = q.MountainCarGameVisualizer(q_algo1)
    teacher = q.Teacher(game, q_algo1, visualizer)
    teacher.teach(1)

    # Second, longer pass with the no-op visualizer.
    teacher = q.Teacher(game, q_algo1, q.GameNoVisualizer())
    teacher.teach(30)
Example #3
0
def sarsa_lambda_on_mountain_car_game():
    """Train a ``q.SARSALambda`` learner on the mountain-car game.

    The learner is created with the mountain-car tilings state adapter
    (``tile_in_row=9``, ``n_tilings=5``) and ``memory_size=40``; epsilon,
    lambda and gamma are then set as attributes.  ``q.Teacher.teach`` is
    called with 1 while the mountain-car visualizer is attached, and then
    with 30 using the no-op visualizer (the argument is presumably an
    episode count -- confirm against ``q.Teacher``).

    Takes no arguments and returns ``None``.
    """
    import q_learning as q

    # Python 2 exposes ``reload`` as a builtin, Python 3 only through
    # ``importlib``; use whichever is available.
    try:
        reload_module = reload
    except NameError:
        from importlib import reload as reload_module

    # Re-import q_learning so recent edits to the module take effect.
    reload_module(q)
    game = q.MountainCarGame

    state_adapter = q.mountain_car_game_tilings_state_adapter(
        tile_in_row=9, n_tilings=5)

    # The action set and the initial state are read from two freshly
    # constructed games, in this order, as in the original.
    # The positional 0 is presumably the initial Q-value -- confirm against
    # q.SARSALambda's signature.
    actions = game().get_actions()
    initial_state = game().get_state()
    q_algo1 = q.SARSALambda(actions, initial_state, 0,
                            memory_size=40,
                            state_adapter=state_adapter)

    # Hyper-parameters are plain attributes on the learner.
    q_algo1.epsilon = 0.2
    q_algo1.lmbda = 0.9
    q_algo1.gamma = 0.5

    # Visualized pass first.
    visualizer = q.MountainCarGameVisualizer(q_algo1)
    teacher = q.Teacher(game, q_algo1, visualizer)
    teacher.teach(1)

    # Then a longer pass with the no-op visualizer.
    teacher = q.Teacher(game, q_algo1, q.GameNoVisualizer())
    teacher.teach(30)
Example #4
0
def sarsa_lambda_gradient_descent():
    """Train ``q.SARSALambdaGradientDescent`` on the mountain-car game.

    The state adapter comes from
    ``q.mountain_car_game_tilings_state_adapter`` (5 tilings, 9 tiles per
    row) and is wrapped so the learner receives NumPy arrays.  The weight
    vector ``initial_theta`` has one entry per tile and starts at all ones.
    ``q.Teacher.teach`` is called with 1 while the mountain-car visualizer
    is attached and then with 30 using the no-op visualizer (the argument
    is presumably an episode count -- confirm against ``q.Teacher``).

    Takes no arguments and returns ``None``.
    """
    import matplotlib.pyplot as plt
    plt.ion()  # interactive mode, so drawing does not block the run
    import q_learning as q
    import numpy as np

    # Python 2 exposes ``reload`` as a builtin, Python 3 only through
    # ``importlib``; use whichever is available.
    try:
        reload_module = reload
    except NameError:
        from importlib import reload as reload_module

    # Re-import q_learning so recent edits to the module take effect.
    reload_module(q)
    game = q.MountainCarGame

    tile_in_row = 9
    n_tilings = 5

    # Passed by keyword so the two integers cannot be bound to the wrong
    # parameters; the SARSALambda example uses the same keyword names.
    raw_adapter = q.mountain_car_game_tilings_state_adapter(
        n_tilings=n_tilings, tile_in_row=tile_in_row)

    def state_adapter2(s):
        """Convert the tilings adapter's output for *s* to a NumPy array."""
        return np.array(raw_adapter(s))

    # tile_in_row * tile_in_row tiles per tiling, n_tilings tilings.
    # NOTE(review): the original built an integer array of ones; a float
    # array is used so fractional updates (alpha = 0.1) are representable --
    # confirm SARSALambdaGradientDescent does not rely on an integer theta.
    n_weights = tile_in_row * tile_in_row * n_tilings
    initial_theta = np.ones(n_weights)

    # The action set and the initial state are read from two freshly
    # constructed games, in this order, as in the original.
    actions = game().get_actions()
    initial_state = game().get_state()
    q_algo1 = q.SARSALambdaGradientDescent(actions,
                                           initial_state,
                                           initial_q=0,
                                           initial_theta=initial_theta,
                                           state_adapter=state_adapter2)

    # Hyper-parameters are plain attributes on the learner.
    q_algo1.epsilon = 0.02
    q_algo1.lmbda = 0.5
    q_algo1.gamma = 0.9
    q_algo1.alpha = 0.1

    # Visualized pass first.
    teacher = q.Teacher(game, q_algo1, q.MountainCarGameVisualizer(q_algo1))
    teacher.teach(1)

    # Then a longer pass with the no-op visualizer.
    teacher = q.Teacher(game, q_algo1, q.GameNoVisualizer())
    teacher.teach(30)