Code Example #1
    def __init__(self, netsize, Nsensors=1, Nmotors=1):  # Create Ising model

        self.size = netsize  # Network size
        self.Ssize = Nsensors  # Number of sensors
        self.Msize = Nmotors  # Number of motors

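        # Local bias fields h and pairwise couplings J of the Ising network.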
        self.h = np.zeros(netsize)
        self.J = np.zeros((netsize, netsize))
        self.max_weights = 2

        self.randomize_state()

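        # Wrap a MountainCar environment and override its position bounds, goal, and speed limit.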
        self.env = MountainCarEnv()
        self.env.min_position = -np.pi / 2
        self.env.max_position = np.pi / 6
        self.env.goal_position = np.pi / 6
        self.env.max_speed = 0.045
        self.observation = self.env.reset()

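        # Inverse temperature of the Ising dynamics and default number of update steps.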
        self.Beta = 1.0
        self.defaultT = max(100, netsize * 20)

        self.Ssize1 = 0
        self.maxspeed = self.env.max_speed
        self.Update(-1)
Code Example #2
    # Plot the smoothed episode reward over time.
    fig2 = plt.figure(figsize=(10, 5))
    rewards_smoothed = pd.Series(stats.episode_rewards).rolling(
        smoothing_window, min_periods=smoothing_window).mean()
    plt.plot(rewards_smoothed)
    plt.xlabel("Episode")
    plt.ylabel("Episode Reward (Smoothed)")
    plt.title("Episode Reward over Time (Smoothed over window size {})".format(
        smoothing_window))
    fig2.savefig('reward.png')
    if noshow:
        plt.close(fig2)
    else:
        plt.show()


if __name__ == "__main__":
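    # Build the environment, the online Q-network, and its target network.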
    env = MountainCarEnv()
    approx = NeuralNetwork()
    target = TargetNetwork()

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    # Choose one.
    #stats = q_learning(sess, env, approx, 3000, 1000)
    stats = q_learning(sess,
                       env,
                       approx,
                       1000,
                       1000,
                       use_experience_replay=True,
                       batch_size=128,
                       target=target)
Code Example #3
import select
import sys
import termios
import tty

settings = termios.tcgetattr(sys.stdin)


def getKey():
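    # Put the terminal into raw mode, read one keypress, then restore the settings.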
    tty.setraw(sys.stdin.fileno())
    rlist, _, _ = select.select([sys.stdin], [], [])
    if rlist:
        key = sys.stdin.read(1)
    else:
        key = ''

    termios.tcsetattr(sys.stdin, termios.TCSADRAIN, settings)
    return key


env = MountainCarEnv()  #gym.make('MountainCar-v0')
env.reset()

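# Drive the environment from the keyboard: 'x' exits, 'r' resets the episode.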
while True:
    env.render()

    key = getKey()
    if key == 'x':
        exit()
    elif key == 'r':  # repeat this game
        ob = env.reset()
        print('Reset:', ob)
        continue

    # Possible actions are: MoveLeft, MoveRight, MoveAhead, MoveBack, LookUp, LookDown, RotateRight, RotateLeft
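    # moveBindings maps keys to actions; it is presumably defined outside this snippet.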
    if key not in moveBindings:
Code Example #4
  # Plot the episode reward over time
  fig2 = plt.figure(figsize=(10,5))
  rewards_smoothed = pd.Series(stats.episode_rewards).rolling(smoothing_window, min_periods=smoothing_window).mean()
  plt.plot(rewards_smoothed)
  plt.xlabel("Episode")
  plt.ylabel("Episode Reward (Smoothed)")
  plt.title("Episode Reward over Time (Smoothed over window size {})".format(smoothing_window))
  fig2.savefig('reward.png')
  if noshow:
      plt.close(fig2)
  else:
      plt.show()

if __name__ == "__main__":
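  # Build the environment and a DQN agent sized to its observation/action spaces.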
  env = MountainCarEnv() #gym.make("MountainCar-v0")
  state_dim = env.observation_space.shape[0]
  action_dim = env.action_space.n
  dqn = DQN(state_dim, action_dim, gamma=0.99)

  episodes = 1000
  time_steps = 200
  epsilon = 0.2
  
  stats = dqn.train(episodes, time_steps, epsilon)

  plot_episode_stats(stats)

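  # Run a few evaluation episodes with the trained agent.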
  for _ in range(5):
    s = env.reset()
    for _ in range(200):