def __init__(self, netsize, Nsensors=1, Nmotors=1):
    """Create an Ising-model agent of `netsize` units coupled to a MountainCar env.

    Args:
        netsize: total number of units (spins) in the network.
        Nsensors: number of sensor units (default 1).
        Nmotors: number of motor units (default 1).
    """
    self.size = netsize  # Network size
    self.Ssize = Nsensors  # Number of sensors
    self.Msize = Nmotors  # Number of motors (original comment said "sensors" — copy/paste slip)
    self.h = np.zeros(netsize)  # per-unit local fields
    self.J = np.zeros((netsize, netsize))  # pairwise couplings
    self.max_weights = 2  # presumably a bound on field/coupling magnitudes — TODO confirm
    self.randomize_state()
    # Customized MountainCar geometry: track spans [-pi/2, pi/6] and the goal
    # sits at the right edge, so reaching the rightmost position ends an episode.
    self.env = MountainCarEnv()
    self.env.min_position = -np.pi / 2
    self.env.max_position = np.pi / 6
    self.env.goal_position = np.pi / 6
    self.env.max_speed = 0.045
    # Reset AFTER the bounds are changed so the start state respects them.
    self.observation = self.env.reset()
    self.Beta = 1.0  # presumably the inverse temperature for spin updates — TODO confirm
    self.defaultT = max(100, netsize * 20)  # default run length, scales with network size
    self.Ssize1 = 0
    self.maxspeed = self.env.max_speed  # cached copy of the env's speed cap
    self.Update(-1)  # run one update pass before first use — semantics of -1 defined elsewhere
# NOTE(review): the statements below are the tail of a plotting helper whose
# `def` line lies outside this chunk; `stats`, `smoothing_window` and `noshow`
# are presumably its parameters — confirm against the full file.
    # Smooth per-episode rewards with a rolling mean of width `smoothing_window`;
    # min_periods equal to the window leaves the first window-1 points as NaN.
    rewards_smoothed = pd.Series(stats.episode_rewards).rolling(
        smoothing_window, min_periods=smoothing_window).mean()
    plt.plot(rewards_smoothed)
    plt.xlabel("Episode")
    plt.ylabel("Episode Reward (Smoothed)")
    plt.title("Episode Reward over Time (Smoothed over window size {})".format(
        smoothing_window))
    fig2.savefig('reward.png')
    if noshow:
        plt.close(fig2)
    else:
        # NOTE(review): current matplotlib's plt.show() takes no figure
        # argument — plt.show(fig2) may warn or raise; consider plt.show()
        # or fig2.show(). Left unchanged here.
        plt.show(fig2)


if __name__ == "__main__":
    env = MountainCarEnv()
    approx = NeuralNetwork()
    target = TargetNetwork()
    # TF1-style session API: build the graph, then initialize variables.
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # Choose one.
    #stats = q_learning(sess, env, approx, 3000, 1000)
    stats = q_learning(sess, env, approx, 1000, 1000,
                       use_experience_replay=True, batch_size=128, target=target)
# Snapshot the terminal's current mode so getKey() can restore it after
# switching stdin to raw input.
settings = termios.tcgetattr(sys.stdin)


def getKey():
    """Block until a key is pressed on stdin and return it as a 1-char str.

    Puts the terminal into raw mode for the read, then restores the saved
    `settings`, so the rest of the program sees a normal terminal. Returns
    '' if select() wakes without a readable stdin.
    """
    tty.setraw(sys.stdin.fileno())
    rlist, _, _ = select.select([sys.stdin], [], [])
    if rlist:
        key = sys.stdin.read(1)
    else:
        key = ''
    termios.tcsetattr(sys.stdin, termios.TCSADRAIN, settings)
    return key


env = MountainCarEnv()  #gym.make('MountainCar-v0')
env.reset()
while True:
    env.render()
    key = getKey()
    if key == 'x':
        # 'x' quits the teleop loop outright.
        exit()
    elif key == 'r':
        # repeat this game
        ob = env.reset()
        # NOTE(review): Python 2 print statement — under Python 3 this is a
        # SyntaxError and must become print('Reset:', ob).
        print 'Reset:', ob
        continue
    # Possible actions are: MoveLeft, MoveRight, MoveAhead, MoveBack, LookUp, LookDown, RotateRight, RotateLeft
    # NOTE(review): chunk ends mid-statement; the body of this `if` (and the
    # definition of `moveBindings`) lies outside this chunk.
    if key not in moveBindings:
# Plot the episode reward over time fig2 = plt.figure(figsize=(10,5)) rewards_smoothed = pd.Series(stats.episode_rewards).rolling(smoothing_window, min_periods=smoothing_window).mean() plt.plot(rewards_smoothed) plt.xlabel("Episode") plt.ylabel("Episode Reward (Smoothed)") plt.title("Episode Reward over Time (Smoothed over window size {})".format(smoothing_window)) fig2.savefig('reward.png') if noshow: plt.close(fig2) else: plt.show(fig2) if __name__ == "__main__": env = MountainCarEnv() #gym.make("MountainCar-v0") state_dim = env.observation_space.shape[0] action_dim = env.action_space.n dqn = DQN(state_dim, action_dim, gamma=0.99) episodes = 1000 time_steps = 200 epsilon = 0.2 stats = dqn.train(episodes, time_steps, epsilon) plot_episode_stats(stats) for _ in range(5): s = env.reset() for _ in range(200):