Exemple #1
0
    # Build the online and target Q-networks (state_dim=4, num_actions=2),
    # wrap them in a DQNAgent and load the saved agent from disk.
    state_dim = 4
    num_actions = 2

    Q = MLP(state_dim, num_actions)
    Q_target = MLP(state_dim, num_actions)
    agent = DQNAgent(Q, Q_target, num_actions)
    # NOTE(review): absolute, machine-specific checkpoint path -- this only
    # works on the original author's machine; consider making it configurable.
    agent.load("C:\\Users\\Monish\\Desktop\\workspace\\exercise3_R\\reinforcement_learning\\models_cartpole\\dqn_agent_199.pt")

    n_test_episodes = 15

    # Run the test episodes with training disabled and rendering enabled,
    # collecting the reward reported for each episode.
    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    rewards = np.array(episode_rewards)  # build the array once for both statistics
    results = {
        "episode_rewards": episode_rewards,
        "mean": rewards.mean(),
        "std": rewards.std(),
    }

    # exist_ok=True replaces the exists()/mkdir() pair, which could raise if
    # the directory appeared between the check and the creation.
    os.makedirs("./results", exist_ok=True)

    fname = "./results/cartpole_results_dqn-%s.json" % datetime.now().strftime("%Y%m%d-%H%M%S")
    # The context manager flushes and closes the file even if json.dump
    # raises; previously the handle was never closed.
    with open(fname, "w") as fh:
        json.dump(results, fh)
            
    num_actions = 2

    # Build the online and target Q-networks, wrap them in a DQNAgent and
    # load the model given by args.model.
    # NOTE(review): state_dim is not assigned in this fragment -- it must
    # already be defined earlier in the script; confirm.
    Q_network = MLP(state_dim, num_actions)
    Q_target_network = MLP(state_dim, num_actions)
    agent = DQNAgent(Q=Q_network,
                     Q_target=Q_target_network,
                     num_actions=num_actions)
    agent.load(args.model)

    n_test_episodes = args.episodes

    # Run the test episodes with training disabled and rendering enabled,
    # printing and collecting the reward reported for each episode.
    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env,
                            agent,
                            deterministic=True,
                            do_training=False,
                            rendering=True,
                            max_timesteps=250)
        episode_rewards.append(stats.episode_reward)
        print('Episode %d (reward: %d)' % (i, stats.episode_reward))

    # save results in a dictionary and write them into a .json file
    rewards = np.array(episode_rewards)  # build the array once for both statistics
    results = {
        "episode_rewards": episode_rewards,
        "mean": rewards.mean(),
        "std": rewards.std(),
    }

    # exist_ok=True replaces the exists()/mkdir() pair, which could raise if
    # the directory appeared between the check and the creation.
    os.makedirs("./results", exist_ok=True)

    fname = "./results/cartpole_results_dqn-%s.json" % datetime.now().strftime(