Code example #1
    # copy, os, pickle and gym are required below; EnvWrapper, LinearAgent and
    # fourier_basis are assumed to come from the project's own modules.
    import copy
    import os
    import pickle

    import gym

    def experiment_explore_vs_exploit(meta_path, save_directory, setups, episodes, steps):

        alpha = 0.0001
        basis_order = 3

        env = gym.make('CartPole-v0')
        env.reset()

        # One probe step to size the Fourier feature vector and count the
        # discrete actions (classic Gym step() API returning a 4-tuple).
        (obs, reward, done, info) = env.step(env.action_space.sample())
        obs = EnvWrapper.modified_sigmoid(obs)
        phi = fourier_basis(obs, order=basis_order)

        num_features = phi.shape[0]
        num_actions = env.action_space.n

        if not os.path.isdir(save_directory):
            os.mkdir(save_directory)

        num_samples = 5

        with open(meta_path, "rb") as f:
            meta = pickle.load(f)

        # Build one SARSA agent per CartPole variant described in `setups`.
        agents = []
        for setup in setups:
            gym_env = gym.make('CartPole-v0')
            gym_env.env.force_mag = setup["force"]
            gym_env.env.length = setup["pole_length"]
            gym_env.env.masscart = setup["masscart"]
            gym_env.env.masspole = setup["masspole"]

            env = EnvWrapper(gym_env, basis_order=basis_order, normalization=0)
            agent = LinearAgent(env, meta_policy=meta, alpha=alpha, algo="SARSA")

            agents.append(agent)

        # Train each agent and snapshot its learned policy (deep copy).
        policies = []
        for agent in agents:
            rewards = agent.train(num_episodes=episodes, max_steps=steps, verbose=True, update_meta=False, render=False)
            policies.append(copy.deepcopy(agent.learning_algorithm))


        # Re-run each trained agent in two regimes and record the episode
        # rewards: fully greedy (random_action_prob = 0, no decay) and fully
        # random (random_action_prob = 1).
        rewards = []
        for i, agent in enumerate(agents):
            agent.learning_algorithm = policies[i]
            agent.random_action_prob = 0.0
            agent.RANDOM_ACTION_DECAY = 1.0
            exploit_rewards = agent.train(num_episodes=episodes, max_steps=steps, verbose=True, update_meta=False, render=False)

            agent.random_action_prob = 1.0
            explore_rewards = agent.train(num_episodes=episodes, max_steps=steps, verbose=True, update_meta=False, render=False)

            rewards.append({"explore": explore_rewards, "exploit": exploit_rewards})

        # Save the per-setup explore/exploit reward curves.
        with open(os.path.join(save_directory, "explore_exploit.pkl"), "wb") as f:
            pickle.dump(rewards, f)
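
A minimal usage sketch for the CartPole variant above; the meta-policy path, output directory, setup values, and episode/step counts are illustrative assumptions, not taken from the original code.

    # Hypothetical driver call (all concrete values are assumptions).
    cartpole_setups = [
        {"force": 10.0, "pole_length": 0.5, "masscart": 1.0, "masspole": 0.1},
        {"force": 15.0, "pole_length": 1.0, "masscart": 1.5, "masspole": 0.2},
    ]
    experiment_explore_vs_exploit(meta_path="meta_policy.pkl",
                                  save_directory="./results_cartpole",
                                  setups=cartpole_setups,
                                  episodes=200,
                                  steps=200)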
Code example #2
    # copy, os and pickle are required below; AnimatEnv, EnvWrapper,
    # LinearAgent and fourier_basis are assumed to come from the project's
    # own modules.
    import copy
    import os
    import pickle

    def experiment_explore_vs_exploit(meta_path, save_directory, setups,
                                      episodes, steps):

        alpha = 0.001
        basis_order = 3

        env = AnimatEnv("./CustomEnvironments/maze1.txt")
        env.reset()

        # One probe step to size the Fourier feature vector and count the
        # discrete actions.
        (obs, reward, done, info) = env.step(env.action_space.sample())
        obs = EnvWrapper.normalize_range(obs, env.env_range)
        phi = fourier_basis(obs, order=basis_order)

        num_features = phi.shape[0]
        num_actions = env.action_space.n

        if not os.path.isdir(save_directory):
            os.mkdir(save_directory)

        num_samples = 5

        with open(meta_path, "rb") as f:
            meta = pickle.load(f)

        # Build one REINFORCE agent per maze file listed in `setups`.
        agents = []
        for setup in setups:
            gym_env = AnimatEnv(setup)

            env = EnvWrapper(gym_env, basis_order=basis_order, normalization=1)
            agent = LinearAgent(env,
                                meta_policy=meta,
                                alpha=alpha,
                                algo="REINFORCE")

            agents.append(agent)

        # Train each agent and snapshot its learned policy (deep copy).
        policies = []
        for agent in agents:
            rewards = agent.train(num_episodes=episodes,
                                  max_steps=steps,
                                  verbose=True,
                                  update_meta=False,
                                  render=False)
            policies.append(copy.deepcopy(agent.learning_algorithm))

        # Re-run each trained agent in two regimes and record the episode
        # rewards: fully greedy (random_action_prob = 0, no decay) and fully
        # random (random_action_prob = 1).
        rewards = []
        for i, agent in enumerate(agents):
            agent.learning_algorithm = policies[i]
            agent.random_action_prob = 0.0
            agent.RANDOM_ACTION_DECAY = 1.0
            exploit_rewards = agent.train(num_episodes=episodes,
                                          max_steps=steps,
                                          verbose=True,
                                          update_meta=False,
                                          render=False)

            agent.random_action_prob = 1.0
            explore_rewards = agent.train(num_episodes=episodes,
                                          max_steps=steps,
                                          verbose=True,
                                          update_meta=False,
                                          render=False)

            rewards.append({
                "explore": explore_rewards,
                "exploit": exploit_rewards
            })

        # Save the per-setup explore/exploit reward curves.
        with open(os.path.join(save_directory, "explore_exploit.pkl"),
                  "wb") as f:
            pickle.dump(rewards, f)