def experiment_explore_vs_exploit(meta_path, save_directory, setups, episodes, steps):
    """Compare exploit-only and explore-only runs on CartPole variants.

    One SARSA ``LinearAgent`` is built per entry in ``setups`` and trained
    once. A deep copy of each trained ``learning_algorithm`` is then installed
    on its agent, and the agent is run twice more: first with
    ``random_action_prob = 0.0`` ("exploit"), then with
    ``random_action_prob = 1.0`` ("explore"). The per-agent results are
    pickled to ``<save_directory>/explore_exploit.pkl`` as a list of
    ``{"explore": ..., "exploit": ...}`` dicts, in the order of ``setups``.

    NOTE(review): this source contains a second definition with the same
    name; if both live in one module, the later definition shadows this one.

    Args:
        meta_path: Path to a pickled object that is passed to every agent as
            ``meta_policy``.
        save_directory: Output directory; created (with parents) if missing.
        setups: Iterable of mappings with the keys ``"force"``,
            ``"pole_length"``, ``"masscart"`` and ``"masspole"``.
        episodes: Forwarded to ``agent.train`` as ``num_episodes``.
        steps: Forwarded to ``agent.train`` as ``max_steps``.

    Returns:
        None. The results are written to disk.
    """
    alpha = 0.0001
    basis_order = 3

    # Probe one environment step through the feature pipeline. The results are
    # not used below.
    # NOTE(review): kept because it may act as a smoke test and may advance
    # shared RNG state -- confirm before removing.
    probe_env = gym.make('CartPole-v0')
    probe_env.reset()
    obs, _reward, _done, _info = probe_env.step(probe_env.action_space.sample())
    fourier_basis(EnvWrapper.modified_sigmoid(obs), order=basis_order)

    # makedirs (rather than mkdir) so nested output paths also work.
    if not os.path.isdir(save_directory):
        os.makedirs(save_directory)

    # SECURITY: pickle.load can execute arbitrary code; only pass trusted
    # files as meta_path.
    with open(meta_path, "rb") as meta_file:
        meta = pickle.load(meta_file)

    agents = []
    for setup in setups:
        gym_env = gym.make('CartPole-v0')
        # Override the physical parameters on the wrapped environment.
        gym_env.env.force_mag = setup["force"]
        gym_env.env.length = setup["pole_length"]
        gym_env.env.masscart = setup["masscart"]
        gym_env.env.masspole = setup["masspole"]

        env = EnvWrapper(gym_env, basis_order=basis_order, normalization=0)
        agents.append(LinearAgent(env, meta_policy=meta, alpha=alpha, algo="SARSA"))

    # Initial training pass; keep a snapshot of each trained algorithm.
    policies = []
    for agent in agents:
        agent.train(num_episodes=episodes, max_steps=steps, verbose=True,
                    update_meta=False, render=False)
        policies.append(copy.deepcopy(agent.learning_algorithm))

    rewards = []
    for agent, policy in zip(agents, policies):
        agent.learning_algorithm = policy

        # presumably: probability 0.0 with decay factor 1.0 means no random
        # actions for the whole run -- verify against LinearAgent.
        agent.random_action_prob = 0.0
        agent.RANDOM_ACTION_DECAY = 1.0
        exploit_rewards = agent.train(num_episodes=episodes, max_steps=steps,
                                      verbose=True, update_meta=False,
                                      render=False)

        # NOTE(review): the explore run reuses the same algorithm object the
        # exploit run just used; if train() mutates it, the two runs do not
        # start from the same snapshot -- confirm this is intended.
        agent.random_action_prob = 1.0
        explore_rewards = agent.train(num_episodes=episodes, max_steps=steps,
                                      verbose=True, update_meta=False,
                                      render=False)

        rewards.append({"explore": explore_rewards, "exploit": exploit_rewards})

    output_path = os.path.join(save_directory, "explore_exploit.pkl")
    with open(output_path, "wb") as out_file:
        pickle.dump(rewards, out_file)
def experiment_explore_vs_exploit(meta_path, save_directory, setups, episodes, steps):
    """Compare exploit-only and explore-only runs on AnimatEnv setups.

    One REINFORCE ``LinearAgent`` is built per entry in ``setups`` and trained
    once. A deep copy of each trained ``learning_algorithm`` is then installed
    on its agent, and the agent is run twice more: first with
    ``random_action_prob = 0.0`` ("exploit"), then with
    ``random_action_prob = 1.0`` ("explore"). The per-agent results are
    pickled to ``<save_directory>/explore_exploit.pkl`` as a list of
    ``{"explore": ..., "exploit": ...}`` dicts, in the order of ``setups``.

    NOTE(review): this source contains an earlier definition with the same
    name; if both live in one module, this definition shadows the earlier one.

    Args:
        meta_path: Path to a pickled object that is passed to every agent as
            ``meta_policy``.
        save_directory: Output directory; created (with parents) if missing.
        setups: Iterable of values, each passed directly to ``AnimatEnv``
            (presumably maze file paths -- verify against AnimatEnv).
        episodes: Forwarded to ``agent.train`` as ``num_episodes``.
        steps: Forwarded to ``agent.train`` as ``max_steps``.

    Returns:
        None. The results are written to disk.
    """
    alpha = 0.001
    basis_order = 3

    # Probe one environment step through the feature pipeline. The results are
    # not used below.
    # NOTE(review): kept because it may act as a smoke test (it requires the
    # maze1.txt file to exist) and may advance shared RNG state -- confirm
    # before removing.
    probe_env = AnimatEnv("./CustomEnvironments/maze1.txt")
    probe_env.reset()
    obs, _reward, _done, _info = probe_env.step(probe_env.action_space.sample())
    fourier_basis(EnvWrapper.normalize_range(obs, probe_env.env_range),
                  order=basis_order)

    # makedirs (rather than mkdir) so nested output paths also work.
    if not os.path.isdir(save_directory):
        os.makedirs(save_directory)

    # SECURITY: pickle.load can execute arbitrary code; only pass trusted
    # files as meta_path.
    with open(meta_path, "rb") as meta_file:
        meta = pickle.load(meta_file)

    agents = []
    for setup in setups:
        env = EnvWrapper(AnimatEnv(setup), basis_order=basis_order, normalization=1)
        agents.append(
            LinearAgent(env, meta_policy=meta, alpha=alpha, algo="REINFORCE")
        )

    # Initial training pass; keep a snapshot of each trained algorithm.
    policies = []
    for agent in agents:
        agent.train(num_episodes=episodes, max_steps=steps, verbose=True,
                    update_meta=False, render=False)
        policies.append(copy.deepcopy(agent.learning_algorithm))

    rewards = []
    for agent, policy in zip(agents, policies):
        agent.learning_algorithm = policy

        # presumably: probability 0.0 with decay factor 1.0 means no random
        # actions for the whole run -- verify against LinearAgent.
        agent.random_action_prob = 0.0
        agent.RANDOM_ACTION_DECAY = 1.0
        exploit_rewards = agent.train(num_episodes=episodes, max_steps=steps,
                                      verbose=True, update_meta=False,
                                      render=False)

        # NOTE(review): the explore run reuses the same algorithm object the
        # exploit run just used; if train() mutates it, the two runs do not
        # start from the same snapshot -- confirm this is intended.
        agent.random_action_prob = 1.0
        explore_rewards = agent.train(num_episodes=episodes, max_steps=steps,
                                      verbose=True, update_meta=False,
                                      render=False)

        rewards.append({"explore": explore_rewards, "exploit": exploit_rewards})

    output_path = os.path.join(save_directory, "explore_exploit.pkl")
    with open(output_path, "wb") as out_file:
        pickle.dump(rewards, out_file)