def generate_env():
    """Build a one-bandit ``MultiBandit`` environment and report its sizes.

    The action and input counts are read from the underlying bandit
    *before* it is wrapped, so they describe the raw bandit rather than
    the ``MultiBandit`` wrapper.

    Returns:
        A ``(env, n_actions, n_inputs)`` tuple: the wrapped environment,
        followed by the values of ``n_actions()`` and ``n_inputs()`` taken
        from the single bandit created by ``bandit.fixed_bandit()``.
    """
    base_bandit = bandit.fixed_bandit()
    bandits = [base_bandit]

    # Query the dimensions on the raw bandit, prior to wrapping.
    action_count = base_bandit.n_actions()
    input_count = base_bandit.n_inputs()

    wrapped_env = bandit.MultiBandit(
        bandits,
        episode_length=experiment_results_generator.EPISODE_LENGTH,
    )
    return wrapped_env, action_count, input_count
def generate_env():
    """Build a one-bandit ``MultiBandit`` environment with step inclusion on.

    Unlike querying the raw bandit, the action and input counts here are
    read from the ``MultiBandit`` wrapper itself, which is constructed with
    ``include_steps=True``.

    NOTE(review): ``episode_length`` is not defined in this function; it is
    resolved from an enclosing or module scope that is not visible here --
    confirm it is bound before this function is called.

    Returns:
        A ``(env, n_actions, n_inputs)`` tuple: the wrapped environment,
        followed by the wrapper's ``n_actions()`` and ``n_inputs()`` values.
    """
    bandits = [bandit.fixed_bandit()]
    wrapped_env = bandit.MultiBandit(
        bandits,
        episode_length=episode_length,
        include_steps=True,
    )

    # Dimensions come from the wrapper, i.e. after wrapping has been applied.
    action_count = wrapped_env.n_actions()
    input_count = wrapped_env.n_inputs()
    return wrapped_env, action_count, input_count
import os

from bandits import bandit, ucb_agent, experiment_results_generator

# Script entry point: run a UCB1Agent against a single bandit obtained from
# bandit.random_bandit(), then save the experiment results.
if __name__ == '__main__':
    # Wrap the lone bandit in a MultiBandit using the shared episode length.
    bandits = [bandit.random_bandit()]
    env = bandit.MultiBandit(
        bandits,
        episode_length=experiment_results_generator.EPISODE_LENGTH,
    )

    # The agent is sized from the wrapped environment's action count.
    action_count = env.n_actions()
    agent = ucb_agent.UCB1Agent(n_actions=action_count)

    experiment = experiment_results_generator.ExperimentResultsGenerator()
    experiment.run(env=env, agent=agent)

    # The output directory is built from this script's own file path.
    results_dir = experiment_results_generator.build_experiment_path(__file__)
    experiment.save_results(save_dir=results_dir)