def agent_score(path):
    # loads the agent's config and creates the environment helper
    config, results_dir = load_agent_config(path)
    helper = create_helper(config)

    # registers and creates the Gym environment
    env_id = '{}-{}-v0'.format(config.gym_env_id, 0)
    helper.register_gym_environment(env_id, False)
    env = gym.make(env_id)  # .env
    config.num_episodes = 1
    env.seed(config.seed)

    # creates the agent and loads its tables from file
    agent_rng = np.random.RandomState(config.seed)
    agent, exploration_strategy = create_agent(helper, 1, agent_rng)
    agent.load(path)
    behavior_tracker = BehaviorTracker(config.num_episodes)

    # runs 5 evaluation episodes, recording the final score of each
    scores = []
    for k in range(5):
        curr_obs = env.reset()
        curr_s = helper.get_state_from_observation(curr_obs, 0, False)
        t = 0
        done = False
        while not done:
            # selects an action and observes the transition
            a = agent.act(curr_s)
            obs, r, done, _ = env.step(a)
            s = helper.get_state_from_observation(obs, r, done)
            r = helper.get_reward(curr_s, a, r, s, done)

            # updates the agent and stats
            agent.update(curr_s, a, r, s)
            behavior_tracker.add_sample(curr_s, a)
            helper.update_stats(k, t, curr_obs, obs, curr_s, a, r, s)
            curr_s = s
            curr_obs = obs
            t += 1
        scores.append(env.env.previous_score)

    # closes and de-registers the environment so the id can be re-registered later
    env.close()
    del gym.envs.registration.registry.env_specs[env.spec.id]

    # returns the mean score over the evaluation episodes
    return sum(scores) / len(scores)
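# Usage sketch for agent_score (illustrative, not part of the original code):
# evaluates several saved agents and returns the best-scoring one. The
# `candidate_dirs` argument is a hypothetical list of result directories.
def best_agent(candidate_dirs):
    # maps each results dir to the mean score returned by agent_score
    scores = {path: agent_score(path) for path in candidate_dirs}
    return max(scores, key=scores.get)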
def get_agent_data(config, trial_num):
    # loads config from agent dir (agent_t is expected as a module-level global)
    agent_dir = join(pardir, get_agent_output_dir(config, agent_t, trial_num))
    config_file = join(agent_dir, 'config.json')
    if not exists(config_file):
        return None, None
    config = EnvironmentConfiguration.load_json(config_file)

    # creates env helper
    helper = create_helper(config)

    # tries to load full analysis
    analyses_dir = get_analysis_output_dir(agent_dir)
    file_name = join(analyses_dir, 'full-analysis.json')
    analysis = None
    if exists(file_name):
        analysis = FullAnalysis.load_json(file_name)
        analysis.set_helper(helper)

    # tries to load all data
    stats_dir = join(agent_dir, 'results')
    if exists(stats_dir):
        helper.load_stats(stats_dir)
    else:
        helper = None
    return analysis, helper
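# Usage sketch for get_agent_data (illustrative only): gathers the analysis
# and stats helper of each trial, skipping trials whose stats could not be
# loaded (helper comes back None). `num_trials` is a hypothetical parameter.
def collect_trial_data(config, num_trials):
    data = []
    for trial_num in range(num_trials):
        analysis, helper = get_agent_data(config, trial_num)
        if helper is not None:
            data.append((trial_num, analysis, helper))
    return data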
def load_agent_aux(config, agent_type, agent_dir, trial, seed, agent_rng, args, no_output=False):
    # creates env helper
    helper = create_helper(config)

    # chooses the output dir: the regular one, or a temp dir for comparisons
    if not no_output:
        output_dir = args.output if args.output is not None else get_agent_output_dir(config, agent_type, trial)
    else:
        output_dir = join(dirname(dirname(agent_dir)), 'compare/temp')
    make_clean_dirs(output_dir, hard=True)

    # saves configs to file
    config.save_json(join(output_dir, 'config.json'))
    helper.save_state_features(join(output_dir, 'state_features.csv'))

    # registers and creates the Gym environment
    env_id = '{}-{}-v0'.format(config.gym_env_id, trial)
    helper.register_gym_environment(env_id, False, args.fps, args.show_score_bar)
    env = gym.make(env_id, level=1)  # .env
    config.num_episodes = args.num_episodes
    video_callable = video_schedule(config, True)
    env.seed(seed)

    # creates the agent and loads its tables from file
    agent, exploration_strategy = create_agent(helper, agent_type, agent_rng)
    agent.load(agent_dir)
    behavior_tracker = BehaviorTracker(config.num_episodes)
    return env, helper, agent, behavior_tracker, output_dir, video_callable
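# Usage sketch for load_agent_aux (illustrative only): loads two trained
# agents for a side-by-side comparison. The first writes to its regular
# output dir; the second is redirected to the temporary 'compare/temp' dir
# via no_output=True. `dir1` and `dir2` are hypothetical result directories.
def load_comparison_pair(config, agent_type, dir1, dir2, trial, seed, agent_rng, args):
    aux1 = load_agent_aux(config, agent_type, dir1, trial, seed, agent_rng, args)
    aux2 = load_agent_aux(config, agent_type, dir2, trial, seed, agent_rng, args, no_output=True)
    return aux1, aux2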
def run_trial(args):
    # tries to get agent type
    agent_t = args.agent
    if agent_t == AgentType.Testing:
        # tries to load a pre-trained agent configuration file
        config, results_dir = load_agent_config(args.results, args.trial)
    else:
        # tries to load env config from provided file path
        config_file = args.config_file_path
        config = args.default_frogger_config if config_file is None or not exists(config_file) \
            else EnvironmentConfiguration.load_json(config_file)

    # creates env helper
    helper = create_helper(config)

    # checks for provided output dir
    output_dir = args.output if args.output is not None else \
        get_agent_output_dir(config, agent_t, args.trial)
    if not exists(output_dir):
        makedirs(output_dir)

    # saves / copies configs to file
    config.save_json(join(output_dir, 'config.json'))
    helper.save_state_features(join(output_dir, 'state_features.csv'))

    # registers environment in Gym according to env config
    env_id = '{}-{}-v0'.format(config.gym_env_id, args.trial)
    helper.register_gym_environment(env_id, False, args.fps, args.show_score_bar)

    # creates environment and monitor
    env = gym.make(env_id)
    config.num_episodes = args.num_episodes
    video_callable = video_schedule(config, args.record)
    env = Monitor(env, directory=output_dir, force=True, video_callable=video_callable)

    # adds reference to monitor to allow for gym environments to update video frames
    if video_callable(0):
        env.env.monitor = env

    # initializes seeds (one for the environment, another for the agent)
    env.seed(config.seed + args.trial)
    agent_rng = np.random.RandomState(config.seed + args.trial)

    # creates the agent
    agent, exploration_strategy = create_agent(helper, agent_t, agent_rng)

    # if testing, loads tables from file (some will be filled by the agent during the interaction)
    if agent_t == AgentType.Testing:
        agent.load(results_dir)

    # runs episodes
    behavior_tracker = BehaviorTracker(config.num_episodes)
    recorded_episodes = []
    for e in range(config.num_episodes):

        # checks whether to activate video monitoring
        env.env.monitor = env if video_callable(e) else None

        # resets environment
        old_obs = env.reset()
        old_s = helper.get_state_from_observation(old_obs, 0, False)
        if args.verbose:
            print(f'Episode: {e}')
            # helper.update_stats_episode(e)
        exploration_strategy.update(e)  # update for learning agent

        t = 0
        done = False
        while not done:

            # selects an action
            a = agent.act(old_s)

            # observes the transition
            obs, r, done, _ = env.step(a)
            s = helper.get_state_from_observation(obs, r, done)
            r = helper.get_reward(old_s, a, r, s, done)

            # updates agent and stats
            agent.update(old_s, a, r, s)
            behavior_tracker.add_sample(old_s, a)
            helper.update_stats(e, t, old_obs, obs, old_s, a, r, s)
            old_s = s
            old_obs = obs
            t += 1

        # adds to recorded episodes list
        if video_callable(e):
            recorded_episodes.append(e)

        # signals new episode to tracker
        behavior_tracker.new_episode()

    # writes results to files
    agent.save(output_dir)
    behavior_tracker.save(output_dir)
    write_table_csv(recorded_episodes, join(output_dir, 'rec_episodes.csv'))
    helper.save_stats(join(output_dir, 'results'), args.clear_results)
    print('\nResults of trial {} written to:\n\t\'{}\''.format(args.trial, output_dir))
    env.close()
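# Minimal argparse wiring for run_trial (a sketch of one possible CLI entry
# point, not the project's actual one); the flag names simply mirror the
# attributes the function reads from `args`, and all defaults are assumptions.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-a', '--agent', type=int, default=0, help='agent type')
    parser.add_argument('-r', '--results', help='directory of pre-trained agent (testing only)')
    parser.add_argument('-t', '--trial', type=int, default=0, help='trial number')
    parser.add_argument('-o', '--output', help='directory in which to store results')
    parser.add_argument('-e', '--num_episodes', type=int, default=100, help='number of episodes')
    parser.add_argument('-v', '--verbose', action='store_true', help='print episode progress')
    parser.add_argument('--config_file_path', help='path to environment config file')
    parser.add_argument('--default_frogger_config', default=None, help='fallback config (left as None here)')
    parser.add_argument('--fps', type=int, default=30, help='frames per second')
    parser.add_argument('--show_score_bar', action='store_true', help='render the score bar')
    parser.add_argument('--record', action='store_true', help='record episode videos')
    parser.add_argument('--clear_results', action='store_true', help='clear previous stats')
    run_trial(parser.parse_args())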
agent_t = int(sys.argv[1]) if len(sys.argv) > 1 else DEF_AGENT_TYPE

# tries to load agent from results dir
agent_dir = sys.argv[2] if len(sys.argv) > 2 else get_agent_output_dir(DEFAULT_CONFIG, agent_t)
if not exists(agent_dir):
    raise ValueError('Could not load agent from: {}'.format(agent_dir))
config_file = join(agent_dir, 'config.json')
if not exists(config_file):
    raise ValueError('Configuration not found: {}'.format(config_file))
config = EnvironmentConfiguration.load_json(config_file)
action_names = config.get_action_names()

# creates env helper
helper = create_helper(config, SOUND)
feats_nbins = helper.get_features_bins()

# loads the agent's behavior
behavior_tracker = BehaviorTracker(0)
behavior_tracker.load(agent_dir)

# registers environment in Gym according to env_config
env_id = '{}-replay-v0'.format(config.gym_env_id)
helper.register_gym_environment(env_id, True, FPS, SHOW_SCORE_BAR)

# creates environment
env = gym.make(env_id)

# initializes seed according to config
env.seed(config.seed)
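# Replay sketch (illustrative only): steps the replay environment through the
# recorded behavior. It assumes, hypothetically, that BehaviorTracker exposes
# the recorded actions per episode as `s_a` (a list of action sequences); the
# actual attribute of the project's tracker may differ.
for e, ep_actions in enumerate(behavior_tracker.s_a):
    env.reset()
    for a in ep_actions:
        obs, r, done, _ = env.step(a)
        if done:
            break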
agent_t = args.agent
explanation_t = args.report

# tries to load agent from results dir
agent_dir = sys.argv[3] if len(sys.argv) > 3 else get_agent_output_dir(DEFAULT_CONFIG, agent_t)
if not exists(agent_dir):
    raise ValueError('Could not load agent from: {}'.format(agent_dir))
config_file = join(agent_dir, 'config.json')
if not exists(config_file):
    raise ValueError('Configuration not found: {}'.format(config_file))
config = EnvironmentConfiguration.load_json(config_file)

# creates env helper
helper = create_helper(config)

# tries to load full analysis
analyses_dir = sys.argv[4] if len(sys.argv) > 4 else get_analysis_output_dir(agent_dir)
file_name = join(analyses_dir, 'full-analysis.json')
if exists(file_name):
    full_analysis = FullAnalysis.load_json(file_name)
else:
    raise ValueError('Full analysis not found: {}'.format(file_name))
full_analysis.set_helper(helper)

# creates the agent and loads its tables
agent, exploration_strategy = create_agent(helper, DEF_AGENT_TYPE, None)
agent.load(agent_dir)

# loads the agent's behavior
def run_trial(args):
    # tries to get agent type
    agent_t = args.agent
    results_dir = ''
    if agent_t == AgentType.Testing:
        # tries to load config from provided results dir path
        results_dir = args.results if args.results is not None else \
            get_agent_output_dir(DEFAULT_CONFIG, AgentType.Learning)
        config_file = join(results_dir, 'config.json')
        if not exists(results_dir) or not exists(config_file):
            raise ValueError('Could not load configuration from: {}.'.format(config_file))
        config = EnvironmentConfiguration.load_json(config_file)

        # if testing, we want to force a seed different than training (diff. test environments)
        config.seed += 1
    else:
        # tries to load env config from provided file path
        config_file = args.config
        config = DEFAULT_CONFIG if config_file is None or not exists(config_file) \
            else EnvironmentConfiguration.load_json(config_file)

    # creates env helper
    helper = create_helper(config)

    # checks for provided output dir
    output_dir = args.output if args.output is not None else get_agent_output_dir(config, agent_t, args.trial)
    if not exists(output_dir):
        makedirs(output_dir)

    # saves / copies configs to file
    config.save_json(join(output_dir, 'config.json'))
    helper.save_state_features(join(output_dir, 'state_features.csv'))

    # registers environment in Gym according to env config
    env_id = '{}-{}-v0'.format(config.gym_env_id, args.trial)
    helper.register_gym_environment(env_id, False, FPS, SHOW_SCORE_BAR)

    # creates environment and monitor
    env = gym.make(env_id)
    config.num_episodes = 100  # todo
    video_callable = video_schedule(config, args.record)
    env = Monitor(env, directory=output_dir, force=True, video_callable=video_callable)

    # adds reference to monitor to allow for gym environments to update video frames
    if video_callable(0):
        env.env.monitor = env

    # initializes seeds (one for the environment, another for the agent)
    env.seed(config.seed + args.trial)
    agent_rng = np.random.RandomState(config.seed + args.trial)

    # creates the agent
    agent, exploration_strategy = create_agent(helper, agent_t, agent_rng)

    # if testing, loads tables from file (some will be filled by the agent during the interaction)
    if agent_t == AgentType.Testing:
        agent.load(results_dir)

    # runs episodes
    behavior_tracker = BehaviorTracker(config.num_episodes)
    recorded_episodes = []
    for e in range(config.num_episodes):

        # checks whether to activate video monitoring
        env.env.monitor = env if video_callable(e) else None

        # resets environment
        old_obs = env.reset()
        old_s = helper.get_state_from_observation(old_obs, 0, False)
        if args.verbose:
            helper.update_stats_episode(e)
        exploration_strategy.update(e)

        t = 0
        done = False
        while not done:

            # selects an action
            a = agent.act(old_s)

            # observes the transition
            obs, r, done, _ = env.step(a)
            s = helper.get_state_from_observation(obs, r, done)
            r = helper.get_reward(old_s, a, r, s, done)

            # updates agent and stats
            agent.update(old_s, a, r, s)
            behavior_tracker.add_sample(old_s, a)
            helper.update_stats(e, t, old_obs, obs, old_s, a, r, s)
            old_s = s
            old_obs = obs
            t += 1

        # adds to recorded episodes list
        if video_callable(e):
            recorded_episodes.append(e)

        # signals new episode to tracker
        behavior_tracker.new_episode()

    # writes results to files
    agent.save(output_dir)
    behavior_tracker.save(output_dir)
    write_table_csv(recorded_episodes, join(output_dir, 'rec_episodes.csv'))
    helper.save_stats(join(output_dir, 'results'), CLEAR_RESULTS)
    print('\nResults of trial {} written to:\n\t\'{}\''.format(args.trial, output_dir))
    env.close()
parser.add_argument('-o', '--output', help='directory in which to store results')
parser.add_argument('-c', '--config', help='path to config file')
args = parser.parse_args()

# loads environment config from results dir
agent_dir = get_agent_output_dir(DEFAULT_CONFIG, args.agent)
if not exists(agent_dir):
    raise ValueError('Could not load agent from: {}'.format(agent_dir))
config_file = join(agent_dir, 'config.json')
if not exists(config_file):
    raise ValueError('Configuration not found: {}'.format(config_file))
env_config = EnvironmentConfiguration.load_json(config_file)

# creates env helper
helper = create_helper(env_config)

# tries to load analysis config from given file
an_config = get_analysis_config(env_config)
if args.config is not None and exists(args.config):
    an_config = AnalysisConfiguration.load_json(args.config)

# creates an agent and loads all tables
agent = QLearningAgent(env_config.num_states, env_config.num_actions)
agent.load(agent_dir)

# creates output dir if needed
output_dir = args.output if args.output is not None else get_analysis_output_dir(agent_dir)
if not exists(output_dir):
    makedirs(output_dir)
elif CLEAR_RESULTS: