def load_wrap_vb_learner(saved_dir_nm, saved_learner_nm, learner_cfg, nb_actions, name,
                         internal_to_store):
    """
    Loads a value based chainerrl agent (called learner) and wraps it in an agent class
    that provides the interface to use it with malmopy.

    :param saved_dir_nm: type str, the directory in which the chainerrl agent is saved
    :param saved_learner_nm: type str, the name of a saved chainerrl agent
        (usually the number of steps after which the learner was saved)
    :param learner_cfg: type str, the name of the config used to load the chainerrl agent
    :param nb_actions: type int, the number of actions that the agent can execute
    :param name: type str, the name of the agent
    :param internal_to_store: type list, list of strings with names of model attributes to store
    :return: chainerrl agent wrapped in a LearningAgent
    """
    learner = create_value_based_learner(learner_cfg)
    learner.load(os.path.join(get_results_path(), saved_dir_nm, saved_learner_nm))
    created_agent = LearningAgent(learner=learner,
                                  name=name,
                                  nb_actions=nb_actions,
                                  out_dir=os.path.join(get_results_path(), saved_dir_nm,
                                                       'state_data'),
                                  internal_to_store=internal_to_store)
    logger.log(msg='Loaded chainerrl agent from {}'.format(
        os.path.join(get_results_path(), saved_dir_nm, saved_learner_nm)),
        level=logging.INFO)
    return created_agent
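# A minimal usage sketch (illustrative only): the directory, file and config
# names below are hypothetical, and the config is assumed to define a network
# whose output dimension matches nb_actions.
#
#     agent = load_wrap_vb_learner(saved_dir_nm='simulation_DQN_RecNN_2018-01-01',
#                                  saved_learner_nm='500000_finish',
#                                  learner_cfg='config.ini',
#                                  nb_actions=3,
#                                  name='loaded_agent',
#                                  internal_to_store=['h'])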
def simulation(clients, passed_config):
    """
    Performs a simulation of the Pig Chase game based on the passed config.

    :param clients: not used, present only to keep a uniform interface
    :param passed_config: type str, the name of the config to use
    """
    sim_config = Config(passed_config)
    experiment_cfg = sim_config.get_section('EXPERIMENT')
    results_dir = os.path.join(
        get_results_path(),
        'simulation_{}_{}_{}'.format(sim_config.get_str('BASIC', 'learner'),
                                     sim_config.get_str('BASIC', 'network'),
                                     datetime.utcnow().isoformat()[:-4]))
    experiment_cfg["outdir"] = results_dir
    sim_config.copy_config(results_dir)

    opponent = PigChaseChallengeAgent(name="Agent_1", p_focused=0.95)
    agent_env = getattr(env_simulator, sim_config.get_str('BASIC', 'simulator'))(
        opponent=opponent, **sim_config.get_section('SIMULATOR'))
    learner = create_value_based_learner(passed_config)

    logger.log(msg='Experiment parameters {}'.format(
        ' '.join([name + ':' + str(value) for name, value in experiment_cfg.items()])),
        level=logging.INFO)
    logger.log(msg='Starting experiment, calling chainerrl function.', level=logging.INFO)
    experiments.train_agent_with_evaluation(agent=learner, env=agent_env, **experiment_cfg)
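# Note: experiment_cfg is forwarded verbatim as keyword arguments to chainerrl's
# experiments.train_agent_with_evaluation (here and in value_based_experiment
# below), so the keys in the EXPERIMENT config section must match that function's
# signature for the installed chainerrl version (e.g. steps, eval_interval and,
# depending on the version, eval_n_runs or eval_n_episodes). An illustrative
# EXPERIMENT section, with assumed values:
#
#     [EXPERIMENT]
#     steps = 500000
#     eval_n_runs = 10
#     eval_interval = 10000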
def async_simulation(clients, passed_config): """ Performs a simulation on a simulator specified in config. :param clients: not used, just to have uniform interface :param passed_config: type str, the name of the config to use """ sim_config = Config(passed_config) experiment_cfg = sim_config.get_section('EXPERIMENT') results_dir = os.path.join(get_results_path(), 'simulation_{}_{}_{}'.format('A3C', sim_config.get_str('BASIC', 'network'), datetime.utcnow().isoformat()[:-4])) experiment_cfg["outdir"] = results_dir sim_config.copy_config(results_dir) learner = create_async_learner(passed_config) def make_env(process_idx, test): opponent = PigChaseChallengeAgent(name="Agent_1") return getattr(env_simulator, sim_config.get_str('BASIC', 'simulator'))(opponent=opponent, **sim_config.get_section( 'SIMULATOR')) logger.log(msg='Experiment parameters {}'.format( ' '.join([name + ':' + str(value) for name, value in experiment_cfg.items()])), level=logging.INFO) logger.log(msg='Starting experiment, calling chainerrl function.', level=logging.INFO) experiments.train_agent_async( agent=learner, make_env=make_env, profile=True, **experiment_cfg)
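# Design note: chainerrl's experiments.train_agent_async calls make_env once per
# worker process, so every process gets its own environment and opponent; sharing
# a single environment instance across processes would not be safe. The
# process_idx and test arguments (the latter requesting an evaluation
# environment) are accepted but ignored by make_env above.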
def fit_dim_red(traj_dict_fn, n_comp, feature_nm, opponent_type_fn=None):
    """
    Fits several dimensionality reduction models (t-SNE, Isomap, PCA) to stored
    trajectories of the model's internal states and saves a 2D plot for each model.

    :param traj_dict_fn: type str, name of a pickle file with a trajectory dictionary,
        relative to the results path
    :param n_comp: type int, the number of components to reduce to
    :param feature_nm: type str, the name of the stored feature to reduce
    :param opponent_type_fn: type str, optional name of a csv file with opponent types,
        used to colour the plotted points
    """
    with open(os.path.join(get_results_path(), traj_dict_fn), 'rb') as handle:
        traj_dict = pickle.load(handle)
    opponent_type = []
    if opponent_type_fn is not None:
        with open(os.path.join(get_results_path(), opponent_type_fn), 'rb') as handle:
            opponent_type = pd.read_csv(handle)['type']
        opponent_type = [1 if opp == 'FocusedAgent' else 0 for opp in list(opponent_type)]
    data, traj_ind = traj_2_array(traj_dict, feature_nm)
    # Keep only the first element along the second axis (no scaling is applied).
    data_scaled = data[:, 0, :]
    models = ['TSNE', 'Isomap', 'PCA']
    for dim_red in models:
        fig = plt.figure()
        print('Fitting: ', dim_red)
        if hasattr(manifold, dim_red):
            dim_red_model = getattr(manifold, dim_red)(n_components=n_comp)
        elif hasattr(decomposition, dim_red):
            dim_red_model = getattr(decomposition, dim_red)(n_components=n_comp)
        else:
            raise AttributeError('Specified dimensionality reduction not found '
                                 'in sklearn.manifold or sklearn.decomposition.')
        trans_data = dim_red_model.fit_transform(data_scaled)
        trans_traj_data = reconstruct_traj(trans_data, traj_ind)
        for index, traj in trans_traj_data.items():
            point_type = '.b'
            if opponent_type_fn is not None:
                opp_typ = opponent_type[index]
                point_type = '.r' if int(opp_typ) == 1 else '.b'
            for step, point in enumerate(traj):
                plt.plot(point[0], point[1], point_type, markersize=1)
        fig.suptitle(dim_red)
        path, f_name = os.path.split(traj_dict_fn)
        plt.savefig(os.path.join(get_results_path(), path,
                                 feature_nm + dim_red + '_dim_red_plot.png'))
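# A minimal usage sketch (the file names are hypothetical): reduce the stored
# 'h' activations of a recurrent model to two dimensions and colour the points
# by the opponent type logged during evaluation.
#
#     fit_dim_red(traj_dict_fn='my_run/internal_states.pickle',
#                 n_comp=2,
#                 feature_nm='h',
#                 opponent_type_fn='my_run/challenge_agent_type.csv')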
def eval_simulation(clients, passed_config):
    """
    Evaluates a saved value based learner on the simulator specified in the config,
    stores the model's internal states, and fits dimensionality reduction models to them.

    :param clients: not used, present only to keep a uniform interface
    :param passed_config: type str, the name of the config to use
    """
    eval_config = Config(passed_config)
    saved_dir_nm, saved_learner_nm = os.path.split(eval_config.get_str('BASIC', 'load_path'))
    opponent = PigChaseChallengeAgent(
        name="Agent_1",
        visualizer=CsvVisualizer(output_file=os.path.join(get_results_path(), saved_dir_nm,
                                                          'challenge_agent_type.csv')))
    env = getattr(env_simulator, eval_config.get_str('BASIC', 'simulator'))(
        opponent=opponent, **eval_config.get_section('SIMULATOR'))
    agent = load_wrap_vb_learner(saved_dir_nm, saved_learner_nm, passed_config,
                                 internal_to_store=['h'],
                                 name='evaluation_agent',
                                 nb_actions=eval_config.get_int('NETWORK', 'output_dim'))
    eval_episodes_num = eval_config.get_int('BASIC', 'eval_episodes')
    reward_sum = 0.
    steps_done = 0
    for i in range(1, eval_episodes_num + 1):
        obs = env.reset()
        done = False
        reward = 0
        while not done:
            action = agent.act(obs, reward, done, is_training=False)
            obs, reward, done, _ = env.step(action)
            reward_sum += reward
            steps_done += 1
        # Let the agent observe the terminal transition, then reset the recurrent state.
        agent.act(obs, reward, done, is_training=False)
        agent.learner.model.reset_state()
        # The factor of 25 presumably rescales per-step reward to the 25-step episode cap.
        print('episode: {}, reward per step {}'.format(i, 25 * reward_sum / float(steps_done)),
              'reward per episode: {}'.format(reward_sum / i))
    opponent._visualizer.close()
    agent.save_stored_stats(os.path.join(get_results_path(), saved_dir_nm,
                                         'internal_states.pickle'))
    fit_dim_red(os.path.join(saved_dir_nm, 'internal_states.pickle'),
                feature_nm='h', n_comp=2,
                opponent_type_fn=os.path.join(saved_dir_nm, 'challenge_agent_type.csv'))
def create_value_based_learner(cfg_name):
    """
    Creates a learner that can be used with value based algorithms from chainerrl.

    :param cfg_name: type str, the name of the config
    :return: chainerrl agent specified in the config
    """
    vb_config = Config(cfg_name)
    network = getattr(models, vb_config.get_str('BASIC', 'network'))(
        **vb_config.get_section('NETWORK'))
    q_func = q_functions.SingleModelStateQFunctionWithDiscreteAction(model=network)
    opt = getattr(optimizers, vb_config.get_str('BASIC', 'optimizer'))(
        **vb_config.get_section('OPTIMIZER'))
    opt.setup(q_func)
    opt.add_hook(optimizer.GradientClipping(
        threshold=vb_config.get_float('BASIC', 'grad_clip')))
    rep_buf = replay_buffer.PrioritizedEpisodicReplayBuffer(
        capacity=vb_config.get_int('MEMORY_BUFFER', 'episodic_buffer_size'),
        wait_priority_after_sampling=vb_config.get_bool('MEMORY_BUFFER',
                                                        'wait_priority_after_sampling'))
    explorer = explorers.LinearDecayEpsilonGreedy(
        # np.random.randint's upper bound is exclusive; this replaces the
        # deprecated np.random.random_integers(0, output_dim - 1).
        random_action_func=lambda: np.random.randint(
            0, vb_config.get_int('NETWORK', 'output_dim')),
        **vb_config.get_section('EXPLORER'))
    try:
        learner = getattr(agents, vb_config.get_str('BASIC', 'learner'))(
            q_function=q_func,
            optimizer=opt,
            replay_buffer=rep_buf,
            phi=lambda x: x,
            explorer=explorer,
            **vb_config.get_section('ALGORITHM'))
        if vb_config.get_str('BASIC', 'load_path'):
            learner.load(os.path.join(get_results_path(),
                                      vb_config.get_str('BASIC', 'load_path')))
    except AttributeError as e:
        logger.log(msg='Cannot find model {} in chainerrl.agents'.format(
            vb_config.get_str('BASIC', 'learner')), level=logging.ERROR)
        raise e
    logger.log(msg='Created learner {}'.format(learner.__class__.__name__),
               level=logging.INFO)
    logger.log(msg='Experiment parameters {}'.format(
        ' '.join([name + ':' + str(value)
                  for name, value in vb_config.get_section('EXPERIMENT').items()])),
        level=logging.INFO)
    logger.log(msg='Explorer parameters {}'.format(
        ' '.join([name + ':' + str(value)
                  for name, value in vb_config.get_section('EXPLORER').items()])),
        level=logging.INFO)
    return learner
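# An illustrative (not authoritative) sketch of the config sections that
# create_value_based_learner reads. The section names come from the code above;
# the keys and values shown are assumptions that depend on the chosen network,
# optimizer and chainerrl agent class.
#
#     [BASIC]
#     learner = DQN
#     network = RecNN
#     optimizer = Adam
#     grad_clip = 10.0
#     load_path =
#
#     [NETWORK]
#     output_dim = 3
#
# The OPTIMIZER, MEMORY_BUFFER, EXPLORER and ALGORITHM sections follow the same
# pattern: each key is forwarded as a keyword argument to the corresponding
# constructor, so the keys must match that constructor's signature.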
def value_based_experiment(clients, passed_config):
    """
    Runs a value based experiment on the Malmo Pig Chase environment against
    the PigChaseChallengeAgent.

    :param clients: Malmo clients, passed to PigChaseEnvironment as remotes
    :param passed_config: type str, the name of the config to use
    """
    rvb_config = Config(os.path.join(get_config_dir(), passed_config))
    results_dir = os.path.join(
        get_results_path(),
        'simulation_{}_{}_{}'.format(rvb_config.get_str('BASIC', 'learner'),
                                     rvb_config.get_str('BASIC', 'network'),
                                     datetime.utcnow().isoformat()[:-6]))
    experiment_cfg = rvb_config.get_section('EXPERIMENT')
    experiment_cfg["outdir"] = results_dir

    opponent = PigChaseChallengeAgent(name="Agent_1")
    agent_st_build = CustomStateBuilder()
    opponent_st_build = PigChaseSymbolicStateBuilder()
    opponent_env = PigChaseEnvironment(remotes=clients,
                                       state_builder=opponent_st_build,
                                       role=0,
                                       randomize_positions=True)
    agent_env = PigChaseEnvironment(remotes=clients,
                                    state_builder=agent_st_build,
                                    role=1,
                                    randomize_positions=True)
    env = SingleEnvWrapper(agent_env=agent_env,
                           opponent_env=opponent_env,
                           opponent=opponent,
                           reward_norm=ENV_CAUGHT_REWARD)
    learner = create_value_based_learner(passed_config)

    logger.log(msg='Experiment parameters {}'.format(
        ' '.join(['{}:{}'.format(name, str(value)) for name, value in experiment_cfg.items()])),
        level=logging.INFO)
    logger.log(msg='Starting experiment, calling chainerrl function.', level=logging.INFO)
    experiments.train_agent_with_evaluation(agent=learner, env=env, **experiment_cfg)