def MCTS_agent_fn(arena_id, env):
    """Construct an ``MCTS_agent`` from a fixed set of keyword arguments.

    Args:
        arena_id: Accepted but not used by this function.
        env: Accepted but not used by this function.

    Returns:
        The object produced by ``MCTS_agent(**kwargs)`` using the
        hard-coded configuration below.
    """
    # Same keys, values, and insertion order as building the dict first and
    # then assigning 'agent_id' / 'char_index' afterwards.
    agent_kwargs = {
        'recursive': False,
        'max_episode_length': 5,
        'num_simulation': 100,
        'max_rollout_steps': 5,
        'c_init': 0.1,
        'c_base': 1000000,
        'num_samples': 1,
        'num_processes': 1,
        'logging': True,
        'logging_graphs': True,
        'agent_id': 1,
        'char_index': 0,
    }
    return MCTS_agent(**agent_kwargs)
max_episode_length=5, num_simulation=100, max_rollout_steps=5, c_init=0.1, c_base=1000000, num_samples=1, num_processes=1, logging=True, logging_graphs=True) args_agent1 = {'agent_id': 1, 'char_index': 0} args_agent2 = {'agent_id': 2, 'char_index': 1} args_agent1.update(args_common) args_agent2.update(args_common) args_agent2.update({'recursive': False}) agents = [lambda x, y: MCTS_agent(**args_agent1), lambda x, y: MCTS_agent(**args_agent2)] arena = ArenaMP(args.max_episode_length, id_run, env_fn, agents) for iter_id in range(num_tries): steps_list, failed_tasks = [], [] if not os.path.isfile(args.record_dir + '/results_{}.pik'.format(0)): test_results = {} else: test_results = pickle.load(open(args.record_dir + '/results_{}.pik'.format(0), 'rb')) current_tried = iter_id for episode_id in episode_ids: curr_log_file_name = args.record_dir + '/logs_agent_{}_{}_{}.pik'.format( env_task_set[episode_id]['task_id'],
num_simulation=100, max_rollout_steps=5, c_init=0.1, c_base=1000000, num_samples=1, num_processes=1, logging=True, logging_graphs=True) args_agent1 = {'agent_id': 1, 'char_index': 0} args_agent2 = {'agent_id': 2, 'char_index': 1} args_agent1.update(args_common) args_agent2.update(args_common) args_agent2.update({'recursive': True}) agents = [ lambda x, y: MCTS_agent(**args_agent1), lambda x, y: Random_agent(**args_agent2) ] arena = ArenaMP(args.max_episode_length, id_run, env_fn, agents) for iter_id in range(num_tries): steps_list, failed_tasks = [], [] if not os.path.isfile(args.record_dir + '/results_{}.pik'.format(0)): test_results = {} else: test_results = pickle.load( open(args.record_dir + '/results_{}.pik'.format(0), 'rb')) current_tried = iter_id for episode_id in episode_ids: