device = torch.device("cuda" if args.cuda else "cpu")
params = common.PARAMS[args.params]

game_files = [
    "games/%s%s.ulx" % (args.game, s)
    for s in range(1, args.suffices + 1)
]
val_game_file = "games/%s%s.ulx" % (args.game, args.validation)
if not all(map(lambda p: pathlib.Path(p).exists(), game_files)):
    raise RuntimeError(
        f"Some game files from {game_files} not found! "
        f"Probably you need to run make_games.sh")
action_space, observation_space = common.get_games_spaces(
    game_files + [val_game_file])
env_id = register_games(
    game_files,
    request_infos=EnvInfos(**EXTRA_GAME_INFO),
    name=args.game,
    action_space=action_space,
    observation_space=observation_space)
print("Registered env %s for game files %s" % (env_id, game_files))
val_env_id = register_games(
    [val_game_file],
    request_infos=EnvInfos(**EXTRA_GAME_INFO),
    name=args.game,
    action_space=action_space,
    observation_space=observation_space)
print("Game %s, with file %s will be used for validation" % (
    val_env_id, val_game_file))

env = gym.make(env_id)
env = preproc.TextWorldPreproc(
    env, use_admissible_commands=False,
    keep_admissible_commands=True)
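# Sketch (not in the original snippet): the validation env is presumably
# wrapped the same way as the training env above; the TextWorldPreproc
# arguments are assumed to mirror the training call.
val_env = gym.make(val_env_id)
val_env = preproc.TextWorldPreproc(
    val_env, use_admissible_commands=False,
    keep_admissible_commands=True)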
parser.add_argument("--cuda", action='store_true',
                    help="Use cuda for training")
parser.add_argument("-r", "--run", required=True, help="Run name")
args = parser.parse_args()
device = torch.device("cuda" if args.cuda else "cpu")
params = common.PARAMS[args.params]

game_files = [
    "games/%s%s.ulx" % (args.game, s)
    for s in range(1, args.suffices + 1)
]
if not all(map(lambda p: pathlib.Path(p).exists(), game_files)):
    raise RuntimeError(
        f"Some game files from {game_files} not found! "
        f"Probably you need to run make_games.sh")
env_id = register_games(
    game_files, request_infos=EnvInfos(**EXTRA_GAME_INFO),
    name=args.game)
print("Registered env %s for game files %s" % (env_id, game_files))

val_game_file = "games/%s%s.ulx" % (args.game, args.validation)
val_env_id = register_games(
    [val_game_file], request_infos=EnvInfos(**EXTRA_GAME_INFO),
    name=args.game)
print("Game %s, with file %s will be used for validation" % (
    val_env_id, val_game_file))

env = gym.make(env_id)
env = preproc.TextWorldPreproc(env)

val_env = gym.make(val_env_id)
val_env = preproc.TextWorldPreproc(val_env)
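# Optional smoke test, a sketch under an assumption: EXTRA_GAME_INFO (defined
# elsewhere in the project) is assumed to request admissible_commands. This
# walks the raw registered env with randomly chosen admissible commands to
# verify that the generated game files load and can be played end to end.
import random

def smoke_test(env_id, max_steps=30):
    e = gym.make(env_id)
    obs, infos = e.reset()
    score = 0
    for _ in range(max_steps):
        # pick any currently valid command; assumes admissible_commands
        # was requested via EnvInfos
        cmd = random.choice(infos["admissible_commands"])
        obs, score, done, infos = e.step(cmd)
        if done:
            break
    e.close()
    return score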
def evaluate(data_path, agent):
    eval_data_path = pjoin(data_path, agent.eval_data_path)
    with open(eval_data_path) as f:
        data = json.load(f)
    data = data[agent.question_type]
    data = data["random_map"] if agent.random_map else data["fixed_map"]

    correct_answers = []
    predicted_answers = []
    print_qa_reward, print_sufficient_info_reward = [], []

    for game_path in tqdm(data):
        game_file_path = pjoin(data_path, game_path)
        assert os.path.exists(game_file_path), \
            "Oh no! game path %s does not exist!" % game_file_path
        env_id = register_games([game_file_path],
                                request_infos=request_infos)
        env_id = make_batch(env_id, batch_size=agent.eval_batch_size,
                            parallel=True)
        env = gym.make(env_id)

        data_questions = [item["question"] for item in data[game_path]]
        data_answers = [item["answer"] for item in data[game_path]]
        data_entities = [item["entity"] for item in data[game_path]]
        if agent.question_type == "attribute":
            data_attributes = [item["attribute"] for item in data[game_path]]

        for q_no in range(len(data_questions)):
            questions = data_questions[q_no:q_no + 1]
            answers = data_answers[q_no:q_no + 1]
            reward_helper_info = {
                "_entities": data_entities[q_no:q_no + 1],
                "_answers": data_answers[q_no:q_no + 1],
            }
            if agent.question_type == "attribute":
                reward_helper_info["_attributes"] = \
                    data_attributes[q_no:q_no + 1]

            obs, infos = env.reset()
            batch_size = len(obs)
            agent.eval()
            agent.init(obs, infos)

            # get inputs
            commands, last_facts, init_facts = [], [], []
            commands_per_step, game_facts_cache = [], []
            for i in range(batch_size):
                commands.append("restart")
                last_facts.append(None)
                init_facts.append(None)
                game_facts_cache.append([])
                commands_per_step.append(["restart"])

            observation_strings, possible_words = \
                agent.get_game_info_at_certain_step(obs, infos)
            observation_strings = [
                a + " <|> " + item
                for a, item in zip(commands, observation_strings)
            ]
            input_quest, input_quest_char, _ = \
                agent.get_agent_inputs(questions)

            transition_cache = []
            for step_no in range(agent.eval_max_nb_steps_per_episode):
                # update answerer input
                for i in range(batch_size):
                    if agent.not_finished_yet[i] == 1:
                        agent.naozi.push_one(
                            i, copy.copy(observation_strings[i]))
                    if agent.prev_step_is_still_interacting[i] == 1:
                        new_facts = process_facts(
                            last_facts[i], infos["game"][i],
                            infos["facts"][i], infos["last_action"][i],
                            commands[i])
                        # info used in reward computation for
                        # existence questions
                        game_facts_cache[i].append(new_facts)
                        last_facts[i] = new_facts
                        if step_no == 0:
                            init_facts[i] = copy.copy(new_facts)

                observation_strings_w_history = agent.naozi.get()
                input_observation, input_observation_char, _ = \
                    agent.get_agent_inputs(observation_strings_w_history)
                commands, replay_info = agent.act(
                    obs, infos, input_observation, input_observation_char,
                    input_quest, input_quest_char, possible_words,
                    random=False)
                for i in range(batch_size):
                    commands_per_step[i].append(commands[i])
                replay_info = [
                    observation_strings_w_history, questions, possible_words
                ] + replay_info
                transition_cache.append(replay_info)

                obs, _, _, infos = env.step(commands)
                # possible words do not depend on history, because one can
                # only interact with what is currently accessible
                observation_strings, possible_words = \
                    agent.get_game_info_at_certain_step(obs, infos)
                observation_strings = [
                    a + " <|> " + item
                    for a, item in zip(commands, observation_strings)
                ]

                if (step_no == agent.eval_max_nb_steps_per_episode - 1) or \
                        (step_no > 0 and
                         np.sum(generic.to_np(replay_info[-1])) == 0):
                    break

            # The agent has exhausted all steps, now answer the question.
            answerer_input = agent.naozi.get()
            answerer_input_observation, answerer_input_observation_char, \
                answerer_observation_ids = \
                agent.get_agent_inputs(answerer_input)

            chosen_word_indices = agent.answer_question_act_greedy(
                answerer_input_observation, answerer_input_observation_char,
                answerer_observation_ids, input_quest,
                input_quest_char)  # batch
            chosen_word_indices_np = generic.to_np(chosen_word_indices)
            chosen_answers = [
                agent.word_vocab[item] for item in chosen_word_indices_np
            ]
            correct_answers.extend(answers)
            predicted_answers.extend(chosen_answers)

            # rewards
            # qa reward
            qa_reward_np = reward_helper.get_qa_reward(
                answers, chosen_answers)
            # sufficient info rewards
            masks = [item[-1] for item in transition_cache]
            masks_np = [generic.to_np(item) for item in masks]
            # 1 1 0 0 0 --> 1 1 0 0 0 0
            game_finishing_mask = np.stack(
                masks_np + [np.zeros((batch_size,))],
                0)  # game step + 1 x batch size
            # 1 1 0 0 0 0 --> 0 1 0 0 0
            game_finishing_mask = game_finishing_mask[:-1, :] - \
                game_finishing_mask[1:, :]  # game step x batch size

            if agent.question_type == "location":
                # sufficient info reward: location question
                reward_helper_info["observation_before_finish"] = \
                    answerer_input
                reward_helper_info["game_finishing_mask"] = \
                    game_finishing_mask
                sufficient_info_reward_np = \
                    reward_helper.get_sufficient_info_reward_location(
                        reward_helper_info)
            elif agent.question_type == "existence":
                # sufficient info reward: existence question
                reward_helper_info["observation_before_finish"] = \
                    answerer_input
                # facts before issuing the command (we want to stop at
                # the correct state)
                reward_helper_info["game_facts_per_step"] = game_facts_cache
                reward_helper_info["init_game_facts"] = init_facts
                reward_helper_info["full_facts"] = infos["facts"]
                reward_helper_info["answers"] = answers
                reward_helper_info["game_finishing_mask"] = \
                    game_finishing_mask
                sufficient_info_reward_np = \
                    reward_helper.get_sufficient_info_reward_existence(
                        reward_helper_info)
            elif agent.question_type == "attribute":
                # sufficient info reward: attribute question
                reward_helper_info["answers"] = answers
                # facts before and after issuing commands (we want to
                # compare the difference)
                reward_helper_info["game_facts_per_step"] = game_facts_cache
                reward_helper_info["init_game_facts"] = init_facts
                reward_helper_info["full_facts"] = infos["facts"]
                # commands issued at each step
                reward_helper_info["commands_per_step"] = commands_per_step
                reward_helper_info["game_finishing_mask"] = \
                    game_finishing_mask
                sufficient_info_reward_np = \
                    reward_helper.get_sufficient_info_reward_attribute(
                        reward_helper_info)
            else:
                raise NotImplementedError

            r_qa = np.mean(qa_reward_np)
            r_sufficient_info = np.mean(
                np.sum(sufficient_info_reward_np, -1))
            print_qa_reward.append(r_qa)
            print_sufficient_info_reward.append(r_sufficient_info)

        env.close()

    precision, recall, fscore, _ = precision_recall_fscore_support(
        correct_answers, predicted_answers, average='micro')
    print("\n\n---------- From evaluation --------\n")
    print("precision: %f, recall: %f, f1 score: %f" %
          (precision, recall, fscore))
    print("\n\n---------------------------------")
    print("===== Eval =====: qa acc: {:2.3f} | correct state: {:2.3f}".format(
        np.mean(print_qa_reward), np.mean(print_sufficient_info_reward)))
    return np.mean(print_qa_reward), np.mean(print_sufficient_info_reward)
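# Hedged usage sketch: agent construction and the data layout are
# project-specific; "Agent" and the "./data" path below are assumptions,
# not part of the evaluation code above.
if __name__ == "__main__":
    agent = Agent()  # hypothetical: must expose eval_data_path, question_type, etc.
    qa_acc, correct_state = evaluate("./data", agent)
    print("qa acc: %.3f, correct state: %.3f" % (qa_acc, correct_state))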