def _check_if_state_expected(self, card_list: List[card.Card]):
    """Checks whether the given card configuration matches an expected card state.

    If it does not match the current expected state or the next one, the state is marked invalid."""
    card_set_same = \
        self._current_state_index < len(self._expected_states) \
        and state_delta.card_states_equal(self._expected_states[self._current_state_index], card_list)

    if not card_set_same:
        # Compare with the next one -- maybe it triggered a card or made a set.
        self._current_state_index += 1
        next_card_set_same = \
            self._current_state_index < len(self._expected_states) \
            and state_delta.card_states_equal(self._expected_states[self._current_state_index], card_list)

        if not next_card_set_same:
            self._valid_state = False
def compute_instruction_metric(metric: Metric,
                               example: instruction_example.InstructionExample,
                               predicted_instruction_sequence: List[str],
                               final_state: state_delta.StateDelta,
                               distance_threshold: int = 0) -> float:
    """Computes a single metric comparing a predicted execution against the annotated example."""
    correct_final_state: state_delta.StateDelta = example.get_final_state()
    card_same = state_delta.card_states_equal(correct_final_state.cards, final_state.cards)

    if metric == Metric.SEQUENCE_ACCURACY:
        return 1. if example.get_action_sequence() == predicted_instruction_sequence else 0.
    elif metric == Metric.CARD_ACCURACY:
        return 1. if card_same else 0.
    elif metric == Metric.RELAXED_ENVIRONMENT_ACCURACY:
        return 1. if (card_same and manhattan_distance(
            final_state.follower.get_position(),
            correct_final_state.follower.get_position()) <= distance_threshold) else 0.
    elif metric == Metric.EXACT_ENVIRONMENT_ACCURACY:
        return 1. if (card_same
                      and final_state.follower.get_position() == correct_final_state.follower.get_position()
                      and final_state.follower.get_rotation() == correct_final_state.follower.get_rotation()) else 0.
    elif metric == Metric.AGENT_DISTANCE:
        return manhattan_distance(final_state.follower.get_position(),
                                  correct_final_state.follower.get_position())
    else:
        raise ValueError('Metric %r not supported by compute_instruction_metric' % metric)
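# A minimal usage sketch (kept in comment form; `example`, `predicted_sequence`, and
# `predicted_final_state` are hypothetical stand-ins for a model rollout produced elsewhere
# in the evaluation pipeline):
#
#   card_acc = compute_instruction_metric(
#       Metric.CARD_ACCURACY, example, predicted_sequence, predicted_final_state)
#   relaxed_acc = compute_instruction_metric(
#       Metric.RELAXED_ENVIRONMENT_ACCURACY, example, predicted_sequence,
#       predicted_final_state, distance_threshold=1)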
def _check_for_new_set(self,
                       prev_state_delta: state_delta.StateDelta,
                       action: agent_actions.AgentAction,
                       character: environment_objects.ObjectType) -> None:
    """Checks whether a set was made after the provided state with the action taken."""
    self._current_state_delta = self.get_most_recent_game_info(prev_state_delta, action, character, False)

    # Then check to see if a set was made.
    if self._expected_sets is not None:
        new_set: List[card.Card] = state_delta.set_made(prev_state_delta.cards,
                                                        self._current_state_delta.cards)

        if new_set:
            # Check that this was the expected set.
            new_cards: List[card.Card] = []
            if self._current_set_index >= len(self._expected_sets):
                was_expected_set = False
            else:
                expected_set: List[card.Card] = self._expected_sets[self._current_set_index][0]
                new_cards = self._expected_sets[self._current_set_index][1]
                was_expected_set = state_delta.card_states_equal(expected_set, new_set)

            if was_expected_set:
                self._add_cards(new_cards)
                self._current_state_delta.cards.extend(new_cards)
                self._current_set_index += 1
            else:
                if self._check_valid_state:
                    self._valid_state = False

                if self._args.generate_new_cards():
                    # Now, generate random new cards...
                    new_generated_cards: List[card.Card] = list()
                    while not state_delta.set_exists(self._current_state_delta.cards + new_generated_cards):
                        # Generate new cards.
                        new_generated_cards = list()
                        for _ in range(3):
                            new_generated_cards.append(card.generate_random_card_properties())

                    # Place the cards.
                    updated_generated_cards: List[card.Card] = list()
                    non_obstacle_positions: Set[position.Position] = set()
                    for x in range(environment_util.ENVIRONMENT_DEPTH):
                        for y in range(environment_util.ENVIRONMENT_WIDTH):
                            if position.Position(x, y) not in self._obstacle_positions:
                                non_obstacle_positions.add(position.Position(x, y))

                    for new_card in new_generated_cards:
                        # Possible locations are all places in the map except those with obstacles, and
                        # where other cards are now.
                        possible_locations: Set[position.Position] = \
                            non_obstacle_positions \
                            - set([current_card.get_position() for current_card in self._current_state_delta.cards])
                        chosen_location: position.Position = random.choice(list(possible_locations))
                        new_card.update_position(chosen_location)

                        # Add the card here now so that it won't place later cards on top of it (unlikely,
                        # but possible).
                        self._current_state_delta.cards.append(new_card)
                        updated_generated_cards.append(new_card)

                    self._add_cards(updated_generated_cards)
def get_expected_card_states(self, first_instruction: int = 0) -> List[List[card.Card]]:
    """Gets the expected resulting card states after a given instruction."""
    i: int = 0
    num_instructions: int = 0

    # Find the first movement or finish-command action after that instruction.
    for i, action in enumerate(self._actions):
        if isinstance(action, gameplay_action.FinishCommandAction):
            if num_instructions == first_instruction:
                break
            num_instructions += 1
        if isinstance(action, gameplay_action.MovementAction) \
                and action.get_agent() == environment_objects.ObjectType.FOLLOWER \
                and num_instructions == first_instruction:
            break

    assert isinstance(self._actions[i],
                      (gameplay_action.FinishCommandAction, gameplay_action.MovementAction))

    prev_cards: List[card.Card] = self._actions[i].get_prior_game_info().cards
    unique_states: List[List[card.Card]] = [prev_cards]

    for action in self._actions[i:]:
        if isinstance(action, (gameplay_action.MovementAction, gameplay_action.FinishCommandAction)):
            resulting_cards: List[card.Card] = action.get_prior_game_info().cards

            # See if it changed.
            if not state_delta.card_states_equal(unique_states[-1], resulting_cards):
                unique_states.append(resulting_cards)

    return unique_states
def cascaded_evaluation(game_arguments: game_args.GameArgs,
                        evaluation_arguments: evaluation_args.EvaluationArgs,
                        evaluated_game: cereal_bar_game.CerealBarGame,
                        model: action_generator_model_wrapper.ActionGeneratorModelWrapper,
                        logger: evaluation_logger.EvaluationLogger):
    """Performs cascaded evaluation: starting from each instruction in the game, executes the model
    on consecutive instructions until its card changes deviate from the expected ones."""
    logger.disable_logging()

    number_instructions_followed: List[int] = list()
    score_increases: List[float] = list()

    for i, beginning_example in enumerate(evaluated_game.get_examples()):
        # TODO: Allow to run with a Unity server
        game_server: game.Game = python_game.PythonGame(
            game_arguments, evaluated_game.get_hexes(), evaluated_game.get_objects(),
            evaluated_game.get_initial_state(), verbose=False)

        # Reset to the correct initial state in the game simulator.
        game_server.reset_state(
            leader_actions=beginning_example.get_leader_actions(limit_to_instruction=False),
            state=beginning_example.get_initial_state(),
            num_steps_remaining=beginning_example.get_number_of_moves_in_first_turn(),
            expected_sets=beginning_example.get_expected_sets(),
            num_instructions=beginning_example.get_number_of_instructions_when_starting())

        # Keep track of the partial observations starting from the beginning of this instruction.
        current_partial_observation: partial_observation.PartialObservation = \
            beginning_example.get_partial_observations()[0]

        followed_instruction_index: int = 0

        for j, executed_example in enumerate(evaluated_game.get_examples()[i:]):
            temporary_example = in_game_example.InGameExample(
                game_server.get_game_info(), evaluated_game.get_hexes(),
                evaluated_game.get_objects(), executed_example.get_instruction(),
                current_partial_observation, executed_example.get_touched_cards())

            _, _, visited_states, current_partial_observation = model.get_predictions(
                temporary_example, game_server, evaluation_arguments, logger)

            # Keep only the expected touched cards that are actually present at the start of this
            # instruction's execution.
            expected_cards_changed: List[card.Card] = executed_example.get_touched_cards(
                allow_duplicates=False)
            filtered_expected_cards: List[card.Card] = list()
            for card1 in expected_cards_changed:
                for card2 in visited_states[0].cards:
                    if card1 == card2:
                        filtered_expected_cards.append(card1)

            changed_cards = instruction_example.get_changed_cards_along_trajectory(visited_states)
            changed_expected_cards = state_delta.card_states_equal(filtered_expected_cards, changed_cards)

            if changed_expected_cards:
                followed_instruction_index += 1
            if not changed_expected_cards or not game_server.valid_state():
                break

        number_instructions_followed.append(followed_instruction_index)

        possible_points_scored: int = len(evaluated_game.get_expected_sets())
        if possible_points_scored:
            score_increases.append(float(game_server.get_score()) / possible_points_scored)

    # Each of the N starting instructions can be followed by at most the remaining instructions in the
    # game, so the maximum total number of followed instructions is N + (N - 1) + ... + 1 = N(N + 1) / 2.
    possible_num_followed = len(evaluated_game.get_examples()) * (len(evaluated_game.get_examples()) + 1) / 2

    logger.enable_logging()

    return (float(np.sum(np.array(number_instructions_followed))) / possible_num_followed,
            float(np.mean(np.array(score_increases))) if score_increases else None)
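# A minimal usage sketch (kept in comment form; the argument objects and model wrapper are assumed
# to be constructed by the surrounding evaluation pipeline):
#
#   proportion_followed, score_proportion = cascaded_evaluation(
#       game_arguments, evaluation_arguments, evaluated_game, model, logger)
#
# `proportion_followed` is in [0, 1]; `score_proportion` is None when the game has no expected sets.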