def _test(self, data_point_ix, data_point, test_image, tensorboard=None, debug=False):
    image, metadata = self.server.reset_receive_feedback(data_point)
    pose = int(metadata["y_angle"] / 15.0)
    position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
    state = AgentObservedState(
        instruction=data_point.instruction,
        config=self.config,
        constants=self.constants,
        start_image=image,
        previous_action=None,
        pose=pose,
        position_orientation=position_orientation,
        data_point=data_point,
        prev_instruction=data_point.get_prev_instruction(),
        next_instruction=data_point.get_next_instruction())

    # Test image
    high_quality_test_image_example = self.get_exploration_image()
    print("Image shape is ", high_quality_test_image_example.shape)
    test_image_example = test_image[data_point_ix][0]

    # Predict the goal
    predicted_goal, predictor_error, predicted_pixel, attention_prob = self.get_3d_location(
        test_image_example, data_point, panaroma=True)
    current_bot_location = metadata["x_pos"], metadata["z_pos"]
    current_bot_pose = metadata["y_angle"]
    state.goal = PredictorPlannerAgent.get_goal_location(
        current_bot_location, current_bot_pose, predicted_goal, 32, 32)
    print("Predicted Error ", predictor_error)

    num_actions = 0
    actions = []

    # Dictionary to contain key results
    info = dict()
    info["instruction_string"] = instruction_to_string(data_point.instruction, self.config)
    info["datapoint_id"] = data_point.get_scene_name()
    info["stop_dist_error"] = metadata["stop_dist_error"]
    info["closest_dist_error"] = metadata["closest_dist_error"]
    info["edit_dist_error"] = metadata["edit_dist_error"]
    info["num_actions_taken"] = num_actions
    info["predicted_goal"] = predicted_goal
    info["predicted_error"] = predictor_error
    info["gold_goal"] = data_point.get_destination_list()[-1]
    info["final_location"] = (metadata["x_pos"], metadata["z_pos"])
    info["predicted_screen_pixels"] = predicted_pixel

    self.save_attention_prob(high_quality_test_image_example, attention_prob,
                             info["instruction_string"], info["datapoint_id"])
    # self.save_example(image, info["instruction_string"], info["datapoint_id"], scale=5)

    self.server.halt_and_receive_feedback()
    return metadata, actions, predictor_error, info
def test_auto_segmented(self, test_dataset, logger=None, tensorboard=None,
                        segmenting_type="oracle"):
    assert segmenting_type in ("auto", "oracle")
    self.server.clear_metadata()
    action_counts = [0] * self.action_space.num_actions()
    self.log("Performing testing on paragraphs with segmenting type %r" % segmenting_type, logger)
    metadata = {"feedback": ""}

    for data_point in test_dataset:
        if segmenting_type == "auto":
            segmented_instruction = data_point.get_instruction_auto_segmented()
        else:
            segmented_instruction = data_point.get_instruction_oracle_segmented()
        max_num_actions = self.constants["horizon"]
        image, metadata = self.server.reset_receive_feedback(data_point)

        for instruction_i, instruction in enumerate(segmented_instruction):
            pose = int(metadata["y_angle"] / 15.0)
            position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
            state = AgentObservedState(
                instruction=instruction,
                config=self.config,
                constants=self.constants,
                start_image=image,
                previous_action=None,
                pose=pose,
                position_orientation=position_orientation,
                data_point=data_point,
                prev_instruction=data_point.get_prev_instruction(),
                next_instruction=data_point.get_next_instruction())

            # Reset the actions taken and model state
            num_actions = 0
            model_state = None

            # Predict the goal by taking an exploration image and finding the next suitable place to visit
            exploration_image, _, _ = self.server.explore()
            image_slices = []
            for img_ctr in range(0, 6):
                image_slice = exploration_image[img_ctr * 3:(img_ctr + 1) * 3, :, :]  # 3 x height x width
                # Scale the intensity of the image as done by scipy.misc.imsave
                image_slice = scipy.misc.bytescale(image_slice.swapaxes(0, 1).swapaxes(1, 2))
                image_slices.append(image_slice)

            # Reorder and horizontally stitch the images
            reordered_images = [image_slices[3], image_slices[4], image_slices[5],
                                image_slices[0], image_slices[1], image_slices[2]]
            exploration_image = np.hstack(reordered_images).swapaxes(1, 2).swapaxes(0, 1)  # 3 x height x (width*6)

            start_pos = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
            goal_pos = data_point.get_destination_list()[instruction_i]
            predicted_goal, predictor_error = self.get_3d_location_for_paragraphs(
                exploration_image, instruction, start_pos, goal_pos, panaroma=True)
            current_bot_location = metadata["x_pos"], metadata["z_pos"]
            current_bot_pose = metadata["y_angle"]
            state.goal = PredictorPlannerAgent.get_goal_location(
                current_bot_location, current_bot_pose, predicted_goal, 32, 32)
            print("Predicted Error ", predictor_error)

            while True:
                # Generate probabilities over actions
                if isinstance(self.model, AbstractModel):
                    probabilities = list(torch.exp(self.model.get_probs(state).data))
                elif isinstance(self.model, AbstractIncrementalModel):
                    log_probabilities, model_state, _, _ = self.model.get_probs(
                        state, model_state, volatile=True)
                    probabilities = list(torch.exp(log_probabilities.data))[0]
                else:
                    raise AssertionError("Unhandled Model type.")

                # Use test policy to get the action
                action = self.test_policy(probabilities)
                action_counts[action] += 1

                if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
                    intermediate_goal = data_point.get_destination_list()[instruction_i]
                    agent_position = metadata["x_pos"], metadata["z_pos"]
                    distance = self._l2_distance(agent_position, intermediate_goal)
                    self.log("Instruction is %r " % instruction, logger)
                    self.log("Predicted Goal is %r, Goal Reached is %r and Real goal is %r "
                             % (predicted_goal, agent_position, intermediate_goal), logger)
                    self.log("Agent: Position %r got Distance %r "
                             % (instruction_i + 1, distance), logger)
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(action)

                    # Update the agent state
                    pose = int(metadata["y_angle"] / 15.0)
                    position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
                    state = state.update(
                        image, action, pose=pose,
                        position_orientation=position_orientation,
                        data_point=data_point)

                    # Set the goal based on the current position and angle
                    current_bot_location = metadata["x_pos"], metadata["z_pos"]
                    current_bot_pose = metadata["y_angle"]
                    state.goal = PredictorPlannerAgent.get_goal_location(
                        current_bot_location, current_bot_pose, predicted_goal, 32, 32)
                    num_actions += 1

        image, reward, metadata = self.server.halt_and_receive_feedback()
        if tensorboard is not None:
            tensorboard.log_all_test_errors(
                metadata["edit_dist_error"],
                metadata["closest_dist_error"],
                metadata["stop_dist_error"])

        # Update the scores based on meta_data
        self.meta_data_util.log_results(metadata)

    logging.info("Testing data action counts %r", action_counts)
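# Note: _l2_distance is called by test_auto_segmented above but is not defined in
# this section. A minimal sketch of such a helper on the agent class, under the
# assumption that positions are (x, z) tuples in world coordinates (hypothetical,
# shown only for clarity):
import math

def _l2_distance(self, pos_a, pos_b):
    # Euclidean distance between two 2D positions.
    return math.sqrt((pos_a[0] - pos_b[0]) ** 2 + (pos_a[1] - pos_b[1]) ** 2)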
def test_multi_step_action_types(self, test_dataset, vocab, goal_type=None,
                                 tensorboard=None, logger=None, pushover_logger=None):
    """ Perform multi-step testing: an action-type model predicts a sequence of step
    types, and the goal prediction module is called once per step. """
    self.server.clear_metadata()
    action_counts = [0] * self.action_space.num_actions()
    task_completion_accuracy = 0

    metadata = {"feedback": ""}
    text_embedding_model = self.goal_prediction_model.text_module

    for data_point_ix, data_point in enumerate(test_dataset):
        instruction_string = " ".join([vocab[token_id] for token_id in data_point.instruction])
        self.log("Instruction is %r " % instruction_string, logger)

        # Call the action type model to determine the number of steps
        token_indices = self.action_type_model.decoding_from_indices_to_indices(
            data_point.instruction, text_embedding_model)
        print("Token indices ", token_indices)
        assert len(token_indices) <= 5

        # Call the navigation model
        image, metadata = self.server.reset_receive_feedback(data_point)
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   data_point=data_point)

        num_actions = 0
        max_num_actions = self.constants["horizon"]
        num_inner_loop_steps = int(max_num_actions / max(1, len(token_indices)))
        model_state = None

        for outer_loop_iter in range(0, len(token_indices)):
            if goal_type == "inferred":
                # Get the panorama and set tracking
                self._explore_and_set_tracking(data_point, data_point_ix, instruction_string)
            state.goal = self.get_goal(metadata, goal_type)

            for inner_loop_iter in range(0, num_inner_loop_steps):
                # Generate probabilities over actions
                if isinstance(self.navigation_model, AbstractModel):
                    probabilities = list(torch.exp(self.navigation_model.get_probs(state).data))
                elif isinstance(self.navigation_model, AbstractIncrementalModel):
                    log_probabilities, model_state, _, _ = self.navigation_model.get_probs(
                        state, model_state, volatile=True)
                    probabilities = list(torch.exp(log_probabilities.data))[0]
                else:
                    log_probabilities, model_state = self.navigation_model.get_probs(state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))

                # Use test policy to get the action
                action = self.test_policy(probabilities)
                action_counts[action] += 1

                if token_indices[outer_loop_iter] == 1:
                    print("Performing interaction")
                    row, col, row_real, col_real = state.goal
                    if row is not None and col is not None:
                        act_name = "interact %r %r" % (row, col)
                        interact_action = self.action_space.get_action_index(act_name)
                        image, reward, metadata = self.server.send_action_receive_feedback(interact_action)

                if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(action)

                    # Update the agent state
                    state = state.update(image, action, data_point=data_point)
                    state.goal = self.get_goal(metadata, goal_type)
                    num_actions += 1

            if num_actions >= max_num_actions:
                break

        # Send the halt action and get feedback
        image, reward, metadata = self.server.halt_and_receive_feedback()
        if metadata["navigation-error"] <= 1.0:
            task_completion_accuracy += 1

        # Update the scores based on meta_data
        # self.meta_data_util.log_results(metadata, logger)
        self.log("Overall test results: %r " % metadata, logger)

    task_completion_accuracy = (task_completion_accuracy * 100.0) / float(max(len(test_dataset), 1))
    self.log("Overall test results:", logger)
    self.log("Testing: Final Metadata: %r" % metadata, logger)
    self.log("Testing: Action Distribution: %r" % action_counts, logger)
    self.log("Testing: Task Completion Accuracy: %r " % task_completion_accuracy, logger)
    # self.meta_data_util.log_results(metadata, logger)
    self.log("Testing data action counts %r" % action_counts, logger)
    if pushover_logger is not None:
        pushover_feedback = str(metadata["feedback"])
        pushover_logger.log(pushover_feedback)
def do_train_(house_id, shared_model, config, action_space, meta_data_util, constants,
              train_dataset, tune_dataset, experiment, experiment_name, rank, server,
              logger, model_type, vocab, use_pushover=False):
    logger.log("In Training...")
    launch_k_unity_builds([config["port"]], "./house_" + str(house_id) + "_elmer.x86_64",
                          arg_str="--config ./AssetsHouse/config" + str(house_id) + ".json",
                          cwd="./simulators/house/")
    logger.log("Launched Builds.")
    server.initialize_server()
    logger.log("Server Initialized.")

    # Test policy
    test_policy = gp.get_argmax_action

    if rank == 0:  # client 0 creates a tensorboard server
        tensorboard = Tensorboard(experiment_name)
        logger.log('Created Tensorboard Server.')
    else:
        tensorboard = None

    if use_pushover:
        pushover_logger = PushoverLogger(experiment_name)
    else:
        pushover_logger = None

    # Create a local model for rollouts
    local_model = model_type(config, constants)
    # local_model.train()

    # Create the Agent
    tmp_agent = TmpHouseAgent(server=server,
                              model=local_model,
                              test_policy=test_policy,
                              action_space=action_space,
                              meta_data_util=meta_data_util,
                              config=config,
                              constants=constants)
    logger.log("Created Agent.")

    action_counts = [0] * action_space.num_actions()
    max_epochs = 100000  # constants["max_epochs"]
    dataset_size = len(train_dataset)
    tune_dataset_size = len(tune_dataset)

    if tune_dataset_size > 0:
        # Test on tuning data
        tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard,
                       logger=logger, pushover_logger=pushover_logger)

    # Create the learner to compute the loss
    learner = TmpAsynchronousContextualBandit(shared_model, local_model, action_space,
                                              meta_data_util, config, constants, tensorboard)
    # TODO change 2 --- unity launch moved up
    learner.logger = logger

    for epoch in range(1, max_epochs + 1):

        for data_point_ix, data_point in enumerate(train_dataset):

            # Sync with the shared model
            # local_model.load_state_dict(shared_model.state_dict())
            local_model.load_from_state_dict(shared_model.get_state_dict())

            if (data_point_ix + 1) % 100 == 0:
                logger.log("Done %d out of %d" % (data_point_ix, dataset_size))
                logger.log("Training data action counts %r" % action_counts)

            num_actions = 0
            max_num_actions = constants["horizon"]
            max_num_actions += constants["max_extra_horizon"]

            image, metadata = tmp_agent.server.reset_receive_feedback(data_point)
            instruction = data_point.get_instruction()
            # instruction_str = TmpAsynchronousContextualBandit.convert_indices_to_text(instruction, vocab)
            # print("Instruction str is ", instruction_str)

            # Pose and Orientation gone TODO change 3
            state = AgentObservedState(instruction=instruction,
                                       config=config,
                                       constants=constants,
                                       start_image=image,
                                       previous_action=None,
                                       data_point=data_point)
            state.goal = learner.get_goal(metadata)

            model_state = None
            batch_replay_items = []
            total_reward = 0
            forced_stop = True

            while num_actions < max_num_actions:
                # logger.log("Training: Meta Data %r " % metadata)

                # Sample action using the policy
                log_probabilities, model_state, image_emb_seq, state_feature = \
                    local_model.get_probs(state, model_state)
                probabilities = list(torch.exp(log_probabilities.data))[0]

                # Sample action from the probability
                action = gp.sample_action_from_prob(probabilities)
                action_counts[action] += 1

                if action == action_space.get_stop_action_index():
                    forced_stop = False
                    break

                # Send the action and get feedback
                image, reward, metadata = tmp_agent.server.send_action_receive_feedback(action)
                # logger.log("Action is %r, Reward is %r Probability is %r " % (action, reward, probabilities))

                # Store it in the replay memory list
                replay_item = ReplayMemoryItem(state, action, reward, log_prob=log_probabilities)
                batch_replay_items.append(replay_item)

                # Update the agent state
                # Pose and orientation gone, TODO change 4
                state = state.update(image, action, data_point=data_point)
                state.goal = learner.get_goal(metadata)

                num_actions += 1
                total_reward += reward

            # Send final STOP action and get feedback
            image, reward, metadata = tmp_agent.server.halt_and_receive_feedback()
            total_reward += reward

            # Store it in the replay memory list
            if not forced_stop:
                # logger.log("Action is Stop, Reward is %r Probability is %r " % (reward, probabilities))
                replay_item = ReplayMemoryItem(state, action_space.get_stop_action_index(),
                                               reward, log_prob=log_probabilities)
                batch_replay_items.append(replay_item)

            # Update the scores based on meta_data
            # self.meta_data_util.log_results(metadata)

            # Perform update
            if len(batch_replay_items) > 0:  # 32
                loss_val = learner.do_update(batch_replay_items)

                if tensorboard is not None:
                    # cross_entropy = float(learner.cross_entropy.data[0])
                    # tensorboard.log(cross_entropy, loss_val, 0)
                    tensorboard.log_scalar("loss", loss_val)
                    entropy = float(learner.entropy.data[0]) / float(num_actions + 1)
                    tensorboard.log_scalar("entropy", entropy)
                    ratio = float(learner.ratio.data[0])
                    tensorboard.log_scalar("Abs_objective_to_entropy_ratio", ratio)
                    tensorboard.log_scalar("total_reward", total_reward)
                    tensorboard.log_scalar("mean navigation error", metadata['mean-navigation-error'])

                    if learner.action_prediction_loss is not None:
                        action_prediction_loss = float(learner.action_prediction_loss.data[0])
                        learner.tensorboard.log_action_prediction_loss(action_prediction_loss)
                    if learner.temporal_autoencoder_loss is not None:
                        temporal_autoencoder_loss = float(learner.temporal_autoencoder_loss.data[0])
                        tensorboard.log_temporal_autoencoder_loss(temporal_autoencoder_loss)
                    if learner.object_detection_loss is not None:
                        object_detection_loss = float(learner.object_detection_loss.data[0])
                        tensorboard.log_object_detection_loss(object_detection_loss)
                    if learner.symbolic_language_prediction_loss is not None:
                        symbolic_language_prediction_loss = float(
                            learner.symbolic_language_prediction_loss.data[0])
                        tensorboard.log_scalar("sym_language_prediction_loss",
                                               symbolic_language_prediction_loss)
                    if learner.goal_prediction_loss is not None:
                        goal_prediction_loss = float(learner.goal_prediction_loss.data[0])
                        tensorboard.log_scalar("goal_prediction_loss", goal_prediction_loss)

        # Save the model
        local_model.save_model(experiment + "/contextual_bandit_" + str(rank) + "_epoch_" + str(epoch))
        logger.log("Training data action counts %r" % action_counts)

        if tune_dataset_size > 0:
            # Test on tuning data
            tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard,
                           logger=logger, pushover_logger=pushover_logger)
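# do_train_ above receives a shared model and a worker rank, which matches an
# A3C-style setup where several workers update one shared parameter set. A minimal
# launcher sketch, assuming each worker's argument tuple comes from a hypothetical
# make_worker_args(rank) helper (illustrative only; not part of this codebase):
import torch.multiprocessing as mp

def launch_training(num_workers, shared_model, make_worker_args):
    # Move the shared parameters into shared memory so all workers see updates.
    shared_model.share_memory()
    processes = []
    for rank in range(num_workers):
        p = mp.Process(target=do_train_, args=make_worker_args(rank))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()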
def test_goal_prediction(self, test_dataset, tensorboard=None, logger=None, pushover_logger=None):
    self.server.clear_metadata()
    action_counts = [0] * self.action_space.num_actions()
    task_completion_accuracy = 0

    sum_loss, count, sum_prob, goal_prob_count = 0, 0, 0, 0
    metadata = {"feedback": ""}

    for data_point_ix, data_point in enumerate(test_dataset):
        print("Datapoint index ", data_point_ix)
        image, metadata = self.server.reset_receive_feedback(data_point)
        pose = int(metadata["y_angle"] / 15.0)
        position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
        state = AgentObservedState(
            instruction=data_point.instruction,
            config=self.config,
            constants=self.constants,
            start_image=image,
            previous_action=None,
            pose=pose,
            position_orientation=position_orientation,
            data_point=data_point,
            prev_instruction=data_point.get_prev_instruction(),
            next_instruction=data_point.get_next_instruction())

        ##################################
        state.goal = GoalPrediction.get_goal_location(metadata, data_point, 8, 8)
        print("Instruction is ", instruction_to_string(data_point.instruction, self.config))
        ##################################

        # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()
        num_actions = 0
        max_num_actions = self.constants["horizon"]
        model_state = None

        trajectory = data_point.get_trajectory()[0:1]
        trajectory_len = len(trajectory)

        while True:
            if num_actions == trajectory_len:
                action = self.action_space.get_stop_action_index()
            else:
                action = trajectory[num_actions]

            # Generate probabilities over actions
            if isinstance(self.model, AbstractModel):
                raise NotImplementedError()
            elif isinstance(self.model, AbstractIncrementalModel):
                log_probabilities, model_state, _, volatile = self.model.get_probs(
                    state, model_state, volatile=True)
                probabilities = list(torch.exp(log_probabilities.data))[0]

                # Compute goal prediction accuracy
                goal_loss, prob, _ = self.goal_prediction_accuracy(state.goal, volatile)
                sum_loss += goal_loss
                count += 1
                if prob is not None:
                    sum_prob += prob
                    goal_prob_count += 1
            else:
                raise NotImplementedError()
                # log_probabilities, model_state = self.model.get_probs(state, model_state)
                # probabilities = list(torch.exp(log_probabilities.data))

            action_counts[action] += 1

            if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
                # Send the halt action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback()

                if tensorboard is not None:
                    tensorboard.log_all_test_errors(
                        metadata["edit_dist_error"],
                        metadata["closest_dist_error"],
                        metadata["stop_dist_error"])

                if metadata["stop_dist_error"] < 5.0:
                    task_completion_accuracy += 1

                # Update the scores based on meta_data
                self.meta_data_util.log_results(metadata)
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(action)

                # Update the agent state
                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
                state = state.update(
                    image, action, pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)

                ##################################
                state.goal = GoalPrediction.get_goal_location(metadata, data_point, 8, 8)
                ##################################
                num_actions += 1

    print("Finished testing. Now logging.")
    task_completion_accuracy = (task_completion_accuracy * 100.0) / float(max(len(test_dataset), 1))
    self.log("Overall test results:", logger)
    self.log("Testing: Task completion accuracy is: %r" % task_completion_accuracy, logger)
    self.log("Testing: Final Metadata: %r" % metadata, logger)
    self.log("Testing: Action Distribution: %r" % action_counts, logger)
    self.log("Goal Count %r, Mean Goal Loss %r"
             % (count, sum_loss / float(max(count, 1))), logger)
    self.log("Goal Prob Count %r, Mean Goal Prob %r"
             % (goal_prob_count, sum_prob / float(max(goal_prob_count, 1))), logger)
    self.meta_data_util.log_results(metadata, logger)
    self.log("Testing data action counts %r" % action_counts, logger)
    if pushover_logger is not None:
        pushover_feedback = str(metadata["feedback"]) + " --- " + \
            "task_completion_accuracy=%r" % task_completion_accuracy
        pushover_logger.log(pushover_feedback)
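# goal_prediction_accuracy is called above but defined elsewhere. Given that the
# other evaluators in this section score attention maps with
# GoalPrediction.get_loss_and_prob, a plausible wrapper looks like the sketch
# below (hypothetical; the 8x8 grid size matches the calls above):
def goal_prediction_accuracy(self, goal, volatile):
    # Score the model's attention map against the gold goal location.
    loss, prob, meta = GoalPrediction.get_loss_and_prob(volatile, goal, 8, 8)
    return loss, prob, meta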
def test_single_step(self, test_dataset, vocab, goal_type="gold",
                     tensorboard=None, logger=None, pushover_logger=None):
    """ Perform a single step testing i.e. the goal prediction module is called only once. """
    self.server.clear_metadata()
    action_counts = [0] * self.action_space.num_actions()
    task_completion_accuracy = 0

    metadata = {"feedback": ""}

    for data_point_ix, data_point in enumerate(test_dataset):
        instruction_string = " ".join([vocab[token_id] for token_id in data_point.instruction])
        self.log("Instruction is %r " % instruction_string, logger)

        # Call the navigation model
        image, metadata = self.server.reset_receive_feedback(data_point)

        if goal_type == "inferred":
            # Get the panorama and set tracking
            self._explore_and_set_tracking(data_point, data_point_ix, instruction_string)

        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   data_point=data_point)
        state.goal = self.get_goal(metadata, goal_type)

        num_actions = 0
        max_num_actions = self.constants["horizon"]
        model_state = None

        while True:
            # Generate probabilities over actions
            if isinstance(self.navigation_model, AbstractModel):
                probabilities = list(torch.exp(self.navigation_model.get_probs(state).data))
            elif isinstance(self.navigation_model, AbstractIncrementalModel):
                log_probabilities, model_state, _, _ = self.navigation_model.get_probs(
                    state, model_state, volatile=True)
                probabilities = list(torch.exp(log_probabilities.data))[0]
            else:
                log_probabilities, model_state = self.navigation_model.get_probs(state, model_state)
                probabilities = list(torch.exp(log_probabilities.data))
                # raise AssertionError("Unhandled Model type.")

            # Use test policy to get the action
            action = self.test_policy(probabilities)
            action_counts[action] += 1

            if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
                # Send the halt action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback()
                # if tensorboard is not None:
                #     tensorboard.log_all_test_errors(
                #         metadata["edit_dist_error"],
                #         metadata["closest_dist_error"],
                #         metadata["stop_dist_error"])
                # self.log("Testing: Taking stop action and got reward %r " % reward, logger)

                if metadata["navigation-error"] <= 1.0:
                    task_completion_accuracy += 1

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata, logger)
                self.log("Overall test results: %r " % metadata, logger)

                #############################################
                # Take a dummy manipulation action
                # row, col, row_real, col_real = state.goal
                # if row is not None and col is not None:
                #     act_name = "interact %r %r" % (row, col)
                #     interact_action = self.action_space.get_action_index(act_name)
                #     image, reward, metadata = self.server.send_action_receive_feedback(interact_action)
                #############################################
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(action)
                # self.log("Testing: Taking action %r and got reward %r " % (action, reward), logger)
                # time.sleep(0.5)

                # Update the agent state
                state = state.update(image, action, data_point=data_point)
                state.goal = self.get_goal(metadata, goal_type)
                num_actions += 1

    task_completion_accuracy = (task_completion_accuracy * 100.0) / float(max(len(test_dataset), 1))
    self.log("Overall test results:", logger)
    self.log("Testing: Final Metadata: %r" % metadata, logger)
    self.log("Testing: Action Distribution: %r" % action_counts, logger)
    self.log("Testing: Task Completion Accuracy: %r " % task_completion_accuracy, logger)
    # self.meta_data_util.log_results(metadata, logger)
    self.log("Testing data action counts %r" % action_counts, logger)
    if pushover_logger is not None:
        pushover_feedback = str(metadata["feedback"])
        pushover_logger.log(pushover_feedback)
def _test(self, data_point_ix, data_point, test_image, tensorboard=None, debug=False):
    image, metadata = self.server.reset_receive_feedback(data_point)
    pose = int(metadata["y_angle"] / 15.0)
    position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
    state = AgentObservedState(
        instruction=data_point.instruction,
        config=self.config,
        constants=self.constants,
        start_image=image,
        previous_action=None,
        pose=pose,
        position_orientation=position_orientation,
        data_point=data_point,
        prev_instruction=data_point.get_prev_instruction(),
        next_instruction=data_point.get_next_instruction())

    # Test image
    if test_image is None:
        test_image_example = self.get_exploration_image()
    else:
        test_image_example = test_image[data_point_ix][0]

    # Predict the goal
    predicted_goal, predictor_error, predicted_pixel, attention_prob = self.get_3d_location(
        test_image_example, data_point, panaroma=True)
    current_bot_location = metadata["x_pos"], metadata["z_pos"]
    current_bot_pose = metadata["y_angle"]
    state.goal = PredictorPlannerAgent.get_goal_location(
        current_bot_location, current_bot_pose, predicted_goal, 32, 32)
    print("Predicted Error ", predictor_error)

    num_actions = 0
    max_num_actions = self.constants["horizon"]
    model_state = None
    actions = []
    info = dict()

    while True:
        # Generate probabilities over actions
        if isinstance(self.model, AbstractModel):
            probabilities = list(torch.exp(self.model.get_probs(state).data))
        elif isinstance(self.model, AbstractIncrementalModel):
            log_probabilities, model_state, _, _ = self.model.get_probs(
                state, model_state, volatile=True)
            probabilities = list(torch.exp(log_probabilities.data))[0]
        else:
            raise AssertionError("Unhandled Model type.")

        # Use test policy to get the action
        action = self.test_policy(probabilities)
        actions.append(action)

        if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
            # Send the halt action and get feedback
            image, reward, metadata = self.server.halt_and_receive_feedback()
            if tensorboard is not None:
                tensorboard.log_all_test_errors(
                    metadata["edit_dist_error"],
                    metadata["closest_dist_error"],
                    metadata["stop_dist_error"])

            # Update the scores based on meta_data
            self.meta_data_util.log_results(metadata)

            if debug:
                # Dictionary to contain key results
                info["instruction_string"] = instruction_to_string(data_point.instruction, self.config)
                info["datapoint_id"] = data_point.get_scene_name()
                info["stop_dist_error"] = metadata["stop_dist_error"]
                info["closest_dist_error"] = metadata["closest_dist_error"]
                info["edit_dist_error"] = metadata["edit_dist_error"]
                info["num_actions_taken"] = num_actions
                info["predicted_goal"] = predicted_goal
                info["predicted_error"] = predictor_error
                info["gold_goal"] = data_point.get_destination_list()[-1]
                info["final_location"] = (metadata["x_pos"], metadata["z_pos"])
                info["predicted_screen_pixels"] = predicted_pixel

                self.save_attention_prob(test_image_example, attention_prob,
                                         info["instruction_string"], info["datapoint_id"])
            break
        else:
            # Send the action and get feedback
            image, reward, metadata = self.server.send_action_receive_feedback(action)

            # Update the agent state
            pose = int(metadata["y_angle"] / 15.0)
            position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
            state = state.update(image, action, pose=pose,
                                 position_orientation=position_orientation,
                                 data_point=data_point)

            # Set the goal based on the current position and angle
            current_bot_location = metadata["x_pos"], metadata["z_pos"]
            current_bot_pose = metadata["y_angle"]
            state.goal = PredictorPlannerAgent.get_goal_location(
                current_bot_location, current_bot_pose, predicted_goal, 32, 32)
            num_actions += 1

    # logging.info("Error, Start-Distance, Turn-Angle, %r %r %r", metadata["stop_dist_error"], distance, angle)
    return metadata, actions, predictor_error, info
def test(self, test_dataset, vocab, tensorboard=None, logger=None, pushover_logger=None):
    self.server.clear_metadata()
    action_counts = [0] * self.action_space.num_actions()
    task_completion_accuracy = 0

    metadata = {"feedback": ""}

    for data_point_ix, data_point in enumerate(test_dataset):
        image, metadata = self.server.reset_receive_feedback(data_point)
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   data_point=data_point)
        state.goal = self.get_goal(metadata)
        # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()

        num_actions = 0
        max_num_actions = self.constants["horizon"]
        model_state = None

        while True:
            # Generate probabilities over actions
            if isinstance(self.model, AbstractModel):
                probabilities = list(torch.exp(self.model.get_probs(state).data))
            elif isinstance(self.model, AbstractIncrementalModel):
                log_probabilities, model_state, _, _ = self.model.get_probs(
                    state, model_state, volatile=True)
                probabilities = list(torch.exp(log_probabilities.data))[0]
            else:
                log_probabilities, model_state = self.model.get_probs(state, model_state)
                probabilities = list(torch.exp(log_probabilities.data))
                # raise AssertionError("Unhandled Model type.")

            # Use test policy to get the action
            action = self.test_policy(probabilities)
            action_counts[action] += 1

            if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
                # Send the halt action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback()
                # if tensorboard is not None:
                #     tensorboard.log_all_test_errors(
                #         metadata["edit_dist_error"],
                #         metadata["closest_dist_error"],
                #         metadata["stop_dist_error"])
                # self.log("Testing: Taking stop action and got reward %r " % reward, logger)

                if metadata["navigation-error"] <= 1.0:
                    task_completion_accuracy += 1

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata, logger)
                # self.log("Overall test results: %r " % metadata, logger)
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(action)
                # self.log("Testing: Taking action %r and got reward %r " % (action, reward), logger)
                # time.sleep(0.5)

                # Update the agent state
                state = state.update(image, action, data_point=data_point)
                state.goal = self.get_goal(metadata)
                num_actions += 1

    task_completion_accuracy = (task_completion_accuracy * 100.0) / float(max(len(test_dataset), 1))
    self.log("Overall test results:", logger)
    self.log("Testing: Final Metadata: %r" % metadata, logger)
    self.log("Testing: Action Distribution: %r" % action_counts, logger)
    self.log("Testing: Task Completion Accuracy: %r " % task_completion_accuracy, logger)
    # self.meta_data_util.log_results(metadata, logger)
    self.log("Testing data action counts %r" % action_counts, logger)
    if pushover_logger is not None:
        pushover_feedback = str(metadata["feedback"])
        pushover_logger.log(pushover_feedback)
def _test(self, data_point, tensorboard=None):
    image, metadata = self.server.reset_receive_feedback(data_point)
    pose = int(metadata["y_angle"] / 15.0)
    position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
    state = AgentObservedState(
        instruction=data_point.instruction,
        config=self.config,
        constants=self.constants,
        start_image=image,
        previous_action=None,
        pose=pose,
        position_orientation=position_orientation,
        data_point=data_point,
        prev_instruction=data_point.get_prev_instruction(),
        next_instruction=data_point.get_next_instruction())

    ##################################
    state.goal = GoalPrediction.get_goal_location(metadata, data_point, 32, 32)
    ##################################

    # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()
    num_actions = 0
    max_num_actions = self.constants["horizon"]
    model_state = None
    actions = []

    ###################################
    # distance, angle = self.get_angle_distance(metadata, data_point)
    ###################################

    while True:
        # Generate probabilities over actions
        if isinstance(self.model, AbstractModel):
            probabilities = list(torch.exp(self.model.get_probs(state).data))
        elif isinstance(self.model, AbstractIncrementalModel):
            log_probabilities, model_state, _, _ = self.model.get_probs(
                state, model_state, volatile=True)
            probabilities = list(torch.exp(log_probabilities.data))[0]
        else:
            raise AssertionError("Unhandled Model type.")

        # Use test policy to get the action
        action = self.test_policy(probabilities)
        actions.append(action)

        if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
            # Send the halt action and get feedback
            image, reward, metadata = self.server.halt_and_receive_feedback()
            if tensorboard is not None:
                tensorboard.log_all_test_errors(
                    metadata["edit_dist_error"],
                    metadata["closest_dist_error"],
                    metadata["stop_dist_error"])

            # Update the scores based on meta_data
            self.meta_data_util.log_results(metadata)
            break
        else:
            # Send the action and get feedback
            image, reward, metadata = self.server.send_action_receive_feedback(action)

            # Update the agent state
            pose = int(metadata["y_angle"] / 15.0)
            position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
            state = state.update(image, action, pose=pose,
                                 position_orientation=position_orientation,
                                 data_point=data_point)

            ##################################
            state.goal = GoalPrediction.get_goal_location(metadata, data_point, 32, 32)
            ##################################
            num_actions += 1

    # logging.info("Error, Start-Distance, Turn-Angle, %r %r %r", metadata["stop_dist_error"], distance, angle)
    return metadata, actions
def test(self, tune_dataset, tune_image, tune_goal_location, tensorboard):
    total_validation_loss = 0
    total_validation_prob = 0
    total_validation_exact_accuracy = 0
    total_goal_distance = 0
    num_items = 0

    # Next metric measures when the goal is visible and prediction is within 10% radius
    total_epsilon_accuracy = 0
    num_visible_items = 0

    for data_point_ix, data_point in enumerate(tune_dataset):
        tune_image_example = tune_image[data_point_ix]
        goal_location = tune_goal_location[data_point_ix]
        image = tune_image_example[0]

        model_state = None
        state = AgentObservedState(
            instruction=data_point.instruction,
            config=self.config,
            constants=self.constants,
            start_image=image,
            previous_action=None,
            pose=None,
            position_orientation=data_point.get_start_pos(),
            data_point=data_point)

        trajectory = data_point.get_trajectory()
        if self.only_first:
            trajectory = trajectory[0:1]
        traj_len = len(trajectory)

        num_items_ = 0
        sum_loss = 0
        sum_prob = 0
        sum_acc = 0
        sum_dist = 0

        for action_ix, action in enumerate(trajectory):
            state.goal = goal_location[action_ix]
            volatile = self.model.get_attention_prob(state, model_state)
            goal = goal_location[action_ix]
            row, col, _, _ = goal

            if not self.ignore_none or row is not None:
                if row is None:
                    gold_ix = self.final_height * self.final_width
                else:
                    gold_ix = row * self.final_width + col
                loss, prob, meta = GoalPrediction.get_loss_and_prob(
                    volatile, goal, self.final_height, self.final_width)
                num_items_ += 1
                sum_loss = sum_loss + float(loss.data.cpu().numpy()[0])
                sum_prob = sum_prob + float(prob.data.cpu().numpy()[0])
                inferred_ix = int(torch.max(volatile["attention_logits"], 0)[1].data.cpu().numpy()[0])
                if gold_ix == inferred_ix:
                    sum_acc = sum_acc + 1.0
                if row is not None:
                    # L1 distance on the grid; the column of a flattened index is (index % width)
                    sum_dist = sum_dist + abs(row - int(round(inferred_ix / self.final_width))) \
                               + abs(col - int(inferred_ix % self.final_width))
                if row is not None:
                    num_visible_items += 1
                    if self.is_close_enough(inferred_ix, row, col):
                        total_epsilon_accuracy += 1

            if not self.only_first:
                image = tune_image_example[action_ix + 1]
                state = state.update(image, action, pose=None,
                                     position_orientation=None, data_point=data_point)

        # Evaluate the final state reached after the last action
        if not self.only_first:
            state.goal = goal_location[traj_len]
            volatile = self.model.get_attention_prob(state, model_state)
            goal = goal_location[traj_len]
            row, col, _, _ = goal

            if not self.ignore_none or row is not None:
                if row is None:
                    gold_ix = self.final_height * self.final_width
                else:
                    gold_ix = row * self.final_width + col
                loss, prob, _ = GoalPrediction.get_loss_and_prob(
                    volatile, goal, self.final_height, self.final_width)
                num_items_ += 1
                sum_loss = sum_loss + float(loss.data.cpu().numpy()[0])
                sum_prob = sum_prob + float(prob.data.cpu().numpy()[0])
                inferred_ix = int(torch.max(volatile["attention_logits"], 0)[1].data.cpu().numpy()[0])
                if gold_ix == inferred_ix:
                    sum_acc = sum_acc + 1.0
                if row is not None:
                    sum_dist = sum_dist + abs(row - int(round(inferred_ix / self.final_width))) \
                               + abs(col - int(inferred_ix % self.final_width))
                if row is not None:
                    num_visible_items += 1
                    if self.is_close_enough(inferred_ix, row, col):
                        total_epsilon_accuracy += 1

        total_validation_loss += sum_loss
        total_validation_prob += sum_prob
        total_goal_distance += sum_dist
        total_validation_exact_accuracy += sum_acc
        num_items += num_items_

    mean_total_goal_distance = total_goal_distance / float(max(num_items, 1))
    mean_total_validation_loss = total_validation_loss / float(max(num_items, 1))
    mean_total_validation_prob = total_validation_prob / float(max(num_items, 1))
    mean_total_validation_accuracy = (total_validation_exact_accuracy * 100.0) / float(max(num_items, 1))
    mean_total_epsilon_accuracy = (total_epsilon_accuracy * 100.0) / float(max(num_visible_items, 1))

    logging.info(
        "Mean Test result: L1 Distance is %r, Loss %r, Prob %r, Acc is %r, Epsilon Accuracy is %r"
        % (mean_total_goal_distance, mean_total_validation_loss, mean_total_validation_prob,
           mean_total_validation_accuracy, mean_total_epsilon_accuracy))
    logging.info(
        "Num visible items %r, Num Exact Match items is %r, Num epsilon match %r, Num Items is %r "
        % (num_visible_items, total_validation_exact_accuracy, total_epsilon_accuracy, num_items))
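# is_close_enough is used by the evaluators in this section but not defined here.
# The comment "prediction is within 10% radius" suggests a check like the sketch
# below, where the threshold is a fraction of the attention-grid size
# (hypothetical implementation, assuming self.final_height and self.final_width):
import math

def is_close_enough(self, inferred_ix, row, col):
    # Recover the (row, col) of the predicted cell from the flattened index.
    inferred_row = int(inferred_ix / self.final_width)
    inferred_col = int(inferred_ix % self.final_width)
    # Accept predictions within 10% of the grid diagonal.
    distance = math.sqrt((inferred_row - row) ** 2 + (inferred_col - col) ** 2)
    diagonal = math.sqrt(self.final_height ** 2 + self.final_width ** 2)
    return distance <= 0.1 * diagonal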
def do_train_(shared_model, config, action_space, meta_data_util, constants,
              train_dataset, tune_dataset, experiment, experiment_name, rank,
              server, logger, model_type, use_pushover=False):
    server.initialize_server()

    # Test policy
    test_policy = gp.get_argmax_action

    # torch.manual_seed(args.seed + rank)

    if rank == 0:  # client 0 creates a tensorboard server
        tensorboard = Tensorboard(experiment_name)
    else:
        tensorboard = None

    if use_pushover:
        pushover_logger = PushoverLogger(experiment_name)
    else:
        pushover_logger = None

    # Create a local model for rollouts
    local_model = model_type(config, constants)
    # local_model.train()

    # Create the Agent
    logger.log("STARTING AGENT")
    agent = Agent(server=server,
                  model=local_model,
                  test_policy=test_policy,
                  action_space=action_space,
                  meta_data_util=meta_data_util,
                  config=config,
                  constants=constants)
    logger.log("Created Agent...")

    action_counts = [0] * action_space.num_actions()
    max_epochs = constants["max_epochs"]
    dataset_size = len(train_dataset)
    tune_dataset_size = len(tune_dataset)

    # Create the learner to compute the loss
    learner = AsynchronousAdvantageActorGAECritic(shared_model, local_model, action_space,
                                                  meta_data_util, config, constants, tensorboard)

    # Launch unity
    launch_k_unity_builds([config["port"]], "./simulators/NavDroneLinuxBuild.x86_64")

    for epoch in range(1, max_epochs + 1):
        learner.epoch = epoch
        task_completion_accuracy = 0
        mean_stop_dist_error = 0
        stop_dist_errors = []

        for data_point_ix, data_point in enumerate(train_dataset):

            # Sync with the shared model
            # local_model.load_state_dict(shared_model.state_dict())
            local_model.load_from_state_dict(shared_model.get_state_dict())

            if (data_point_ix + 1) % 100 == 0:
                logger.log("Done %d out of %d" % (data_point_ix, dataset_size))
                logger.log("Training data action counts %r" % action_counts)

            num_actions = 0
            max_num_actions = constants["horizon"] + constants["max_extra_horizon"]

            image, metadata = agent.server.reset_receive_feedback(data_point)
            pose = int(metadata["y_angle"] / 15.0)
            position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
            state = AgentObservedState(
                instruction=data_point.instruction,
                config=config,
                constants=constants,
                start_image=image,
                previous_action=None,
                pose=pose,
                position_orientation=position_orientation,
                data_point=data_point)
            state.goal = GoalPrediction.get_goal_location(
                metadata, data_point, learner.image_height, learner.image_width)

            model_state = None
            batch_replay_items = []
            total_reward = 0
            forced_stop = True

            while num_actions < max_num_actions:
                # Sample action using the policy
                log_probabilities, model_state, image_emb_seq, volatile = \
                    local_model.get_probs(state, model_state)
                probabilities = list(torch.exp(log_probabilities.data))[0]

                # Sample action from the probability
                action = gp.sample_action_from_prob(probabilities)
                action_counts[action] += 1

                # Generate goal
                if config["do_goal_prediction"]:
                    goal = learner.goal_prediction_calculator.get_goal_location(
                        metadata, data_point, learner.image_height, learner.image_width)
                else:
                    goal = None

                if action == action_space.get_stop_action_index():
                    forced_stop = False
                    break

                # Send the action and get feedback
                image, reward, metadata = agent.server.send_action_receive_feedback(action)

                # Store it in the replay memory list
                replay_item = ReplayMemoryItem(state, action, reward,
                                               log_prob=log_probabilities,
                                               volatile=volatile, goal=goal)
                batch_replay_items.append(replay_item)

                # Update the agent state
                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
                state = state.update(
                    image, action, pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)
                state.goal = GoalPrediction.get_goal_location(
                    metadata, data_point, learner.image_height, learner.image_width)

                num_actions += 1
                total_reward += reward

            # Send final STOP action and get feedback
            image, reward, metadata = agent.server.halt_and_receive_feedback()
            total_reward += reward

            if metadata["stop_dist_error"] < 5.0:
                task_completion_accuracy += 1
            mean_stop_dist_error += metadata["stop_dist_error"]
            stop_dist_errors.append(metadata["stop_dist_error"])

            if tensorboard is not None:
                tensorboard.log_all_train_errors(
                    metadata["edit_dist_error"],
                    metadata["closest_dist_error"],
                    metadata["stop_dist_error"])

            # Store it in the replay memory list
            if not forced_stop:
                replay_item = ReplayMemoryItem(
                    state, action_space.get_stop_action_index(), reward,
                    log_prob=log_probabilities, volatile=volatile, goal=goal)
                batch_replay_items.append(replay_item)

            # Update the scores based on meta_data
            # self.meta_data_util.log_results(metadata)

            # Perform update
            if len(batch_replay_items) > 0:  # 32:
                loss_val = learner.do_update(batch_replay_items)
                # self.action_prediction_loss_calculator.predict_action(batch_replay_items)
                # del batch_replay_items[:]  # in place list clear

                if tensorboard is not None:
                    cross_entropy = float(learner.cross_entropy.data[0])
                    tensorboard.log(cross_entropy, loss_val, 0)
                    entropy = float(learner.entropy.data[0]) / float(num_actions + 1)
                    v_value_loss_per_step = float(learner.value_loss.data[0]) / float(num_actions + 1)
                    tensorboard.log_scalar("entropy", entropy)
                    tensorboard.log_scalar("total_reward", total_reward)
                    tensorboard.log_scalar("v_value_loss_per_step", v_value_loss_per_step)
                    ratio = float(learner.ratio.data[0])
                    tensorboard.log_scalar("Abs_objective_to_entropy_ratio", ratio)

                    if learner.action_prediction_loss is not None:
                        action_prediction_loss = float(learner.action_prediction_loss.data[0])
                        learner.tensorboard.log_action_prediction_loss(action_prediction_loss)
                    if learner.temporal_autoencoder_loss is not None:
                        temporal_autoencoder_loss = float(learner.temporal_autoencoder_loss.data[0])
                        tensorboard.log_temporal_autoencoder_loss(temporal_autoencoder_loss)
                    if learner.object_detection_loss is not None:
                        object_detection_loss = float(learner.object_detection_loss.data[0])
                        tensorboard.log_object_detection_loss(object_detection_loss)
                    if learner.symbolic_language_prediction_loss is not None:
                        symbolic_language_prediction_loss = float(
                            learner.symbolic_language_prediction_loss.data[0])
                        tensorboard.log_scalar("sym_language_prediction_loss",
                                               symbolic_language_prediction_loss)
                    if learner.goal_prediction_loss is not None:
                        goal_prediction_loss = float(learner.goal_prediction_loss.data[0])
                        tensorboard.log_scalar("goal_prediction_loss", goal_prediction_loss)

        # Save the model
        local_model.save_model(experiment + "/contextual_bandit_" + str(rank) + "_epoch_" + str(epoch))
        logger.log("Training data action counts %r" % action_counts)

        mean_stop_dist_error = mean_stop_dist_error / float(len(train_dataset))
        task_completion_accuracy = (task_completion_accuracy * 100.0) / float(len(train_dataset))
        logger.log("Training: Mean stop distance error %r" % mean_stop_dist_error)
        logger.log("Training: Task completion accuracy %r " % task_completion_accuracy)
        bins = range(0, 80, 3)  # range of distance
        histogram, _ = np.histogram(stop_dist_errors, bins)
        logger.log("Histogram of train errors %r " % histogram)

        if tune_dataset_size > 0:
            # Test on tuning data
            agent.test(tune_dataset, tensorboard=tensorboard,
                       logger=logger, pushover_logger=pushover_logger)
def test(self, tune_dataset, tensorboard):
    total_validation_loss = 0
    total_validation_prob = 0
    total_validation_exact_accuracy = 0
    total_goal_distance = 0
    num_items = 0

    # Next metric measures when the goal is visible and prediction is within 10% radius
    total_epsilon_accuracy = 0
    num_visible_items = 0

    # Next metric measures distance in real world and only when goal is visible
    total_real_world_distance = 0

    for data_point_ix, data_point in enumerate(tune_dataset):
        model_state = None
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=data_point.start_image,
                                   previous_action=None,
                                   pose=None,
                                   position_orientation=None,
                                   data_point=data_point)

        num_items_ = 0
        sum_loss = 0
        sum_prob = 0
        sum_acc = 0
        sum_dist = 0
        sum_real_world_distance = 0

        row, col = data_point.goal_pixel
        goal = row, col, row, col
        state.goal = goal
        volatile = self.model.get_attention_prob(state, model_state)

        if not self.ignore_none or row is not None:
            gold_ix = row * self.final_width + col
            loss, prob, meta = GoalPrediction.get_loss_and_prob(
                volatile, goal, self.final_height, self.final_width)
            num_items_ += 1
            sum_loss = sum_loss + float(loss.data.cpu().numpy()[0])
            sum_prob = sum_prob + float(prob.data.cpu().numpy()[0])

            inferred_ix, row_col = self.get_inferred_value(volatile)

            if gold_ix == inferred_ix:
                sum_acc = sum_acc + 1.0
            if row is not None and col is not None:
                # L1 distance on the grid; the column of a flattened index is (index % width)
                sum_dist = sum_dist + abs(row - int(round(inferred_ix / self.final_width))) \
                           + abs(col - int(inferred_ix % self.final_width))
                num_visible_items += 1
                if self.is_close_enough(inferred_ix, row, col):
                    total_epsilon_accuracy += 1
                real_world_distance = self.compute_distance_in_real_world(inferred_ix, data_point)
                sum_real_world_distance += real_world_distance

                # Save the map
                instruction_string = instruction_to_string(data_point.instruction, self.config)
                # goal_x, goal_y = data_point.goal_location
                # goal_x, goal_y = round(goal_x, 2), round(goal_y, 2)
                # predicted_goal_x, predicted_goal_y = predicted_goal
                # predicted_goal_x, predicted_goal_y = round(predicted_goal_x, 2), round(predicted_goal_y, 2)
                # instruction_string = instruction_string + \
                #     "\n (Error: " + str(round(sum_real_world_distance, 2)) + ")" + \
                #     "\n %r %r %r %r \n" % (goal_x, goal_y, predicted_goal_x, predicted_goal_y)
                # self.show_image(data_point.get_destination_list()[-1], predicted_goal,
                #                 data_point.get_start_pos(), instruction_string)

                # Save the generated image
                self.global_id += 1
                if self.global_id % 25 == 0:
                    goal_prob = GoalPrediction.generate_gold_prob(goal, 32, 32)
                    predicted_goal = (int(inferred_ix / 32), inferred_ix % 32,
                                      int(inferred_ix / 32), inferred_ix % 32)
                    predicted_goal_prob = GoalPrediction.generate_gold_prob(predicted_goal, 32, 32)
                    self.save_attention_prob(data_point.start_image,
                                             volatile["attention_probs"][:-1].view(32, 32),
                                             data_point.instruction_string,
                                             goal_prob[:-1].view(32, 32))
                    self.save_attention_prob(data_point.start_image,
                                             predicted_goal_prob[:-1].view(32, 32),
                                             data_point.instruction_string,
                                             goal_prob[:-1].view(32, 32))

        total_validation_loss += sum_loss
        total_validation_prob += sum_prob
        total_goal_distance += sum_dist
        total_validation_exact_accuracy += sum_acc
        total_real_world_distance += sum_real_world_distance
        num_items += num_items_

    mean_total_goal_distance = total_goal_distance / float(max(num_items, 1))
    mean_total_validation_loss = total_validation_loss / float(max(num_items, 1))
    mean_total_validation_prob = total_validation_prob / float(max(num_items, 1))
    mean_total_validation_accuracy = (total_validation_exact_accuracy * 100.0) / float(max(num_items, 1))
    mean_total_epsilon_accuracy = (total_epsilon_accuracy * 100.0) / float(max(num_visible_items, 1))
    mean_real_world_distance = total_real_world_distance / float(max(num_visible_items, 1))

    logging.info(
        "Mean Test result: L1 Distance is %r, Loss %r, Prob %r, Acc is %r, Epsilon Accuracy is %r"
        % (mean_total_goal_distance, mean_total_validation_loss, mean_total_validation_prob,
           mean_total_validation_accuracy, mean_total_epsilon_accuracy))
    logging.info(
        "Num visible items %r, Num Exact Match items is %r, Num epsilon match %r, Num Items is %r "
        % (num_visible_items, total_validation_exact_accuracy, total_epsilon_accuracy, num_items))
    logging.info("Num visible items %r, Mean Real World Distance %r "
                 % (num_visible_items, mean_real_world_distance))

    return mean_real_world_distance
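# get_inferred_value is used above but defined elsewhere. It should agree with the
# inline argmax used by the trajectory-level evaluator earlier in this section; a
# minimal sketch consistent with that code (assumed, not verified against the repo):
def get_inferred_value(self, volatile):
    # Greedy prediction: argmax over the flattened attention logits.
    inferred_ix = int(torch.max(volatile["attention_logits"], 0)[1].data.cpu().numpy()[0])
    row_col = (int(inferred_ix / self.final_width), int(inferred_ix % self.final_width))
    return inferred_ix, row_col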
def test(self, tune_dataset, tensorboard):
    total_validation_loss = 0
    total_validation_prob = 0
    total_validation_exact_accuracy = 0
    total_validation_visible_exact_accuracy = 0
    total_goal_distance = 0
    num_items = 0

    # Next metric measures when the goal is visible and prediction is within 10% radius
    total_epsilon_accuracy = 0
    num_visible_items = 0

    for data_point_ix, data_point in enumerate(tune_dataset):
        tune_image_example = data_point.image
        goal_location = data_point.goal_pixel
        image = tune_image_example[0]

        model_state = None
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   pose=None,
                                   position_orientation=None,
                                   data_point=data_point)

        num_items_ = 0
        sum_loss = 0
        sum_prob = 0
        sum_acc = 0
        sum_visible_exact = 0
        sum_dist = 0

        goal = goal_location[0]
        state.goal = goal
        volatile = self.model.get_attention_prob(state, model_state)
        row, col, _, _ = goal

        logging.info("Instruction is %s " % data_point.instruction_string)

        if not self.ignore_none or row is not None:
            if row is None or col is None:
                gold_ix = self.final_height * self.final_width
            else:
                gold_ix = row * self.final_width + col
            loss, prob, meta = GoalPrediction.get_loss_and_prob(
                volatile, goal, self.final_height, self.final_width)
            num_items_ += 1
            sum_loss = sum_loss + float(loss.data.cpu().numpy()[0])
            sum_prob = sum_prob + float(prob.data.cpu().numpy()[0])

            inferred_ix, row_col = self.get_inferred_value(volatile)

            if gold_ix == inferred_ix:
                sum_acc = sum_acc + 1.0
                logging.info("Exact Match")
            else:
                logging.info("Did Not Match Exactly")

            if row is not None and col is not None:
                # L1 distance on the grid; the column of a flattened index is (index % width)
                sum_dist = sum_dist + abs(row - int(round(inferred_ix / self.final_width))) \
                           + abs(col - int(inferred_ix % self.final_width))
                num_visible_items += 1
                if self.is_close_enough(inferred_ix, row, col):
                    total_epsilon_accuracy += 1
                if gold_ix == inferred_ix:
                    sum_visible_exact += 1

            # # Save the map
            # instruction_string = instruction_to_string(data_point.instruction, self.config)
            # goal_x, goal_y = data_point.get_destination_list()[-1]
            # goal_x, goal_y = round(goal_x, 2), round(goal_y, 2)
            # predicted_goal_x, predicted_goal_y = predicted_goal
            # predicted_goal_x, predicted_goal_y = round(predicted_goal_x, 2), round(predicted_goal_y, 2)
            # instruction_string = instruction_string + \
            #     "\n (Error: " + str(round(sum_real_world_distance, 2)) + ")" + \
            #     "\n %r %r %r %r \n" % (goal_x, goal_y, predicted_goal_x, predicted_goal_y)
            # self.show_image(data_point.get_destination_list()[-1], predicted_goal,
            #                 data_point.get_start_pos(), instruction_string)
            #
            # # Save the generated image
            # goal_prob = GoalPrediction.generate_gold_prob(goal, 32, 192)
            # predicted_goal = (int(inferred_ix/192), inferred_ix % 192, int(inferred_ix/192), inferred_ix % 192)
            # predicted_goal_prob = GoalPrediction.generate_gold_prob(predicted_goal, 32, 192)
            # self.save_attention_prob(image, volatile["attention_probs"][:-1].view(32, 192),
            #                          instruction_string, goal_prob[:-1].view(32, 192))
            # self.save_attention_prob(image, predicted_goal_prob[:-1].view(32, 192),
            #                          instruction_string, goal_prob[:-1].view(32, 192))

        total_validation_loss += sum_loss
        total_validation_prob += sum_prob
        total_goal_distance += sum_dist
        total_validation_exact_accuracy += sum_acc
        total_validation_visible_exact_accuracy += sum_visible_exact
        num_items += num_items_

    # Metric over the entire data
    mean_total_validation_accuracy = (total_validation_exact_accuracy * 100.0) / float(max(num_items, 1))
    mean_total_validation_loss = total_validation_loss / float(max(num_items, 1))
    mean_total_validation_prob = total_validation_prob / float(max(num_items, 1))

    # Metric over examples where the goal is visible
    mean_total_goal_distance = total_goal_distance / float(max(num_visible_items, 1))
    mean_total_epsilon_accuracy = (total_epsilon_accuracy * 100.0) / float(max(num_visible_items, 1))
    visible_accuracy = (total_validation_visible_exact_accuracy * 100.0) / float(max(num_visible_items, 1))

    logging.info(
        "Mean Test result (All Data): Num items %r, Exact Match %r, Loss %r, Prob %r, "
        % (num_items, mean_total_validation_accuracy, mean_total_validation_loss,
           mean_total_validation_prob))
    logging.info(
        "Num visible items %r, Visible Exact Match Accuracy %r, L1 distance %r, Epsilon Accuracy %r"
        % (num_visible_items, visible_accuracy, mean_total_goal_distance,
           mean_total_epsilon_accuracy))

    return mean_total_validation_accuracy