def debug_manual_control(self, data_point, vocab):
    """Drive the agent manually from the console for a single data point (debug aid).

    Shows the goal location each step, reads an action index from stdin
    (0: Forward, 1: Left, 2: Right, 3: Stop, 4: Interact), forwards it to the
    simulation server, and prints the returned metadata and reward. Stops when
    the user chooses the stop action.

    :param data_point: the task instance to reset the server with.
    :param vocab: index -> token mapping used to print the instruction.
    """
    self.server.clear_metadata()
    # Counts success for this single episode (navigation error <= 1.0); the
    # value is only kept locally — this debug routine does not report it.
    task_completion_accuracy = 0
    image, metadata = self.server.reset_receive_feedback(data_point)
    state = AgentObservedState(instruction=data_point.instruction,
                               config=self.config,
                               constants=self.constants,
                               start_image=image,
                               previous_action=None,
                               data_point=data_point)
    num_actions = 0
    print("Instruction is ", " ".join([vocab[index] for index in data_point.instruction]))
    plt.ion()  # interactive plotting so show_goal_location can refresh the figure
    while True:
        # Show the goal location
        self.show_goal_location(image, metadata)
        incorrect_action = True
        action_string = None
        # Keep prompting until one of the five valid menu choices is entered.
        while incorrect_action:
            action_string = input(
                "Take the action. 0: Forward, 1: Left, 2: Right, 3: Stop, 4: Interact\n")
            if action_string in ['0', '1', '2', '3', '4']:
                incorrect_action = False
        if action_string == '4':
            # Interact needs a target cell; actions 4.. encode (row, col) as
            # 4 + row * 32 + col (assumes a 32-column manipulation grid —
            # TODO confirm against the action space definition).
            interact_values = input(
                "Enter the row and column in format: row col")
            row, col = interact_values.split()
            row, col = int(row), int(col)
            action_string = 4 + row * 32 + col
        action = int(action_string)
        action_name = self.action_space.get_action_name(action)
        if action == self.action_space.get_stop_action_index():
            # Send the action and get feedback
            image, reward, metadata = self.server.halt_and_receive_feedback()
            print("Metadata is ", metadata)
            if metadata["navigation-error"] <= 1.0:
                task_completion_accuracy += 1
            break
        else:
            # Send the action and get feedback
            image, reward, metadata = self.server.send_action_receive_feedback(action)
            # Update the agent state
            state = state.update(image, action, data_point=data_point)
            num_actions += 1
            print("Metadata is ", metadata)
            print("Took action %r, Got reward %r" % (action_name, reward))
def do_train(self, agent, train_dataset, test_dataset, experiment_name):
    """ Perform training of the segmentation-prediction model.

    For every data point, walks the oracle-segmented instruction token by
    token and labels each read-step 0 (segment continues) or 1 (segment
    ends). The (state, label) pairs are pooled in a global replay memory;
    every `clock_max` data points a batch is sampled and one gradient
    update is performed. The model is saved after each epoch.

    :param agent: agent passed through to the classifier test.
    :param train_dataset: list of training data points.
    :param test_dataset: list of data points used by test_classifier.
    :param experiment_name: directory prefix used when saving the model.
    """
    dataset_size = len(train_dataset)
    clock = 0
    clock_max = 1  # 32  -- update after every data point; 32 was an earlier batching choice
    for epoch in range(1, self.max_epoch + 1):
        logging.info("Starting epoch %d", epoch)
        # Evaluate before training so epoch-1 numbers reflect the untrained model.
        self.test_classifier(agent, test_dataset)
        for data_point_ix, data_point in enumerate(train_dataset):
            if (data_point_ix + 1) % 100 == 0:
                logging.info("Done %d out of %d", data_point_ix, dataset_size)
            batch_replay_items = []
            # No image is needed for segmentation prediction, hence start_image=None.
            state = AgentObservedState(
                instruction=data_point.instruction,
                config=self.config,
                constants=self.constants,
                start_image=None,  # image,
                previous_action=None)
            segments = data_point.get_instruction_oracle_segmented()
            segment_lens = [len(s) for s in segments]
            for seg_len in segment_lens:
                num_read = 0
                while num_read < seg_len:
                    state = state.update_on_read()
                    num_read += 1
                    # Label 0 while inside a segment, 1 at the segment boundary.
                    if num_read < seg_len:
                        batch_replay_items.append((state, 0))
                    else:
                        batch_replay_items.append((state, 1))
                # Mark the act-halt transition between segments.
                state = state.update_on_act_halt()
            # add to global memory
            for replay_item in batch_replay_items:
                self.global_replay_memory.append(replay_item)
            clock += 1
            if clock % clock_max == 0:
                batch_replay_items = self.sample_from_global_memory()
                self.global_replay_memory.clear()
                clock = 0
                # Perform update
                loss_val = self.do_update(batch_replay_items)
                self.tensorboard.log_loglikelihood_position(loss_val)
        # Save the model
        self.model.save_model(experiment_name + "/mle_segmentation_prediction_epoch_" + str(epoch))
def test_baseline(self, test_dataset):
    """Evaluate the baseline policy on ``test_dataset``.

    For each data point the environment is reset, actions from
    ``get_next_action`` are executed until the stop action is chosen or the
    horizon (scaled by the number of oracle instruction segments) is
    exhausted, and success is counted when ``stop_dist_error`` < 5.0.
    Logs the task-completion accuracy (percent) and the mean episode length.

    :param test_dataset: list of data points to evaluate on.
    """
    self.server.clear_metadata()
    metadata = {"feedback": ""}
    num_actions_list = []
    task_completion_accuracy = 0
    for data_point in test_dataset:
        image, metadata = self.server.reset_receive_feedback(data_point)
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None)
        num_actions = 0
        # max_num_actions = len(data_point.get_trajectory())
        # max_num_actions += self.constants["max_extra_horizon"]
        # Horizon scales with the number of oracle instruction segments.
        num_segments = len(data_point.get_instruction_oracle_segmented())
        max_num_actions = self.constants["horizon"] * num_segments
        while True:
            action = self.get_next_action(data_point, num_actions)
            if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback()
                num_actions_list.append(num_actions)
                self.meta_data_util.log_results(metadata)
                if metadata["stop_dist_error"] < 5.0:
                    task_completion_accuracy += 1
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(action)
                # Update the agent state
                state = state.update(image, action)
                num_actions += 1
                # self._save_agent_state(state, num_actions)
        self.meta_data_util.log_results(metadata)
    task_completion_accuracy /= float(max(len(test_dataset), 1))
    task_completion_accuracy *= 100.0
    # Guard the empty-dataset case: np.array([]).mean() returns nan and emits
    # a RuntimeWarning, which would propagate into the log line below.
    if num_actions_list:
        mean_num_actions = float(np.array(num_actions_list).mean())
    else:
        mean_num_actions = 0.0
    logging.info("Task completion accuracy %r", task_completion_accuracy)
    logging.info("Done testing baseline %r, mean num actions is %f",
                 self.baseline_name, mean_num_actions)
def test_human_performance(self, dataset, vocab, logger):
    """Measure human performance: a person drives each task from the console.

    For each data point the instruction is printed, actions are read from
    stdin (0: Forward, 1: Left, 2: Right, 3: Stop, 4: Interact) and sent to
    the server; an episode counts as completed when the final
    ``navigation-error`` is <= 1.0. The aggregate success count is logged at
    the end.

    :param dataset: list of data points for the human to attempt.
    :param vocab: index -> token mapping used to print the instruction.
    :param logger: object with a ``log(msg)`` method for result reporting.
    """
    self.server.clear_metadata()
    # BUG FIX: this counter used to be (re)initialized inside the loop below,
    # so it was reset for every data point and never accumulated.
    task_completion_accuracy = 0
    for data_point in dataset:
        image, metadata = self.server.reset_receive_feedback(data_point)
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   data_point=data_point)
        num_actions = 0
        print("Instruction is ", " ".join([vocab[index] for index in data_point.instruction]))
        while True:
            incorrect_action = True
            action_string = None
            # Keep prompting until one of the five valid menu choices is entered.
            while incorrect_action:
                action_string = input(
                    "Take the action. 0: Forward, 1: Left, 2: Right, 3: Stop, 4: Interact\n")
                if action_string in ['0', '1', '2', '3', '4']:
                    incorrect_action = False
            if action_string == '4':
                # Interact targets a grid cell; actions 4.. encode (row, col)
                # as 4 + row * 32 + col (assumes a 32-column grid — TODO confirm).
                interact_values = input(
                    "Enter the row and column in format: row col")
                row, col = interact_values.split()
                row, col = int(row), int(col)
                action_string = 4 + row * 32 + col
            action = int(action_string)
            if action == self.action_space.get_stop_action_index():
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback()
                if metadata["navigation-error"] <= 1.0:
                    task_completion_accuracy += 1
                    logger.log("Completed the task")
                logger.log("Meta data is %r " % metadata)
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(action)
                # Update the agent state
                state = state.update(image, action, data_point=data_point)
                num_actions += 1
    # Report the aggregate now that the counter survives across data points.
    logger.log("Human performance: completed %r out of %r tasks" %
               (task_completion_accuracy, len(dataset)))
def get_3d_location(self, exploration_image, data_point, panaroma=True):
    """Predict the goal's world position from the exploration image.

    Runs the goal predictor over ``exploration_image``, takes the argmax
    attention cell, converts it to a (row, col) screen position, and inverts
    the camera projection to get an (x, z) world position. Also returns the
    distance to the true goal for evaluation.

    :param exploration_image: image (panorama of 6 views when ``panaroma``)
        to run the goal predictor on.
    :param data_point: task instance providing start pose and destination.
    :param panaroma: when True, treat the image as a 6-view panorama and
        re-derive the camera angle from the predicted region. (Name keeps the
        original spelling for caller compatibility.)
    :return: tuple (predicted_goal_pos, dist, screen_pos, attention_probs).
    """
    state = AgentObservedState(
        instruction=data_point.instruction,
        config=self.config,
        constants=self.constants,
        start_image=exploration_image,
        previous_action=None,
        pose=None,
        position_orientation=data_point.get_start_pos(),
        data_point=data_point)
    volatile = self.predictor_model.get_attention_prob(state, model_state=None)
    # Argmax over flattened attention logits -> flat cell index.
    inferred_ix = int(
        torch.max(volatile["attention_logits"], 0)[1].data.cpu().numpy()[0])
    # NOTE(review): known issue — when inferred_ix is the final (out-of-sight)
    # index, the row/col calculation below is buggy.
    # Flat index -> (row, col) on a 192-column attention grid
    # (presumably 6 views x 32 columns — TODO confirm).
    predicted_row = int(inferred_ix / float(192))
    predicted_col = inferred_ix % 192
    screen_pos = (predicted_row, predicted_col)
    if panaroma:
        # Index of the 6 image where the goal is
        region_index = int(predicted_col / 32)
        predicted_col = predicted_col % 32  # Column within that image where the goal is
        pos = data_point.get_start_pos()
        # Camera angle corresponding to the panorama region containing the goal.
        new_pos_angle = GoalPredictionSingle360ImageSupervisedLearningFromDisk.\
            get_new_pos_angle_from_region_index(region_index, pos)
        metadata = {"x_pos": pos[0], "z_pos": pos[1], "y_angle": new_pos_angle}
    else:
        pos = data_point.get_start_pos()
        metadata = {"x_pos": pos[0], "z_pos": pos[1], "y_angle": pos[2]}
    # Use the cell center (+0.5) for the inverse projection.
    row, col = predicted_row + 0.5, predicted_col + 0.5
    start_pos = current_pos_from_metadata(metadata)
    start_pose = current_pose_from_metadata(metadata)
    goal_pos = data_point.get_destination_list()[-1]
    height_drone = 2.5  # fixed camera height assumed by the projection model
    # Invert the projection: (30, 32, 32) are presumably FOV and grid size —
    # TODO confirm against get_inverse_object_position's signature.
    x_gen, z_gen = get_inverse_object_position(
        row, col, height_drone, 30, 32, 32,
        (start_pos[0], start_pos[1], start_pose))
    predicted_goal_pos = (x_gen, z_gen)
    x_goal, z_goal = goal_pos
    x_diff = x_gen - x_goal
    z_diff = z_gen - z_goal
    # Euclidean distance between predicted and true goal in the x-z plane.
    dist = math.sqrt(x_diff * x_diff + z_diff * z_diff)
    return predicted_goal_pos, dist, screen_pos, volatile["attention_probs"]
def debug_human_control(self, data_point, tensorboard=None):
    """Let a human drive the agent for one data point via console input.

    Prints a menu of all actions, repeatedly reads an action index from
    stdin and forwards it to the server until the stop action is entered or
    the horizon is exhausted, then halts the episode.

    :param data_point: the task instance to reset the server with.
    :param tensorboard: optional logger; when given, the final navigation
        error is recorded under "navigation_error".
    :return: tuple (final metadata dict, list of action indices taken).
    """
    first_image, metadata = self.server.reset_receive_feedback(data_point)
    agent_state = AgentObservedState(instruction=data_point.instruction,
                                     config=self.config,
                                     constants=self.constants,
                                     start_image=first_image,
                                     previous_action=None,
                                     data_point=data_point)

    step_count = 0
    step_budget = self.constants["horizon"]
    taken_actions = []

    # Build the "<index> (<name>)" menu shown at every prompt.
    menu_parts = []
    for action_id in range(self.action_space.num_actions()):
        menu_parts.append("%d (%s) " % (
            action_id, self.action_space.get_action_name(action_id)) + " ")
    message = "".join(menu_parts)

    while True:
        # Use test policy to get the action
        chosen = int(input("Take action according to the message: " + str(message)))
        taken_actions.append(chosen)

        stop_requested = chosen == self.action_space.get_stop_action_index()
        if stop_requested or step_count >= step_budget:
            # Send the action and get feedback
            image, reward, metadata = self.server.halt_and_receive_feedback()
            if tensorboard is not None:
                tensorboard.log_scalar("navigation_error", metadata["navigation_error"])
            # Update the scores based on meta_data
            self.meta_data_util.log_results(metadata)
            return metadata, taken_actions

        # Send the action and get feedback
        image, reward, metadata = self.server.send_action_receive_feedback(chosen)
        # Update the agent state
        agent_state = agent_state.update(image, chosen, data_point=data_point)
        step_count += 1
def calc_log_prob(self, tune_dataset, tune_image, tensorboard):
    """Compute the mean per-step log probability of the gold trajectories.

    For each tuning data point, replays the gold trajectory through the
    model (carrying the incremental model state), accumulates the log
    probability of each gold action plus the final stop action, normalizes
    by trajectory length + 1, logs the per-example mean to tensorboard, and
    logs the dataset-wide mean at the end.

    :param tune_dataset: list of tuning data points.
    :param tune_image: per-data-point image sequences; tune_image[i][t] is
        the image before step t of data point i (len = trajectory length + 1).
    :param tensorboard: logger with a ``log_scalar(name, value)`` method.
    """
    total_validation_log_probability = 0
    for data_point_ix, data_point in enumerate(tune_dataset):
        tune_image_example = tune_image[data_point_ix]
        image = tune_image_example[0]
        model_state = None
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   pose=None,
                                   position_orientation=None,
                                   data_point=data_point)
        trajectory = data_point.get_trajectory()
        validation_log_probability = 0
        for action_ix, action in enumerate(trajectory):
            # Incremental model: model_state threads recurrent state through steps.
            log_probabilities, model_state, image_emb_seq = self.model.get_probs(
                state, model_state)
            # Log prob of the gold action at this step.
            validation_log_probability += float(
                log_probabilities.data[0][action])
            image = tune_image_example[action_ix + 1]
            state = state.update(image, action, pose=None,
                                 position_orientation=None, data_point=data_point)
        # Account for the probability of stopping at the end of the trajectory.
        log_probabilities, model_state, image_emb_seq = self.model.get_probs(
            state, model_state)
        validation_log_probability += float(log_probabilities.data[0][
            self.action_space.get_stop_action_index()])
        # Normalize by trajectory length + 1 (the +1 covers the stop action).
        mean_validation_log_probability = validation_log_probability / float(
            len(trajectory) + 1)
        tensorboard.log_scalar("Validation_Log_Prob", mean_validation_log_probability)
        total_validation_log_probability += mean_validation_log_probability
    total_validation_log_probability /= float(max(len(tune_dataset), 1))
    logging.info("Mean Validation Log Prob is %r", total_validation_log_probability)
def _explore_and_set_tracking(self, server, data_point):
    """Explore for a panorama, sample a predicted goal cell, and point a camera at it.

    Runs the local goal predictor on the panorama, samples a cell from the
    attention distribution (excluding the out-of-sight bucket), maps the cell
    to the corresponding camera and normalized (row, col) coordinates, and
    instructs the server to track that point.

    :param server: simulation server providing ``explore`` and ``set_tracking``.
    :param data_point: task instance supplying the instruction.
    :return: the probability mass the model put on the sampled cell, or
        ``None`` when the model predicts the goal is out of sight.
    """
    # Camera index for each of the 6 panorama regions; region r -> camera (r + 3) % 6.
    camera_for_region = (3, 4, 5, 0, 1, 2)

    # Get the panoramic image
    panorama, _ = server.explore()

    # Get the panorama and predict the goal location
    state = AgentObservedState(instruction=data_point.instruction,
                               config=self.config,
                               constants=self.constants,
                               start_image=panorama,
                               previous_action=None,
                               pose=None,
                               position_orientation=None,
                               data_point=data_point)
    volatile = self.local_predictor_model.get_attention_prob(state, model_state=None)
    # Sample a cell; the [:-1] slice drops the final (out-of-sight) bucket.
    attention_prob = list(volatile["attention_probs"].view(-1)[:-1].data.cpu().numpy())
    inferred_ix = gp.sample_action_from_prob(attention_prob)
    # NOTE(review): attention_probs is indexed with a flat index here while it
    # was flattened via .view(-1) above — confirm attention_probs is 1-D.
    sampled_prob = volatile["attention_probs"][inferred_ix]

    # Hoisted: total number of cells across the 6 views.
    num_cells = 6 * self.config["num_manipulation_row"] * self.config["num_manipulation_col"]
    # NOTE(review): sampling already excludes the last bucket, so this branch
    # is effectively unreachable; kept as a defensive guard.
    if inferred_ix == num_cells:
        print("Predicting Out-of-sight")
        return None

    assert 0 <= inferred_ix < num_cells
    row = int(inferred_ix / (6 * self.config["num_manipulation_col"]))
    col = inferred_ix % (6 * self.config["num_manipulation_col"])
    region_ix = int(col / self.config["num_manipulation_col"])
    if not 0 <= region_ix <= 5:
        raise AssertionError("region ix should be in {0, 1, 2, 3, 4, 5}. Found ", region_ix)
    # Table lookup replaces the former six-branch if/elif chain (same mapping).
    camera_ix = camera_for_region[region_ix]
    col = col % self.config["num_manipulation_col"]

    # Set tracking: convert cell indices to normalized [0, 1] cell-center coordinates.
    row_value = min(1.0, (row + 0.5) / float(self.config["num_manipulation_row"]))
    col_value = min(1.0, (col + 0.5) / float(self.config["num_manipulation_col"]))
    server.set_tracking(camera_ix, row_value, col_value)
    return sampled_prob
def do_train(self, agent, train_dataset, test_dataset, train_images, test_images, experiment_name):
    """ Perform training of the symbolic-text prediction model.

    For each training data point, builds an observed state from its start
    image, pairs it with the oracle symbolic instruction segment, pools the
    pairs in a global replay memory, and performs a gradient update every
    ``clock_max`` data points. The model is saved after each epoch.

    :param agent: agent passed through to the classifier test.
    :param train_dataset: list of training data points.
    :param test_dataset: list of data points used by test_classifier.
    :param train_images: start image per training data point (parallel list).
    :param test_images: images used by test_classifier.
    :param experiment_name: directory prefix used when saving the model.
    """
    dataset_size = len(train_dataset)
    clock = 0
    clock_max = 1  # update after every data point
    for epoch in range(1, self.max_epoch + 1):
        logging.info("Starting epoch %d", epoch)
        # Evaluate before training so epoch-1 numbers reflect the untrained model.
        self.test_classifier(agent, test_dataset, test_images)
        for data_point_ix, data_point in enumerate(train_dataset):
            if (data_point_ix + 1) % 100 == 0:
                logging.info("Done %d out of %d", data_point_ix, dataset_size)
            batch_replay_items = []
            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=train_images[data_point_ix],
                                       previous_action=None,
                                       data_point=data_point)
            # Store it in the replay memory list
            symbolic_form = nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(data_point)
            replay_item = SymbolicTextReplayMemoryItem(state, symbolic_form)
            batch_replay_items.append(replay_item)
            # Global
            for replay_item in batch_replay_items:
                self.global_replay_memory.append(replay_item)
            clock += 1
            if clock % clock_max == 0:
                batch_replay_items = self.sample_from_global_memory()
                self.global_replay_memory.clear()
                clock = 0
                # Perform update
                loss_val = self.do_update(batch_replay_items)
                self.tensorboard.log_loglikelihood_position(loss_val)
        # Save the model
        self.model.save_model(experiment_name + "/ml_learning_symbolic_text_prediction_epoch_" + str(epoch))
def do_train_(house_id, shared_model, config, action_space, meta_data_util, constants,
              train_dataset, tune_dataset, experiment, experiment_name, rank, server,
              logger, model_type, vocab, use_pushover=False):
    """Worker entry point: train a contextual-bandit policy for one house.

    Launches the Unity build for ``house_id``, initializes the server, then
    runs rollouts with a local model copy that is synced from
    ``shared_model`` before every data point. Episode transitions are
    collected into a replay list and one bandit update is performed per
    episode; rank 0 additionally logs metrics to tensorboard. The model is
    saved and the tuning set evaluated at the end of each epoch.

    :param house_id: which house simulator build/config to launch.
    :param shared_model: model shared across asynchronous workers.
    :param rank: worker rank; rank 0 owns the tensorboard server.
    :param use_pushover: kept for interface compatibility — currently both
        branches below disable the pushover logger.
    """
    logger.log("In Training...")
    launch_k_unity_builds([config["port"]], "./house_" + str(house_id) + "_elmer.x86_64",
                          arg_str="--config ./AssetsHouse/config" + str(house_id) + ".json",
                          cwd="./simulators/house/")
    logger.log("Launched Builds.")
    server.initialize_server()
    logger.log("Server Initialized.")

    # Test policy
    test_policy = gp.get_argmax_action

    if rank == 0:  # client 0 creates a tensorboard server
        tensorboard = Tensorboard(experiment_name)
        logger.log('Created Tensorboard Server.')
    else:
        tensorboard = None

    # Pushover logging is currently disabled in both branches (intentional no-op).
    if use_pushover:
        pushover_logger = None
    else:
        pushover_logger = None

    # Create a local model for rollouts
    local_model = model_type(config, constants)
    # local_model.train()

    # Create the Agent
    tmp_agent = TmpHouseAgent(server=server,
                              model=local_model,
                              test_policy=test_policy,
                              action_space=action_space,
                              meta_data_util=meta_data_util,
                              config=config,
                              constants=constants)
    logger.log("Created Agent.")

    action_counts = [0] * action_space.num_actions()
    max_epochs = 100000  # constants["max_epochs"]  -- hard-coded, effectively run-forever
    dataset_size = len(train_dataset)
    tune_dataset_size = len(tune_dataset)

    if tune_dataset_size > 0:
        # Test on tuning data
        tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard,
                       logger=logger, pushover_logger=pushover_logger)

    # Create the learner to compute the loss
    learner = TmpAsynchronousContextualBandit(shared_model, local_model, action_space,
                                              meta_data_util, config, constants, tensorboard)
    # TODO change 2 --- unity launch moved up
    learner.logger = logger

    for epoch in range(1, max_epochs + 1):
        for data_point_ix, data_point in enumerate(train_dataset):
            # Sync with the shared model
            # local_model.load_state_dict(shared_model.state_dict())
            local_model.load_from_state_dict(shared_model.get_state_dict())

            if (data_point_ix + 1) % 100 == 0:
                logger.log("Done %d out of %d" % (data_point_ix, dataset_size))
                logger.log("Training data action counts %r" % action_counts)

            num_actions = 0
            max_num_actions = constants["horizon"]
            max_num_actions += constants["max_extra_horizon"]

            image, metadata = tmp_agent.server.reset_receive_feedback(data_point)
            instruction = data_point.get_instruction()
            # instruction_str = TmpAsynchronousContextualBandit.convert_indices_to_text(instruction, vocab)
            # print("Instruction str is ", instruction_str)

            # Pose and Orientation gone TODO change 3
            state = AgentObservedState(instruction=instruction,
                                       config=config,
                                       constants=constants,
                                       start_image=image,
                                       previous_action=None,
                                       data_point=data_point)
            state.goal = learner.get_goal(metadata)

            model_state = None
            batch_replay_items = []
            total_reward = 0
            # forced_stop stays True when the horizon expires before the
            # policy itself chooses stop.
            forced_stop = True

            while num_actions < max_num_actions:
                # logger.log("Training: Meta Data %r " % metadata)
                # Sample action using the policy
                log_probabilities, model_state, image_emb_seq, state_feature = \
                    local_model.get_probs(state, model_state)
                probabilities = list(torch.exp(log_probabilities.data))[0]

                # Sample action from the probability
                action = gp.sample_action_from_prob(probabilities)
                action_counts[action] += 1

                if action == action_space.get_stop_action_index():
                    forced_stop = False
                    break

                # Send the action and get feedback
                image, reward, metadata = tmp_agent.server.send_action_receive_feedback(action)
                # logger.log("Action is %r, Reward is %r Probability is %r " % (action, reward, probabilities))

                # Store it in the replay memory list
                replay_item = ReplayMemoryItem(state, action, reward, log_prob=log_probabilities)
                batch_replay_items.append(replay_item)

                # Update the agent state
                # Pose and orientation gone, TODO change 4
                state = state.update(image, action, data_point=data_point)
                state.goal = learner.get_goal(metadata)

                num_actions += 1
                total_reward += reward

            # Send final STOP action and get feedback
            image, reward, metadata = tmp_agent.server.halt_and_receive_feedback()
            total_reward += reward

            # Store it in the replay memory list — only when the policy chose
            # stop itself (a forced stop has no corresponding log prob sample).
            if not forced_stop:
                # logger.log("Action is Stop, Reward is %r Probability is %r " % (reward, probabilities))
                replay_item = ReplayMemoryItem(state, action_space.get_stop_action_index(),
                                               reward, log_prob=log_probabilities)
                batch_replay_items.append(replay_item)

            # Update the scores based on meta_data
            # self.meta_data_util.log_results(metadata)

            # Perform update
            if len(batch_replay_items) > 0:  # 32
                loss_val = learner.do_update(batch_replay_items)

                if tensorboard is not None:
                    # cross_entropy = float(learner.cross_entropy.data[0])
                    # tensorboard.log(cross_entropy, loss_val, 0)
                    tensorboard.log_scalar("loss", loss_val)
                    # Entropy normalized by episode length (+1 for the stop action).
                    entropy = float(learner.entropy.data[0]) / float(num_actions + 1)
                    tensorboard.log_scalar("entropy", entropy)
                    ratio = float(learner.ratio.data[0])
                    tensorboard.log_scalar("Abs_objective_to_entropy_ratio", ratio)
                    tensorboard.log_scalar("total_reward", total_reward)
                    tensorboard.log_scalar("mean navigation error", metadata['mean-navigation-error'])

                    # Auxiliary losses are optional; log each only when the
                    # learner computed it this update.
                    if learner.action_prediction_loss is not None:
                        action_prediction_loss = float(learner.action_prediction_loss.data[0])
                        learner.tensorboard.log_action_prediction_loss(action_prediction_loss)
                    if learner.temporal_autoencoder_loss is not None:
                        temporal_autoencoder_loss = float(learner.temporal_autoencoder_loss.data[0])
                        tensorboard.log_temporal_autoencoder_loss(temporal_autoencoder_loss)
                    if learner.object_detection_loss is not None:
                        object_detection_loss = float(learner.object_detection_loss.data[0])
                        tensorboard.log_object_detection_loss(object_detection_loss)
                    if learner.symbolic_language_prediction_loss is not None:
                        symbolic_language_prediction_loss = float(learner.symbolic_language_prediction_loss.data[0])
                        tensorboard.log_scalar("sym_language_prediction_loss", symbolic_language_prediction_loss)
                    if learner.goal_prediction_loss is not None:
                        goal_prediction_loss = float(learner.goal_prediction_loss.data[0])
                        tensorboard.log_scalar("goal_prediction_loss", goal_prediction_loss)

        # Save the model
        local_model.save_model(experiment + "/contextual_bandit_" + str(rank) + "_epoch_" + str(epoch))
        logger.log("Training data action counts %r" % action_counts)

        if tune_dataset_size > 0:
            # Test on tuning data
            tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard,
                           logger=logger, pushover_logger=pushover_logger)
def test_multi_step_action_types(self, test_dataset, vocab, goal_type=None,
                                 tensorboard=None, logger=None, pushover_logger=None):
    """Multi-step evaluation: the action-type model splits the instruction
    into up to 5 steps, and the goal prediction / tracking is refreshed once
    per step (unlike single-step testing, which predicts the goal only once).

    For each step the navigation model is rolled out for an equal share of
    the horizon; steps whose action type is 1 additionally trigger an
    interact action at the current goal cell. Success is counted when the
    final ``navigation-error`` is <= 1.0.

    :param test_dataset: list of data points to evaluate.
    :param vocab: index -> token mapping used for logging the instruction.
    :param goal_type: "inferred" re-predicts the goal per step; otherwise
        the goal comes straight from metadata via ``get_goal``.
    """
    self.server.clear_metadata()
    action_counts = [0] * self.action_space.num_actions()
    task_completion_accuracy = 0
    metadata = {"feedback": ""}
    text_embedding_model = self.goal_prediction_model.text_module
    for data_point_ix, data_point in enumerate(test_dataset):
        instruction_string = " ".join([vocab[token_id] for token_id in data_point.instruction])
        self.log("Instruction is %r " % instruction_string, logger)

        # Call the action type model to determine the number of steps
        token_indices = self.action_type_model.decoding_from_indices_to_indices(
            data_point.instruction, text_embedding_model)
        print("Token indices ", token_indices)
        assert len(token_indices) <= 5

        # Call the navigation model
        image, metadata = self.server.reset_receive_feedback(data_point)
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   data_point=data_point)
        num_actions = 0
        max_num_actions = self.constants["horizon"]
        # Split the horizon evenly across the predicted steps.
        num_inner_loop_steps = int(max_num_actions / max(1, len(token_indices)))
        model_state = None

        for outer_loop_iter in range(0, len(token_indices)):
            if goal_type == "inferred":
                # Get the panorama and set tracking
                self._explore_and_set_tracking(data_point, data_point_ix, instruction_string)
            state.goal = self.get_goal(metadata, goal_type)
            for inner_loop_iter in range(0, num_inner_loop_steps):
                # Generate probabilities over actions; dispatch on model type
                # because incremental models thread a model_state through calls.
                if isinstance(self.navigation_model, AbstractModel):
                    probabilities = list(torch.exp(self.navigation_model.get_probs(state).data))
                elif isinstance(self.navigation_model, AbstractIncrementalModel):
                    log_probabilities, model_state, _, _ = self.navigation_model.get_probs(
                        state, model_state, volatile=True)
                    probabilities = list(torch.exp(log_probabilities.data))[0]
                else:
                    log_probabilities, model_state = self.navigation_model.get_probs(state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))

                # Use test policy to get the action
                action = self.test_policy(probabilities)
                action_counts[action] += 1

                # Action-type 1 steps perform an interaction at the goal cell
                # on every inner iteration before navigating.
                if token_indices[outer_loop_iter] == 1:
                    print("Performing interaction")
                    row, col, row_real, col_real = state.goal
                    if row is not None and col is not None:
                        act_name = "interact %r %r" % (row, col)
                        interact_action = self.action_space.get_action_index(act_name)
                        image, reward, metadata = self.server.send_action_receive_feedback(interact_action)

                if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
                    # Stop ends the current step, not the whole episode.
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(action)
                    # Update the agent state
                    state = state.update(image, action, data_point=data_point)
                    state.goal = self.get_goal(metadata, goal_type)
                    num_actions += 1
            if num_actions >= max_num_actions:
                break

        # Send the action and get feedback
        image, reward, metadata = self.server.halt_and_receive_feedback()
        if metadata["navigation-error"] <= 1.0:
            task_completion_accuracy += 1

        # Update the scores based on meta_data
        # self.meta_data_util.log_results(metadata, logger)
        self.log("Overall test results: %r " % metadata, logger)

    task_completion_accuracy = (task_completion_accuracy * 100.0) / float(max(len(test_dataset), 1))
    self.log("Overall test results:", logger)
    self.log("Testing: Final Metadata: %r" % metadata, logger)
    self.log("Testing: Action Distribution: %r" % action_counts, logger)
    self.log("Testing: Task Completion Accuracy: %r " % task_completion_accuracy, logger)
    # self.meta_data_util.log_results(metadata, logger)
    self.log("Testing data action counts %r" % action_counts, logger)
    if pushover_logger is not None:
        pushover_feedback = str(metadata["feedback"])
        pushover_logger.log(pushover_feedback)
def test_single_step(self, test_dataset, vocab, goal_type="gold",
                     tensorboard=None, logger=None, pushover_logger=None):
    """ Perform a single step testing i.e. the goal prediction module is called only once.

    For each data point: optionally predict the goal once from a panorama
    (``goal_type == "inferred"``), then roll out the navigation model until
    it chooses stop or the horizon is exhausted. Success is counted when the
    final ``navigation-error`` is <= 1.0; aggregate accuracy is logged.

    :param test_dataset: list of data points to evaluate.
    :param vocab: index -> token mapping used for logging the instruction.
    :param goal_type: "gold" uses metadata directly; "inferred" first runs
        exploration + goal prediction to set camera tracking.
    """
    self.server.clear_metadata()
    action_counts = [0] * self.action_space.num_actions()
    task_completion_accuracy = 0
    metadata = {"feedback": ""}
    for data_point_ix, data_point in enumerate(test_dataset):
        instruction_string = " ".join([vocab[token_id] for token_id in data_point.instruction])
        self.log("Instruction is %r " % instruction_string, logger)

        # Call the navigation model
        image, metadata = self.server.reset_receive_feedback(data_point)

        if goal_type == "inferred":
            # Get the panorama and set tracking
            self._explore_and_set_tracking(data_point, data_point_ix, instruction_string)

        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   data_point=data_point)
        state.goal = self.get_goal(metadata, goal_type)
        num_actions = 0
        max_num_actions = self.constants["horizon"]
        model_state = None

        while True:
            # Generate probabilities over actions; dispatch on model type
            # because incremental models thread a model_state through calls.
            if isinstance(self.navigation_model, AbstractModel):
                probabilities = list(torch.exp(self.navigation_model.get_probs(state).data))
            elif isinstance(self.navigation_model, AbstractIncrementalModel):
                log_probabilities, model_state, _, _ = self.navigation_model.get_probs(
                    state, model_state, volatile=True)
                probabilities = list(torch.exp(log_probabilities.data))[0]
            else:
                log_probabilities, model_state = self.navigation_model.get_probs(state, model_state)
                probabilities = list(torch.exp(log_probabilities.data))
                # raise AssertionError("Unhandled Model type.")

            # Use test policy to get the action
            action = self.test_policy(probabilities)
            action_counts[action] += 1

            if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback()
                # if tensorboard is not None:
                #     tensorboard.log_all_test_errors(
                #         metadata["edit_dist_error"],
                #         metadata["closest_dist_error"],
                #         metadata["stop_dist_error"])
                # self.log("Testing: Taking stop action and got reward %r " % reward, logger)
                if metadata["navigation-error"] <= 1.0:
                    task_completion_accuracy += 1
                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata, logger)
                self.log("Overall test results: %r " % metadata, logger)
                #############################################
                # Take a dummy manipulation action
                # row, col, row_real, col_real = state.goal
                # if row is not None and col is not None:
                #     act_name = "interact %r %r" % (row, col)
                #     interact_action = self.action_space.get_action_index(act_name)
                #     image, reward, metadata = self.server.send_action_receive_feedback(interact_action)
                #############################################
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(action)
                # self.log("Testing: Taking action %r and got reward %r " % (action, reward), logger)
                # time.sleep(0.5)
                # Update the agent state
                state = state.update(image, action, data_point=data_point)
                state.goal = self.get_goal(metadata, goal_type)
                num_actions += 1

    task_completion_accuracy = (task_completion_accuracy * 100.0) / float(max(len(test_dataset), 1))
    self.log("Overall test results:", logger)
    self.log("Testing: Final Metadata: %r" % metadata, logger)
    self.log("Testing: Action Distribution: %r" % action_counts, logger)
    self.log("Testing: Task Completion Accuracy: %r " % task_completion_accuracy, logger)
    # self.meta_data_util.log_results(metadata, logger)
    self.log("Testing data action counts %r" % action_counts, logger)
    if pushover_logger is not None:
        pushover_feedback = str(metadata["feedback"])
        pushover_logger.log(pushover_feedback)
def _explore_and_set_tracking(self, data_point, data_point_ix, instruction_string):
    """Explore for a panorama, predict the goal cell (argmax), and set camera tracking.

    Runs the goal-prediction model on the panoramic image, takes the argmax
    attention cell, maps it to one of the 6 cameras plus normalized
    (row, col) coordinates, and asks the server to track that point.

    :param data_point: task instance supplying the instruction.
    :param data_point_ix: index of the data point (used only by the
        commented-out heat-map debugging code).
    :param instruction_string: human-readable instruction (debugging only).
    :return: the server's tracking response decoded as UTF-8, or ``None``
        when the model predicts the goal is out of sight.
    """
    # Get the panoramic image
    panorama, _ = self.server.explore()
    ###########################################
    # original_large_panorama = panorama.copy()
    # panorama = scipy.misc.imresize(panorama.swapaxes(0, 1).swapaxes(1, 2), (128, 128*6, 3)).swapaxes(1, 2).swapaxes(0, 1)
    ###########################################

    # Get the panorama and predict the goal location
    state = AgentObservedState(instruction=data_point.instruction,
                               config=self.config,
                               constants=self.constants,
                               start_image=panorama,
                               previous_action=None,
                               pose=None,
                               position_orientation=None,
                               data_point=data_point)
    volatile = self.goal_prediction_model.get_attention_prob(state, model_state=None)
    # Argmax over the attention logits -> flat cell index.
    inferred_ix = int(torch.max(volatile["attention_logits"], 0)[1].data.cpu().numpy()[0])
    ##########################################
    # self.save_large_panorama_heat_maps(data_point_ix, original_large_panorama,
    #                                    volatile["attention_probs"], instruction_string, scale=5)
    ##########################################

    # The final index is the dedicated out-of-sight bucket.
    if inferred_ix == 6 * self.config["num_manipulation_row"] * self.config["num_manipulation_col"]:
        print("Predicting Out-of-sight")
        return None

    assert 0 <= inferred_ix < 6 * self.config["num_manipulation_row"] * self.config["num_manipulation_col"]
    # Flat index -> (row, col) on the 6-view-wide manipulation grid.
    row = int(inferred_ix / (6 * self.config["num_manipulation_col"]))
    col = inferred_ix % (6 * self.config["num_manipulation_col"])
    # Which of the 6 panorama views the column falls into.
    region_ix = int(col / self.config["num_manipulation_col"])
    # Region -> camera mapping (regions are offset by 3 from camera indices).
    if region_ix == 0:
        camera_ix = 3
    elif region_ix == 1:
        camera_ix = 4
    elif region_ix == 2:
        camera_ix = 5
    elif region_ix == 3:
        camera_ix = 0
    elif region_ix == 4:
        camera_ix = 1
    elif region_ix == 5:
        camera_ix = 2
    else:
        raise AssertionError("region ix should be in {0, 1, 2, 3, 4, 5}. Found ", region_ix)
    # Column within the selected view.
    col = col % self.config["num_manipulation_col"]

    # Set tracking: convert cell indices to normalized [0, 1] cell-center coordinates.
    row_value = min(1.0, (row + 0.5) / float(self.config["num_manipulation_row"]))
    col_value = min(1.0, (col + 0.5) / float(self.config["num_manipulation_col"]))
    message = self.server.set_tracking(camera_ix, row_value, col_value)
    # self.save_panorama_heat_maps(data_point_ix, panorama, region_ix, row, col, instruction_string)
    return message.decode("utf-8")
def do_train_(shared_model, config, action_space, meta_data_util, constants,
              train_dataset, tune_dataset, experiment, experiment_name, rank, server,
              logger, model_type, vocab, use_pushover=False):
    """Worker entry point for (behaviour-cloning style) supervised training.

    Launches a Unity simulator build, rolls out each datapoint's gold trajectory,
    accumulates replay items, and lets ``TmpSupervisedLearning`` update the shared
    model; per-epoch the local model is checkpointed and evaluated on tune data.

    :param shared_model: model shared across workers; synced into the local model
        before every datapoint and updated by the learner.
    :param rank: worker index; only rank 0 creates the Tensorboard writer.
    :param use_pushover: kept for interface compatibility — the pushover logger
        is currently disabled (always None) regardless of this flag.
    """
    print("In training...")
    launch_k_unity_builds([config["port"]], "./simulators/house_3_elmer.x86_64")
    server.initialize_server()
    print("launched builds")

    # Test policy
    test_policy = gp.get_argmax_action

    # torch.manual_seed(args.seed + rank)

    if rank == 0:  # client 0 creates a tensorboard server
        tensorboard = Tensorboard(experiment_name)
    else:
        tensorboard = None

    if use_pushover:
        # pushover_logger = PushoverLogger(experiment_name)
        pushover_logger = None
    else:
        pushover_logger = None

    # Create a local model for rollouts
    local_model = model_type(config, constants)
    # local_model.train()

    # Create the Agent
    logger.log("STARTING AGENT")
    tmp_agent = TmpHouseAgent(server=server,
                              model=local_model,
                              test_policy=test_policy,
                              action_space=action_space,
                              meta_data_util=meta_data_util,
                              config=config,
                              constants=constants)
    logger.log("Created Agent...")

    action_counts = [0] * action_space.num_actions()
    max_epochs = constants["max_epochs"]
    dataset_size = len(train_dataset)
    tune_dataset_size = len(tune_dataset)

    # Create the learner to compute the loss
    learner = TmpSupervisedLearning(shared_model, local_model, action_space,
                                    meta_data_util, config, constants, tensorboard)
    # TODO change 2 --- unity launch moved up

    for epoch in range(1, max_epochs + 1):

        for data_point_ix, data_point in enumerate(train_dataset):

            # Sync with the shared model
            # local_model.load_state_dict(shared_model.state_dict())
            local_model.load_from_state_dict(shared_model.get_state_dict())

            if (data_point_ix + 1) % 100 == 0:
                logger.log("Done %d out of %d" % (data_point_ix, dataset_size))
                logger.log("Training data action counts %r" % action_counts)

            image, metadata = tmp_agent.server.reset_receive_feedback(data_point)
            # instruction = TmpSupervisedLearning.convert_text_to_indices(metadata["instruction"], vocab)
            instruction = data_point.get_instruction()

            # Pose and Orientation gone TODO change 3
            state = AgentObservedState(instruction=instruction,
                                       config=config,
                                       constants=constants,
                                       start_image=image,
                                       previous_action=None,
                                       data_point=data_point)

            model_state = None
            batch_replay_items = []
            total_reward = 0

            # Follow the gold trajectory (capped at 300 actions), recording the
            # model's log-probabilities for each demonstrated action.
            # trajectory = metadata["trajectory"]
            trajectory = data_point.get_trajectory()[0:300]

            for action in trajectory:

                # Sample action using the policy
                log_probabilities, model_state, image_emb_seq, state_feature = \
                    local_model.get_probs(state, model_state)

                # Sample action from the probability
                action_counts[action] += 1

                # Send the action and get feedback
                image, reward, metadata = tmp_agent.server.send_action_receive_feedback(action)

                # Store it in the replay memory list
                replay_item = ReplayMemoryItem(state, action, reward, log_prob=log_probabilities)
                batch_replay_items.append(replay_item)

                # Update the agent state
                # Pose and orientation gone, TODO change 4
                state = state.update(image, action, data_point=data_point)

                total_reward += reward

            # Send final STOP action and get feedback
            # Sample action using the policy
            log_probabilities, model_state, image_emb_seq, state_feature = \
                local_model.get_probs(state, model_state)
            image, reward, metadata = tmp_agent.server.halt_and_receive_feedback()
            total_reward += reward

            # if tensorboard is not None:
            #     tensorboard.log_all_train_errors(
            #         metadata["edit_dist_error"], metadata["closest_dist_error"], metadata["stop_dist_error"])

            # Store it in the replay memory list
            replay_item = ReplayMemoryItem(state,
                                           action_space.get_stop_action_index(),
                                           reward,
                                           log_prob=log_probabilities)
            batch_replay_items.append(replay_item)

            # Update the scores based on meta_data
            # self.meta_data_util.log_results(metadata)

            # Perform update — one gradient step per datapoint/episode.
            if len(batch_replay_items) > 0:  # 32
                loss_val = learner.do_update(batch_replay_items)
                # self.action_prediction_loss_calculator.predict_action(batch_replay_items)
                # del batch_replay_items[:]  # in place list clear

                if tensorboard is not None:
                    # Log the main loss plus whichever auxiliary losses the
                    # learner computed this step (each may be None).
                    # cross_entropy = float(learner.cross_entropy.data[0])
                    # tensorboard.log(cross_entropy, loss_val, 0)
                    num_actions = len(trajectory) + 1
                    tensorboard.log_scalar("loss_val", loss_val)  # /float(num_actions))
                    entropy = float(learner.entropy.data[0])  # /float(num_actions)
                    tensorboard.log_scalar("entropy", entropy)
                    ratio = float(learner.ratio.data[0])
                    tensorboard.log_scalar("Abs_objective_to_entropy_ratio", ratio)

                    if learner.action_prediction_loss is not None:
                        action_prediction_loss = float(learner.action_prediction_loss.data[0])
                        learner.tensorboard.log_action_prediction_loss(action_prediction_loss)
                    if learner.temporal_autoencoder_loss is not None:
                        temporal_autoencoder_loss = float(learner.temporal_autoencoder_loss.data[0])
                        tensorboard.log_temporal_autoencoder_loss(temporal_autoencoder_loss)
                    if learner.object_detection_loss is not None:
                        object_detection_loss = float(learner.object_detection_loss.data[0])
                        tensorboard.log_object_detection_loss(object_detection_loss)
                    if learner.symbolic_language_prediction_loss is not None:
                        symbolic_language_prediction_loss = float(
                            learner.symbolic_language_prediction_loss.data[0])
                        tensorboard.log_scalar("sym_language_prediction_loss",
                                               symbolic_language_prediction_loss)
                    if learner.goal_prediction_loss is not None:
                        goal_prediction_loss = float(learner.goal_prediction_loss.data[0])
                        tensorboard.log_scalar("goal_prediction_loss", goal_prediction_loss)
                    if learner.mean_factor_entropy is not None:
                        mean_factor_entropy = float(learner.mean_factor_entropy.data[0])
                        tensorboard.log_factor_entropy_loss(mean_factor_entropy)

        # Save the model
        local_model.save_model(experiment + "/contextual_bandit_" + str(rank) +
                               "_epoch_" + str(epoch))
        logger.log("Training data action counts %r" % action_counts)

        if tune_dataset_size > 0:
            # Test on tuning data
            print("Going for testing")
            tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard,
                           logger=logger, pushover_logger=pushover_logger)
            print("Done testing")
def try_to_progress(self):
    """Advance this client's nonblocking automaton by at most one transition.

    States: WAITING_FOR_EXAMPLE (1), WAITING_FOR_ACTION (2), WAITING_TO_RECEIVE (3).
    In states (1)/(2) this is a no-op; in state (3) it polls the server for the
    pending message and, if one arrived, consumes it and moves to the next state.

    :return: the client's (possibly updated) status.
    """
    # If in state (1) or (2) then return immediately
    if self.status == Client.WAITING_FOR_EXAMPLE or self.status == Client.WAITING_FOR_ACTION:
        return self.status

    assert self.status == Client.WAITING_TO_RECEIVE

    # If in state (3) then see if the message is available. If the message
    # is available then return to waiting for an action or a new example.
    # self.state is None exactly when we are waiting for the reset feedback
    # of a freshly accepted datapoint.
    if self.state is None:
        feedback = self.server.receive_reset_feedback_nonblocking()
    else:
        feedback = self.server.receive_feedback_nonblocking()

    if feedback is None:
        # Message not there yet; stay in WAITING_TO_RECEIVE.
        return self.status
    else:
        if self.state is None:
            # assert False, "state should not be none"
            # Feedback is in response to reset
            image, metadata = feedback
            # NOTE(review): pose and position_orientation are computed here but
            # not passed to AgentObservedState below — possibly leftover; confirm.
            pose = int(metadata["y_angle"] / 15.0)
            position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                    metadata["y_angle"])
            self.state = AgentObservedState(
                instruction=self.current_data_point.instruction,
                config=self.config,
                constants=self.constants,
                start_image=image,
                previous_action=None,
                data_point=self.current_data_point)

            # Waiting for action
            self.status = Client.WAITING_FOR_ACTION
        else:
            # Feedback is in response to an action
            image, reward, metadata = feedback
            self.total_reward += reward

            # Create a replay item unless it is forced
            if not self.forced_stop:
                all_rewards = self._get_all_rewards(metadata)
                replay_item = ReplayMemoryItem(
                    self.state, self.last_action, reward,
                    log_prob=self.last_log_prob,
                    image_emb_seq=self.image_emb_seq,
                    factor_entropy=self.factor_entropy,
                    all_rewards=all_rewards)
                self.batch_replay_items.append(replay_item)

            # Update the agent state
            self.state = self.state.update(
                image, self.last_action, data_point=self.current_data_point)

            if self.last_action == self.agent.action_space.get_stop_action_index():
                # Episode over: log errors and go back to waiting for an example.
                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)
                if self.tensorboard is not None:
                    self.tensorboard.log_all_train_errors(
                        metadata["edit_dist_error"],
                        metadata["closest_dist_error"],
                        metadata["stop_dist_error"])
                self.status = Client.WAITING_FOR_EXAMPLE
            else:
                if self.num_action >= self.max_num_actions:
                    # Horizon exhausted: send forced stop action and wait to receive
                    self._take_forced_stop()
                    self.status = Client.WAITING_TO_RECEIVE
                else:
                    # Wait to take another action
                    self.status = Client.WAITING_FOR_ACTION

        self.metadata = metadata
        return self.status
def do_train_forced_reading(self, agent, train_dataset, tune_dataset, experiment_name):
    """Train a ReadPointerAgent where reading is *forced*: the gold (oracle)
    segmentation dictates exactly how many instruction tokens are read before
    each acting phase; only acting is learned (contextual-bandit style updates).

    :param agent: must be a ReadPointerAgent (asserted below).
    :param train_dataset: list of datapoints to train on.
    :param tune_dataset: datapoints used for per-epoch evaluation.
    :param experiment_name: directory prefix for per-epoch model checkpoints.
    """
    assert isinstance(
        agent, ReadPointerAgent
    ), "This learning algorithm works only with READPointerAgent"
    dataset_size = len(train_dataset)

    for epoch in range(1, self.max_epoch + 1):
        logging.info("Starting epoch %d", epoch)
        # Per-mode action histograms: READ mode has 2 actions (read/halt).
        action_counts = dict()
        action_counts[ReadPointerAgent.READ_MODE] = [0] * 2
        action_counts[ReadPointerAgent.ACT_MODE] = [0] * self.action_space.num_actions()

        # Test on tuning data
        agent.test_forced_reading(tune_dataset, tensorboard=self.tensorboard)

        batch_replay_items = []
        total_reward = 0
        episodes_in_batch = 0

        for data_point_ix, data_point in enumerate(train_dataset):
            if (data_point_ix + 1) % 100 == 0:
                logging.info("Done %d out of %d", data_point_ix, dataset_size)
                logging.info("Training data action counts %r", action_counts)

            num_actions = 0
            # Horizon = gold trajectory length plus slack.
            max_num_actions = len(data_point.get_trajectory())
            max_num_actions += self.constants["max_extra_horizon"]

            image, metadata = agent.server.reset_receive_feedback(data_point)
            oracle_segments = data_point.get_instruction_oracle_segmented()

            pose = int(metadata["y_angle"] / 15.0)
            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None,
                                       pose=pose)

            # Budget of environment actions allotted to each segment.
            per_segment_budget = int(max_num_actions / len(oracle_segments))
            num_segment_actions = 0

            mode = ReadPointerAgent.READ_MODE
            current_segment_ix = 0
            while True:
                if mode == ReadPointerAgent.READ_MODE:
                    # Forced reading: consume exactly the gold segment's tokens.
                    # Find the number of tokens to read for the gold segment
                    num_segment_size = len(oracle_segments[current_segment_ix])
                    current_segment_ix += 1
                    for i in range(0, num_segment_size):
                        state = state.update_on_read()
                    mode = ReadPointerAgent.ACT_MODE
                elif mode == ReadPointerAgent.ACT_MODE:
                    # Sample action using the policy
                    # Generate probabilities over actions
                    probabilities = list(
                        torch.exp(self.model.get_probs(state, mode).data))

                    # Use test policy to get the action
                    action = gp.sample_action_from_prob(probabilities)
                    action_counts[mode][action] += 1

                    # deal with act mode boundary conditions
                    if num_actions >= max_num_actions:
                        forced_stop = True
                        break
                    elif action == agent.action_space.get_stop_action_index() \
                            or num_segment_actions > per_segment_budget:
                        if state.are_tokens_left_to_be_read():
                            # Intermediate halt: reward +1/-1 by current error.
                            # reward = self._calc_reward_act_halt(state)
                            if metadata["error"] < 5.0:
                                reward = 1.0
                            else:
                                reward = -1.0

                            # Add to replay memory
                            replay_item = ReplayMemoryItem(
                                state,
                                agent.action_space.get_stop_action_index(),
                                reward, mode)
                            # Only a *chosen* stop (not budget exhaustion)
                            # contributes a replay item.
                            if action == agent.action_space.get_stop_action_index():
                                batch_replay_items.append(replay_item)

                            mode = ReadPointerAgent.READ_MODE
                            agent.server.force_goal_update()
                            state = state.update_on_act_halt()
                            num_segment_actions = 0
                        else:
                            # No tokens left: episode ends here.
                            if action == agent.action_space.get_stop_action_index():
                                forced_stop = False
                            else:
                                # stopping due to per segment budget exhaustion
                                forced_stop = True
                            break
                    else:
                        image, reward, metadata = agent.server.send_action_receive_feedback(action)

                        # Store it in the replay memory list
                        replay_item = ReplayMemoryItem(state, action, reward, mode=mode)
                        batch_replay_items.append(replay_item)

                        # Update the agent state
                        pose = int(metadata["y_angle"] / 15.0)
                        state = state.update(image, action, pose=pose)

                        num_actions += 1
                        num_segment_actions += 1
                        total_reward += reward
                else:
                    raise AssertionError("Mode should be either read or act. "
                                         "Unhandled mode: " + str(mode))

            assert mode == ReadPointerAgent.ACT_MODE, "Agent should end on Act Mode"

            # Send final STOP action and get feedback
            image, reward, metadata = agent.server.halt_and_receive_feedback()
            total_reward += reward

            # Store it in the replay memory list
            if not forced_stop:
                replay_item = ReplayMemoryItem(
                    state, agent.action_space.get_stop_action_index(), reward, mode)
                batch_replay_items.append(replay_item)

            # Update the scores based on meta_data
            # self.meta_data_util.log_results(metadata)

            # Perform update (batch size is effectively 1 episode).
            episodes_in_batch += 1
            if episodes_in_batch == 1:
                loss_val = self.do_update(batch_replay_items)
                batch_replay_items = []
                entropy_val = float(self.entropy.data[0])
                self.tensorboard.log(entropy_val, loss_val, total_reward)
                total_reward = 0
                episodes_in_batch = 0

            self.tensorboard.log_train_error(metadata["error"])

        # Save the model
        self.model.save_model(
            experiment_name +
            "/read_pointer_forced_reading_contextual_bandit_resnet_epoch_" +
            str(epoch))

        logging.info("Training data action counts %r", action_counts)
def _test(self, data_point_ix, data_point, test_image, tensorboard=None, debug=False):
    """Evaluate only the goal predictor on one datapoint: predict the goal from a
    pre-computed test image, record diagnostics, save the attention heat map,
    and halt immediately (no navigation is performed).

    :param data_point_ix: index into ``test_image``.
    :param test_image: indexable collection; ``test_image[ix][0]`` is the image
        fed to the goal predictor.
    :param tensorboard: unused here; kept for signature parity with the rollout
        variant of ``_test``.
    :param debug: unused here; kept for signature parity.
    :return: tuple (metadata, actions, predictor_error, info); ``actions`` is
        always empty since no actions are taken.
    """
    image, metadata = self.server.reset_receive_feedback(data_point)
    pose = int(metadata["y_angle"] / 15.0)
    position_orientation = (metadata["x_pos"], metadata["z_pos"],
                            metadata["y_angle"])
    state = AgentObservedState(
        instruction=data_point.instruction,
        config=self.config,
        constants=self.constants,
        start_image=image,
        previous_action=None,
        pose=pose,
        position_orientation=position_orientation,
        data_point=data_point,
        prev_instruction=data_point.get_prev_instruction(),
        next_instruction=data_point.get_next_instruction())

    # Test image: a high-quality exploration image is fetched for visualization,
    # while the predictor consumes the pre-computed test image.
    high_quality_test_image_example = self.get_exploration_image()
    print("Image shape is ", high_quality_test_image_example.shape)
    test_image_example = test_image[data_point_ix][0]

    # Predict the goal
    predicted_goal, predictor_error, predicted_pixel, attention_prob = self.get_3d_location(
        test_image_example, data_point, panaroma=True)
    current_bot_location = metadata["x_pos"], metadata["z_pos"]
    current_bot_pose = metadata["y_angle"]
    state.goal = PredictorPlannerAgent.get_goal_location(
        current_bot_location, current_bot_pose, predicted_goal, 32, 32)
    print("Predicted Error ", predictor_error)

    num_actions = 0
    actions = []

    info = dict()
    # Dictionary to contain key results
    info["instruction_string"] = instruction_to_string(
        data_point.instruction, self.config)
    info["datapoint_id"] = data_point.get_scene_name()
    info["stop_dist_error"] = metadata["stop_dist_error"]
    info["closest_dist_error"] = metadata["closest_dist_error"]
    info["edit_dist_error"] = metadata["edit_dist_error"]
    info["num_actions_taken"] = num_actions
    info["predicted_goal"] = predicted_goal
    info["predicted_error"] = predictor_error
    info["gold_goal"] = data_point.get_destination_list()[-1]
    info["final_location"] = (metadata["x_pos"], metadata["z_pos"])
    info["predicted_screen_pixels"] = predicted_pixel

    self.save_attention_prob(high_quality_test_image_example, attention_prob,
                             info["instruction_string"], info["datapoint_id"])
    # self.save_example(image, info["instruction_string"], info["datapoint_id"], scale=5)

    # Halt right away: this variant measures prediction quality only.
    self.server.halt_and_receive_feedback()

    return metadata, actions, predictor_error, info
def _test(self, data_point_ix, data_point, test_image, tensorboard=None, debug=False):
    """Evaluate the predictor+planner agent on one datapoint: predict the goal
    once up front, then roll out the navigation policy toward it until STOP or
    the horizon is reached.

    :param test_image: optional pre-computed images; when None a fresh
        exploration image is used instead of ``test_image[data_point_ix][0]``.
    :param tensorboard: optional logger for final test errors.
    :param debug: when True, populate ``info`` with diagnostics and save the
        attention heat map.
    :return: tuple (metadata, actions, predictor_error, info).
    """
    image, metadata = self.server.reset_receive_feedback(data_point)
    pose = int(metadata["y_angle"] / 15.0)
    position_orientation = (metadata["x_pos"], metadata["z_pos"],
                            metadata["y_angle"])
    state = AgentObservedState(
        instruction=data_point.instruction,
        config=self.config,
        constants=self.constants,
        start_image=image,
        previous_action=None,
        pose=pose,
        position_orientation=position_orientation,
        data_point=data_point,
        prev_instruction=data_point.get_prev_instruction(),
        next_instruction=data_point.get_next_instruction())

    # Test image
    if test_image is None:
        test_image_example = self.get_exploration_image()
    else:
        test_image_example = test_image[data_point_ix][0]

    # Predict the goal once; the planner then steers toward it each step.
    predicted_goal, predictor_error, predicted_pixel, attention_prob = self.get_3d_location(
        test_image_example, data_point, panaroma=True)
    current_bot_location = metadata["x_pos"], metadata["z_pos"]
    current_bot_pose = metadata["y_angle"]
    state.goal = PredictorPlannerAgent.get_goal_location(
        current_bot_location, current_bot_pose, predicted_goal, 32, 32)
    print("Predicted Error ", predictor_error)

    num_actions = 0
    max_num_actions = self.constants["horizon"]
    model_state = None
    actions = []
    info = dict()

    while True:
        # Generate probabilities over actions
        if isinstance(self.model, AbstractModel):
            probabilities = list(
                torch.exp(self.model.get_probs(state).data))
        elif isinstance(self.model, AbstractIncrementalModel):
            log_probabilities, model_state, _, _ = self.model.get_probs(
                state, model_state, volatile=True)
            probabilities = list(torch.exp(log_probabilities.data))[0]
        else:
            raise AssertionError("Unhandled Model type.")

        # Use test policy to get the action
        action = self.test_policy(probabilities)
        actions.append(action)

        if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
            # Send the action and get feedback
            image, reward, metadata = self.server.halt_and_receive_feedback()
            if tensorboard is not None:
                tensorboard.log_all_test_errors(
                    metadata["edit_dist_error"],
                    metadata["closest_dist_error"],
                    metadata["stop_dist_error"])

            # Update the scores based on meta_data
            self.meta_data_util.log_results(metadata)

            if debug:
                # Dictionary to contain key results
                info["instruction_string"] = instruction_to_string(
                    data_point.instruction, self.config)
                info["datapoint_id"] = data_point.get_scene_name()
                info["stop_dist_error"] = metadata["stop_dist_error"]
                info["closest_dist_error"] = metadata["closest_dist_error"]
                info["edit_dist_error"] = metadata["edit_dist_error"]
                info["num_actions_taken"] = num_actions
                info["predicted_goal"] = predicted_goal
                info["predicted_error"] = predictor_error
                info["gold_goal"] = data_point.get_destination_list()[-1]
                info["final_location"] = (metadata["x_pos"], metadata["z_pos"])
                info["predicted_screen_pixels"] = predicted_pixel

                self.save_attention_prob(test_image_example, attention_prob,
                                         info["instruction_string"],
                                         info["datapoint_id"])
            break
        else:
            # Send the action and get feedback
            image, reward, metadata = self.server.send_action_receive_feedback(action)

            # Update the agent state
            pose = int(metadata["y_angle"] / 15.0)
            position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                    metadata["y_angle"])
            state = state.update(image, action, pose=pose,
                                 position_orientation=position_orientation,
                                 data_point=data_point)

            # Set the goal based on the current position and angle
            current_bot_location = metadata["x_pos"], metadata["z_pos"]
            current_bot_pose = metadata["y_angle"]
            state.goal = PredictorPlannerAgent.get_goal_location(
                current_bot_location, current_bot_pose, predicted_goal, 32, 32)
            num_actions += 1

    # logging.info("Error, Start-Distance, Turn-Angle, %r %r %r",
    #              metadata["stop_dist_error"], distance, angle)
    return metadata, actions, predictor_error, info
class Client:
    """A nonblocking per-environment client used to run many episodes in parallel
    against the simulator server.

    Client can be in one of the following states:
      1. Free and Waiting for new example  (WAITING_FOR_EXAMPLE)
      2. Waiting to take the next action   (WAITING_FOR_ACTION)
      3. Waiting to receive the next image and message (WAITING_TO_RECEIVE)

    Client operates as an automaton following the transitions:
    Wait for a new example -> repeat [Take an action -> Wait to receive next
    image and message] -> Go back to (1).
    Replay items accumulated during episodes go into the shared
    ``batch_replay_items`` list passed in by the owner.
    """

    WAITING_FOR_EXAMPLE, WAITING_FOR_ACTION, WAITING_TO_RECEIVE = range(3)

    def __init__(self, agent, config, constants, tensorboard, client_ix, batch_replay_items):
        """:param agent: owning agent; supplies the action space and server.
        :param client_ix: index of this client (informational).
        :param batch_replay_items: shared list this client appends replay items to.
        """
        self.agent = agent
        self.config = config
        self.constants = constants
        self.tensorboard = tensorboard

        # Client specific information
        self.status = Client.WAITING_FOR_EXAMPLE
        self.client_ix = client_ix
        self.server = agent.server  # agent.servers[client_ix]
        self.metadata = None

        # Datapoint specific variable (reset per accepted example)
        self.max_num_actions = None
        self.state = None
        self.model_state = None
        self.image_emb_seq = None
        self.current_data_point = None
        self.last_action = None
        self.last_log_prob = None
        self.factor_entropy = None
        self.num_action = 0
        self.total_reward = 0
        self.forced_stop = False
        self.batch_replay_items = batch_replay_items

    def get_state(self):
        # Current AgentObservedState (None between episodes).
        return self.state

    def get_status(self):
        # Current automaton state (one of the three class constants).
        return self.status

    def get_model_state(self):
        # Recurrent model state carried across actions within an episode.
        return self.model_state

    def _get_all_rewards(self, metadata):
        """Return the per-action reward vector from metadata's reward_dict,
        ordered by action index."""
        rewards = []
        for i in range(0, self.config["num_actions"]):
            reward = metadata["reward_dict"][
                self.agent.action_space.get_action_name(i)]
            rewards.append(reward)
        return rewards

    def try_to_progress(self):
        """Advance the automaton by at most one transition; polls the server
        nonblockingly when in WAITING_TO_RECEIVE. Returns the updated status."""
        # If in state (1) or (2) then return immediately
        if self.status == Client.WAITING_FOR_EXAMPLE or self.status == Client.WAITING_FOR_ACTION:
            return self.status

        assert self.status == Client.WAITING_TO_RECEIVE

        # If in state (3) then see if the message is available. If the message
        # is available then return to waiting for an action or a new example.
        if self.state is None:
            feedback = self.server.receive_reset_feedback_nonblocking()
        else:
            feedback = self.server.receive_feedback_nonblocking()

        if feedback is None:
            return self.status
        else:
            if self.state is None:
                # assert False, "state should not be none"
                # Feedback is in response to reset
                image, metadata = feedback
                # NOTE(review): pose and position_orientation computed but not
                # forwarded to AgentObservedState — possibly leftover; confirm.
                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                self.state = AgentObservedState(
                    instruction=self.current_data_point.instruction,
                    config=self.config,
                    constants=self.constants,
                    start_image=image,
                    previous_action=None,
                    data_point=self.current_data_point)

                # Waiting for action
                self.status = Client.WAITING_FOR_ACTION
            else:
                # Feedback is in response to an action
                image, reward, metadata = feedback
                self.total_reward += reward

                # Create a replay item unless it is forced
                if not self.forced_stop:
                    all_rewards = self._get_all_rewards(metadata)
                    replay_item = ReplayMemoryItem(
                        self.state, self.last_action, reward,
                        log_prob=self.last_log_prob,
                        image_emb_seq=self.image_emb_seq,
                        factor_entropy=self.factor_entropy,
                        all_rewards=all_rewards)
                    self.batch_replay_items.append(replay_item)

                # Update the agent state
                self.state = self.state.update(
                    image, self.last_action,
                    data_point=self.current_data_point)

                if self.last_action == self.agent.action_space.get_stop_action_index():
                    # Update the scores based on meta_data
                    # self.meta_data_util.log_results(metadata)
                    if self.tensorboard is not None:
                        self.tensorboard.log_all_train_errors(
                            metadata["edit_dist_error"],
                            metadata["closest_dist_error"],
                            metadata["stop_dist_error"])
                    self.status = Client.WAITING_FOR_EXAMPLE
                else:
                    if self.num_action >= self.max_num_actions:
                        # Send forced stop action and wait to receive
                        self._take_forced_stop()
                        self.status = Client.WAITING_TO_RECEIVE
                    else:
                        # Wait to take another action
                        self.status = Client.WAITING_FOR_ACTION

            self.metadata = metadata
            return self.status

    def accept_new_example(self, data_point, max_num_actions):
        """Start a new episode: clear per-episode variables, trigger a
        nonblocking reset to ``data_point``, and go to WAITING_TO_RECEIVE."""
        assert self.status == Client.WAITING_FOR_EXAMPLE
        self.state = None
        self.metadata = None
        self.model_state = None
        self.image_emb_seq = None
        self.factor_entropy = None
        self.max_num_actions = max_num_actions
        self.server.reset_nonblocking(data_point)
        self.current_data_point = data_point
        self.last_action = None
        self.last_log_prob = None
        self.num_action = 0
        self.total_reward = 0
        self.forced_stop = False
        self.status = Client.WAITING_TO_RECEIVE

    def take_action(self, log_probabilities, new_model_state, image_emb_seq, factor_entropy):
        """Sample an action from the given log-probabilities, dispatch it
        nonblockingly, and transition to WAITING_TO_RECEIVE."""
        assert self.status == Client.WAITING_FOR_ACTION

        probability = list(torch.exp(log_probabilities.data))[0]

        self.model_state = new_model_state
        self.last_log_prob = log_probabilities
        self.image_emb_seq = image_emb_seq
        self.factor_entropy = factor_entropy

        # Use test policy to get the action
        self.last_action = gp.sample_action_from_prob(probability)
        self.num_action += 1

        # if self.metadata["goal_dist"] < 5:
        #     # Add a forced stop action to replay items
        #     imp_weight = float(probability[3])
        #     reward = 1.0
        #     replay_item = ReplayMemoryItem(
        #         self.state, self.agent.action_space.get_stop_action_index(), reward * imp_weight,
        #         log_prob=self.last_log_prob, image_emb_seq=self.image_emb_seq, factor_entropy=self.factor_entropy)
        #     self.batch_replay_items.append(replay_item)

        if self.last_action == self.agent.action_space.get_stop_action_index():
            self.server.halt_nonblocking()
        else:
            self.server.send_action_nonblocking(self.last_action)

        self.status = Client.WAITING_TO_RECEIVE

    def reset_datapoint_blocking(self, datapoint):
        """ Resets to the given datapoint and returns starting image """
        image, metadata = self.server.reset_receive_feedback(datapoint)
        return image, metadata

    def take_action_blocking(self, action):
        """ Takes an action and returns image, reward and metadata """
        if action == self.agent.action_space.get_stop_action_index():
            image, reward, metadata = self.server.halt_and_receive_feedback()
            done = True
        else:
            image, reward, metadata = self.server.send_action_receive_feedback(
                action)
            done = False
        return image, reward, metadata, done

    def _take_forced_stop(self):
        """Issue a STOP on the agent's behalf (horizon exhausted); marks the
        episode so the resulting feedback does not create a replay item."""
        # Use test policy to get the action
        self.last_action = self.agent.action_space.get_stop_action_index()
        self.forced_stop = True
        self.server.halt_nonblocking()
        self.status = Client.WAITING_TO_RECEIVE
def test_auto_segmented(self, test_dataset, tensorboard=None, segmenting_type="auto"):
    """Evaluate the agent on instructions split into segments, running the
    policy on each segment in turn with a per-segment horizon.

    :param test_dataset: datapoints to evaluate.
    :param tensorboard: optional logger for the final test error.
    :param segmenting_type: "auto" uses the automatic segmentation,
        "oracle" uses the gold segmentation (asserted below).
    """
    assert segmenting_type in ("auto", "oracle")
    self.server.clear_metadata()
    action_counts = [0] * self.action_space.num_actions()

    metadata = ""

    for data_point in test_dataset:
        if segmenting_type == "auto":
            segmented_instruction = data_point.get_instruction_auto_segmented()
        else:
            segmented_instruction = data_point.get_instruction_oracle_segmented()
        num_segments = len(segmented_instruction)

        # Per-segment horizon: an equal share of the gold trajectory plus slack.
        gold_num_actions = len(data_point.get_trajectory())
        horizon = gold_num_actions // num_segments
        horizon += self.constants["max_extra_horizon_auto_segmented"]

        image, metadata = self.server.reset_receive_feedback(data_point)

        instruction = instruction_to_string(data_point.get_instruction(), self.config)
        print("TEST INSTRUCTION: %r" % instruction)
        print("")

        for instruction_i, instruction in enumerate(segmented_instruction):

            # BUG FIX: next_instruction previously passed the bound method
            # (missing call parentheses); it is now called like
            # get_prev_instruction() above.
            state = AgentObservedState(
                instruction=instruction,
                config=self.config,
                constants=self.constants,
                start_image=image,
                previous_action=None,
                prev_instruction=data_point.get_prev_instruction(),
                next_instruction=data_point.get_next_instruction())

            num_actions = 0
            # self._save_agent_state(state, num_actions)

            while True:
                # Generate probabilities over actions
                probabilities = list(torch.exp(self.model.get_probs(state).data))
                # print "test probs:", probabilities

                # Use test policy to get the action
                action = self.test_policy(probabilities)
                action_counts[action] += 1

                # logging.info("Taking action-num=%d horizon=%d action=%s from %s",
                #              num_actions, max_num_actions, str(action), str(probabilities))

                if action == self.action_space.get_stop_action_index() or num_actions >= horizon:
                    # Move on to the next segment without halting the episode.
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(action)

                    # Update the agent state
                    state = state.update(image, action)
                    num_actions += 1

        _, _, metadata = self.server.halt_and_receive_feedback()
        if tensorboard is not None:
            tensorboard.log_test_error(metadata["error"])

        self.meta_data_util.log_results(metadata)

    logging.info("Testing data action counts %r", action_counts)
def test(self, test_dataset, vocab, tensorboard=None, logger=None, pushover_logger=None):
    """Evaluate the policy on ``test_dataset`` and log task-completion accuracy
    (success when navigation-error <= 1.0) plus action statistics.

    :param vocab: unused in the body; kept for interface parity with callers.
    :param tensorboard: currently unused (logging calls are commented out).
    :param logger: optional logger passed to self.log.
    :param pushover_logger: optional; receives the final metadata feedback string.
    """
    self.server.clear_metadata()
    action_counts = [0] * self.action_space.num_actions()

    task_completion_accuracy = 0

    metadata = {"feedback": ""}
    for data_point_ix, data_point in enumerate(test_dataset):
        image, metadata = self.server.reset_receive_feedback(data_point)
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   data_point=data_point)
        state.goal = self.get_goal(metadata)
        # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()
        num_actions = 0
        max_num_actions = self.constants["horizon"]
        model_state = None

        while True:
            # Generate probabilities over actions; branch on model family.
            if isinstance(self.model, AbstractModel):
                probabilities = list(
                    torch.exp(self.model.get_probs(state).data))
            elif isinstance(self.model, AbstractIncrementalModel):
                log_probabilities, model_state, _, _ = self.model.get_probs(
                    state, model_state, volatile=True)
                probabilities = list(torch.exp(log_probabilities.data))[0]
            else:
                # Fallback path: any other model is assumed to follow the
                # incremental (state, model_state) protocol.
                log_probabilities, model_state = self.model.get_probs(
                    state, model_state)
                probabilities = list(torch.exp(log_probabilities.data))
                # raise AssertionError("Unhandled Model type.")

            # Use test policy to get the action
            action = self.test_policy(probabilities)
            action_counts[action] += 1

            if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback()
                # if tensorboard is not None:
                #     tensorboard.log_all_test_errors(
                #         metadata["edit_dist_error"],
                #         metadata["closest_dist_error"],
                #         metadata["stop_dist_error"])

                # self.log("Testing: Taking stop action and got reward %r " % reward, logger)

                # Success criterion: within 1.0 of the goal at halt time.
                if metadata["navigation-error"] <= 1.0:
                    task_completion_accuracy += 1

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata, logger)
                # self.log("Overall test results: %r " % metadata, logger)
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(action)
                # self.log("Testing: Taking action %r and got reward %r " % (action, reward), logger)
                # time.sleep(0.5)

                # Update the agent state
                state = state.update(image, action, data_point=data_point)
                state.goal = self.get_goal(metadata)
                num_actions += 1

    # Convert the success count to a percentage (guard against empty dataset).
    task_completion_accuracy = (task_completion_accuracy * 100.0) / float(
        max(len(test_dataset), 1))
    self.log("Overall test results:", logger)
    self.log("Testing: Final Metadata: %r" % metadata, logger)
    self.log("Testing: Action Distribution: %r" % action_counts, logger)
    self.log(
        "Testing: Task Completion Accuracy: %r " % task_completion_accuracy,
        logger)
    # self.meta_data_util.log_results(metadata, logger)
    self.log("Testing data action counts %r" % action_counts, logger)
    if pushover_logger is not None:
        pushover_feedback = str(metadata["feedback"])
        pushover_logger.log(pushover_feedback)
def debug_tracking(self, data_point, vocab):
    """Interactive debugging loop: show a panorama, let the user pick a
    (region, row, col) to start server-side tracking, then drive the agent
    manually from keyboard input until STOP.

    :param data_point: datapoint to reset the environment to.
    :param vocab: index->word mapping used to print the instruction.
    """
    self.server.clear_metadata()
    task_completion_accuracy = 0

    image, metadata = self.server.reset_receive_feedback(data_point)
    state = AgentObservedState(instruction=data_point.instruction,
                               config=self.config,
                               constants=self.constants,
                               start_image=image,
                               previous_action=None,
                               data_point=data_point)
    num_actions = 0
    print("Instruction is ",
          " ".join([vocab[index] for index in data_point.instruction]))
    plt.ion()

    # Get the panoramic image
    panorama, _ = self.server.explore()

    # Show the goal location
    self.show_goal_location(panorama, metadata, size=6)

    tracking_values = input("Enter the region, row and column for tracking.")
    region_ix, row, col = [int(w) for w in tracking_values.split()]

    # Panorama region -> physical camera index (regions 0..5 -> cameras 3,4,5,0,1,2).
    if region_ix == 0:
        camera_ix = 3
    elif region_ix == 1:
        camera_ix = 4
    elif region_ix == 2:
        camera_ix = 5
    elif region_ix == 3:
        camera_ix = 0
    elif region_ix == 4:
        camera_ix = 1
    elif region_ix == 5:
        camera_ix = 2
    else:
        raise AssertionError("Region ix should be in {0, 1, 2, 3, 4, 5}")

    # Normalize the chosen cell to [0, 1] coordinates on a 32x32 grid.
    row_value = row / 32.0
    col_value = col / 32.0

    self.server.set_tracking(camera_ix, row_value, col_value)
    input("Tracking done. Enter to continue")

    while True:

        # Show the goal location
        self.show_goal_location(image, metadata, goal_type="inferred", size=1)

        incorrect_action = True
        action_string = None
        while incorrect_action:
            action_string = input(
                "Take the action. 0: Forward, 1: Left, 2: Right, 3: Stop, 4: Interact\n")
            if action_string in ['0', '1', '2', '3', '4']:
                incorrect_action = False
        if action_string == '4':
            interact_values = input("Enter the row and column in format: row col")
            row, col = interact_values.split()
            row, col = int(row), int(col)
            # Interact actions are encoded as 4 + row * 32 + col.
            action_string = 4 + row * 32 + col

        action = int(action_string)
        action_name = self.action_space.get_action_name(action)

        if action == self.action_space.get_stop_action_index():
            # Send the action and get feedback
            image, reward, metadata = self.server.halt_and_receive_feedback()
            print("Metadata is ", metadata)

            if metadata["navigation-error"] <= 1.0:
                task_completion_accuracy += 1
            break
        else:
            # Send the action and get feedback
            image, reward, metadata = self.server.send_action_receive_feedback(action)

            # Update the agent state
            state = state.update(image, action, data_point=data_point)
            num_actions += 1
            print("Metadata is ", metadata)
            print("Took action %r, Got reward %r" % (action_name, reward))
def do_train(self, agent, train_dataset, tune_dataset, experiment_name):
    """Train the read-pointer contextual-bandit model.

    Alternates between READ mode (advance the instruction pointer) and
    ACT mode (navigate) per episode, collecting (state, action, reward)
    replay items and updating the model once per episode.

    Fix: the two Python-2 ``print`` statements are converted to
    ``print()`` calls, matching every other method in this file and
    keeping the code valid under Python 3 (PEP 3105).

    :param agent: must be a ReadPointerAgent (asserted below).
    :param train_dataset: list of training data points.
    :param tune_dataset: dataset evaluated at the start of each epoch.
    :param experiment_name: directory prefix for saved model checkpoints.
    """
    assert isinstance(
        agent, ReadPointerAgent
    ), "This learning algorithm works only with READPointerAgent"
    dataset_size = len(train_dataset)

    for epoch in range(1, self.max_epoch + 1):
        logging.info("Starting epoch %d", epoch)
        # Per-mode action histograms: READ has 2 actions, ACT has the
        # full navigation action space.
        action_counts = dict()
        action_counts[ReadPointerAgent.READ_MODE] = [0] * 2
        action_counts[ReadPointerAgent.
                      ACT_MODE] = [0] * self.action_space.num_actions()

        # Test on tuning data
        agent.test(tune_dataset, tensorboard=self.tensorboard)

        batch_replay_items = []
        total_reward = 0
        episodes_in_batch = 0

        for data_point_ix, data_point in enumerate(train_dataset):
            if (data_point_ix + 1) % 100 == 0:
                logging.info("Done %d out of %d", data_point_ix,
                             dataset_size)
                logging.info("Training data action counts %r",
                             action_counts)
            num_actions = 0
            # Horizon = oracle trajectory length plus slack.
            max_num_actions = len(data_point.get_trajectory())
            max_num_actions += self.constants["max_extra_horizon"]

            image, metadata = agent.server.reset_receive_feedback(
                data_point)
            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None)

            mode = ReadPointerAgent.READ_MODE
            last_action_was_halt = False

            instruction = instruction_to_string(
                data_point.get_instruction(), self.config)
            print("TRAIN INSTRUCTION: %r" % instruction)
            print("")

            while True:
                # Sample action from the policy for the current mode.
                probabilities = list(
                    torch.exp(self.model.get_probs(state, mode).data))
                action = gp.sample_action_from_prob(probabilities)
                action_counts[mode][action] += 1

                if mode == ReadPointerAgent.READ_MODE:
                    # Boundary conditions may force the action; forced
                    # actions are NOT stored in the replay memory.
                    forced_action = False
                    if not state.are_tokens_left_to_be_read():
                        # force halt
                        action = 1
                        forced_action = True
                    elif num_actions >= max_num_actions or last_action_was_halt:
                        # force read
                        action = 0
                        forced_action = True

                    if not forced_action:
                        # Store reward in the replay memory list
                        reward = self._calc_reward_read_mode(state, action)
                        replay_item = ReplayMemoryItem(state, action,
                                                       reward, mode=mode)
                        batch_replay_items.append(replay_item)

                    if action == 0:
                        last_action_was_halt = False
                        state = state.update_on_read()
                    elif action == 1:
                        last_action_was_halt = True
                        mode = ReadPointerAgent.ACT_MODE
                    else:
                        raise AssertionError(
                            "Read mode only supports two actions: read(0) and halt(1). "
                            + "Found " + str(action))

                elif mode == ReadPointerAgent.ACT_MODE:
                    # deal with act mode boundary conditions
                    if num_actions >= max_num_actions:
                        forced_stop = True
                        break
                    elif action == agent.action_space.get_stop_action_index(
                    ):
                        if state.are_tokens_left_to_be_read():
                            # More instruction remains: treat the stop as
                            # a segment halt and return to READ mode.
                            reward = self._calc_reward_act_halt(state)
                            replay_item = ReplayMemoryItem(
                                state,
                                agent.action_space.get_stop_action_index(),
                                reward, mode)
                            batch_replay_items.append(replay_item)
                            mode = ReadPointerAgent.READ_MODE
                            last_action_was_halt = True
                            state = state.update_on_act_halt()
                        else:
                            forced_stop = False
                            break
                    else:
                        # Navigation action: execute and record reward.
                        image, reward, metadata = agent.server.send_action_receive_feedback(
                            action)
                        replay_item = ReplayMemoryItem(state, action,
                                                       reward, mode=mode)
                        batch_replay_items.append(replay_item)
                        # Update the agent state
                        state = state.update(image, action)
                        num_actions += 1
                        total_reward += reward
                        last_action_was_halt = False
                else:
                    raise AssertionError(
                        "Mode should be either read or act. Unhandled mode: "
                        + str(mode))

            assert mode == ReadPointerAgent.ACT_MODE, "Agent should end on Act Mode"

            # Send final STOP action and get feedback
            image, reward, metadata = agent.server.halt_and_receive_feedback(
            )
            total_reward += reward

            # Store the final stop only if the agent chose it itself.
            if not forced_stop:
                replay_item = ReplayMemoryItem(
                    state, agent.action_space.get_stop_action_index(),
                    reward, mode)
                batch_replay_items.append(replay_item)

            # Update the scores based on meta_data
            # self.meta_data_util.log_results(metadata)

            # Perform update once per episode (batch size 1).
            episodes_in_batch += 1
            if episodes_in_batch == 1:
                loss_val = self.do_update(batch_replay_items)
                batch_replay_items = []
                entropy_val = float(self.entropy.data[0])
                self.tensorboard.log(entropy_val, loss_val, total_reward)
                total_reward = 0
                episodes_in_batch = 0

            self.tensorboard.log_train_error(metadata["error"])

        # Save the model
        self.model.save_model(
            experiment_name +
            "/read_pointer_contextual_bandit_resnet_epoch_" + str(epoch))

        logging.info("Training data action counts %r", action_counts)
def test_goal_prediction(self, test_dataset, tensorboard=None, logger=None,
                         pushover_logger=None):
    """Evaluate goal-prediction accuracy while replaying oracle actions.

    Replays (a 1-step prefix of) each data point's oracle trajectory and
    accumulates the goal-prediction loss/probability produced by the
    incremental model at every step, then logs aggregate metrics.

    Fix: the mean-loss and mean-probability denominators are guarded
    with ``max(.., 1)`` so an empty dataset or a run where no goal
    probability was recorded no longer raises ZeroDivisionError (same
    guard style already used for ``len(test_dataset)`` below).

    :param test_dataset: data points to evaluate.
    :param tensorboard: optional tensorboard logger.
    :param logger: optional text logger passed to ``self.log``.
    :param pushover_logger: optional push-notification logger.
    """
    self.server.clear_metadata()
    action_counts = [0] * self.action_space.num_actions()
    task_completion_accuracy = 0
    sum_loss, count, sum_prob, goal_prob_count = 0, 0, 0, 0
    metadata = {"feedback": ""}

    for data_point_ix, data_point in enumerate(test_dataset):
        print("Datapoint index ", data_point_ix)
        image, metadata = self.server.reset_receive_feedback(data_point)
        pose = int(metadata["y_angle"] / 15.0)
        position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                metadata["y_angle"])
        state = AgentObservedState(
            instruction=data_point.instruction,
            config=self.config,
            constants=self.constants,
            start_image=image,
            previous_action=None,
            pose=pose,
            position_orientation=position_orientation,
            data_point=data_point,
            prev_instruction=data_point.get_prev_instruction(),
            next_instruction=data_point.get_next_instruction())
        ##################################
        # Attach the oracle goal (8x8 grid) for supervision.
        state.goal = GoalPrediction.get_goal_location(
            metadata, data_point, 8, 8)
        print("Instruction is ",
              instruction_to_string(data_point.instruction, self.config))
        ##################################
        # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()

        num_actions = 0
        max_num_actions = self.constants["horizon"]
        model_state = None
        # Only the first oracle action is replayed before stopping.
        trajectory = data_point.get_trajectory()[0:1]
        trajectory_len = len(trajectory)

        while True:
            # Follow the oracle trajectory, then stop.
            if num_actions == trajectory_len:
                action = self.action_space.get_stop_action_index()
            else:
                action = trajectory[num_actions]

            # Generate probabilities over actions
            if isinstance(self.model, AbstractModel):
                raise NotImplementedError()
            elif isinstance(self.model, AbstractIncrementalModel):
                log_probabilities, model_state, _, volatile = self.model.get_probs(
                    state, model_state, volatile=True)
                probabilities = list(torch.exp(log_probabilities.data))[0]
                # Compute goal prediction accuracy
                goal_loss, prob, _ = self.goal_prediction_accuracy(
                    state.goal, volatile)
                sum_loss += goal_loss
                count += 1
                if prob is not None:
                    sum_prob += prob
                    goal_prob_count += 1
            else:
                raise NotImplementedError()

            action_counts[action] += 1

            if action == self.action_space.get_stop_action_index(
            ) or num_actions >= max_num_actions:
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback(
                )
                if tensorboard is not None:
                    tensorboard.log_all_test_errors(
                        metadata["edit_dist_error"],
                        metadata["closest_dist_error"],
                        metadata["stop_dist_error"])
                if metadata["stop_dist_error"] < 5.0:
                    task_completion_accuracy += 1
                # Update the scores based on meta_data
                self.meta_data_util.log_results(metadata)
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(
                    action)
                # Update the agent state
                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"],
                                        metadata["z_pos"],
                                        metadata["y_angle"])
                state = state.update(
                    image, action, pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)
                ##################################
                state.goal = GoalPrediction.get_goal_location(
                    metadata, data_point, 8, 8)
                ##################################
                num_actions += 1

    print("Finished testing. Now logging.")
    task_completion_accuracy = (task_completion_accuracy * 100.0) / float(
        max(len(test_dataset), 1))
    self.log("Overall test results:", logger)
    self.log(
        "Testing: Task completion accuracy is: %r" %
        task_completion_accuracy, logger)
    self.log("Testing: Final Metadata: %r" % metadata, logger)
    self.log("Testing: Action Distribution: %r" % action_counts, logger)
    # Guard the denominators: count/goal_prob_count are 0 when the
    # dataset is empty or no probability was ever recorded.
    self.log(
        "Goal Count %r, Mean Goal Loss %r" %
        (count, sum_loss / float(max(count, 1))), logger)
    self.log(
        "Goal Prob Count %r, Mean Goal Prob %r" %
        (goal_prob_count, sum_prob / float(max(goal_prob_count, 1))),
        logger)
    self.meta_data_util.log_results(metadata, logger)
    self.log("Testing data action counts %r" % action_counts, logger)
    if pushover_logger is not None:
        pushover_feedback = str(
            metadata["feedback"]
        ) + " --- " + "task_completion_accuracy=%r" % task_completion_accuracy
        pushover_logger.log(pushover_feedback)
def do_train(self, agent, train_dataset, tune_dataset, experiment_name):
    """ Perform training.

    Contextual-bandit training on paragraph instructions: per episode,
    roll out sampled actions, collect (state, action, reward) replay
    items, and update the model once per episode. Saves a checkpoint
    per epoch.

    :param agent: agent providing the server connection and action space.
    :param train_dataset: list of training data points.
    :param tune_dataset: dataset evaluated at the start of each epoch.
    :param experiment_name: directory prefix for saved checkpoints.
    """
    dataset_size = len(train_dataset)

    for epoch in range(1, self.max_epoch + 1):
        logging.info("Starting epoch %d", epoch)
        action_counts = [0] * self.action_space.num_actions()

        # Test on tuning data
        agent.test(tune_dataset, tensorboard=self.tensorboard)

        batch_replay_items = []
        total_reward = 0
        episodes_in_batch = 0

        for data_point_ix, data_point in enumerate(train_dataset):
            if (data_point_ix + 1) % 100 == 0:
                logging.info("Done %d out of %d", data_point_ix,
                             dataset_size)
                logging.info("Training data action counts %r",
                             action_counts)

            # instruction = instruction_to_string(
            #     data_point.get_instruction(), self.config)
            # print "TRAIN INSTRUCTION: %r" % instruction
            # print ""

            # NOTE(review): this local is assigned but never used below;
            # the state is built from get_paragraph_instruction() directly.
            instruction = data_point.get_paragraph_instruction()

            num_actions = 0
            # Horizon = oracle trajectory length plus slack.
            max_num_actions = len(data_point.get_trajectory())
            max_num_actions += self.constants["max_extra_horizon"]

            image, metadata = agent.server.reset_receive_feedback(
                data_point)
            # Pose is discretized into 15-degree bins.
            pose = int(metadata["y_angle"] / 15.0)
            position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                    metadata["y_angle"])
            state = AgentObservedState(
                instruction=data_point.get_paragraph_instruction(),
                config=self.config,
                constants=self.constants,
                start_image=image,
                previous_action=None,
                pose=pose,
                position_orientation=position_orientation,
                data_point=data_point)
            state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices(
            )

            # True unless the agent itself chooses the stop action.
            forced_stop = True

            while num_actions < max_num_actions:
                # Sample action using the policy
                probabilities = list(
                    torch.exp(self.model.get_probs(state).data))
                action = gp.sample_action_from_prob(probabilities)
                action_counts[action] += 1

                if action == agent.action_space.get_stop_action_index():
                    forced_stop = False
                    break

                # Send the action and get feedback
                image, reward, metadata = agent.server.send_action_receive_feedback(
                    action)

                # Store it in the replay memory list
                replay_item = ReplayMemoryItem(state, action, reward)
                batch_replay_items.append(replay_item)

                # Update the agent state
                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"],
                                        metadata["z_pos"],
                                        metadata["y_angle"])
                state = state.update(
                    image, action, pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)

                num_actions += 1
                total_reward += reward

            # Send final STOP action and get feedback
            image, reward, metadata = agent.server.halt_and_receive_feedback(
            )
            total_reward += reward

            # Store the final stop only if the agent chose it itself.
            if not forced_stop:
                replay_item = ReplayMemoryItem(
                    state, agent.action_space.get_stop_action_index(),
                    reward)
                batch_replay_items.append(replay_item)

            # Update the scores based on meta_data
            # self.meta_data_util.log_results(metadata)

            # Perform update once per episode (batch size 1).
            episodes_in_batch += 1
            if episodes_in_batch == 1:
                loss_val = self.do_update(batch_replay_items)
                batch_replay_items = []
                # entropy_val = float(self.entropy.data[0])
                # self.tensorboard.log(entropy_val, loss_val, total_reward)
                cross_entropy = float(self.cross_entropy.data[0])
                self.tensorboard.log(cross_entropy, loss_val, total_reward)
                total_reward = 0
                episodes_in_batch = 0

            if self.tensorboard is not None:
                self.tensorboard.log_all_train_errors(
                    metadata["edit_dist_error"],
                    metadata["closest_dist_error"],
                    metadata["stop_dist_error"])

        # Save the model
        self.model.save_model(experiment_name +
                              "/contextual_bandit_resnet_epoch_" +
                              str(epoch))

        logging.info("Training data action counts %r", action_counts)
def test(self, test_dataset, vocab, tensorboard=None, logger=None,
         pushover_logger=None):
    """Evaluate the policy on ``test_dataset`` and log the Bisk metric.

    Rolls out the learned policy per data point until it stops or the
    horizon is reached; accumulates the server-reported "metric" and
    an action-count histogram.

    :param test_dataset: data points to evaluate.
    :param vocab: token -> index vocabulary used to encode the
        instruction text returned by the server.
    :param tensorboard: unused here (related logging is commented out).
    :param logger: optional text logger passed to ``self.log``.
    :param pushover_logger: optional push-notification logger.
    """
    self.server.clear_metadata()
    action_counts = [0] * self.action_space.num_actions()
    task_completion_accuracy = 0
    metadata = {"feedback": ""}
    sum_bisk_metric = 0

    for data_point_ix, data_point in enumerate(test_dataset):
        image, metadata = self.server.reset_receive_feedback(data_point)
        sum_bisk_metric += metadata["metric"]
        # The instruction text comes from the server metadata, not the
        # data point, and is re-encoded with the given vocabulary.
        instruction = self.convert_text_to_indices(metadata["instruction"],
                                                   vocab)
        state = AgentObservedState(instruction=instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   data_point=data_point)
        # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()
        num_actions = 0
        max_num_actions = self.constants["horizon"]
        model_state = None

        while True:
            # Generate probabilities over actions; dispatch on model type.
            if isinstance(self.model, AbstractModel):
                probabilities = list(
                    torch.exp(self.model.get_probs(state).data))
            elif isinstance(self.model, AbstractIncrementalModel):
                log_probabilities, model_state, _, _ = self.model.get_probs(
                    state, model_state, volatile=True)
                probabilities = list(torch.exp(log_probabilities.data))[0]
            else:
                # print "Num action is " + str(num_actions) + " and max is " + str(max_num_actions)
                log_probabilities, model_state = self.model.get_probs(
                    state, model_state)
                probabilities = list(torch.exp(log_probabilities.data))
                # raise AssertionError("Unhandled Model type.")

            # Use test policy to get the action
            action = self.test_policy(probabilities)
            action_counts[action] += 1

            if action == self.action_space.get_stop_action_index(
            ) or num_actions >= max_num_actions:
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback(
                )
                # if tensorboard is not None:
                #     tensorboard.log_all_test_errors(
                #         metadata["edit_dist_error"],
                #         metadata["closest_dist_error"],
                #         metadata["stop_dist_error"])
                # if metadata["stop_dist_error"] < 5.0:
                #     task_completion_accuracy += 1
                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata, logger)
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(
                    action)
                # Update the agent state
                state = state.update(image, action, data_point=data_point)
                num_actions += 1

    self.log("Overall test results:", logger)
    self.log(
        "Mean Bisk Metric %r" %
        (sum_bisk_metric / float(len(test_dataset))), logger)
    # self.log("Testing: Final Metadata: %r" % metadata, logger)
    self.log("Testing: Action Distribution: %r" % action_counts, logger)
    # self.meta_data_util.log_results(metadata, logger)
    self.log("Testing data action counts %r" % action_counts, logger)
    if pushover_logger is not None:
        pushover_feedback = str(metadata["feedback"])
        pushover_logger.log(pushover_feedback)
def get_3d_location_for_paragraphs(self, exploration_image, instruction,
                                   start_pos, goal_pos, panaroma=True):
    """Predict a 3D goal location from a panoramic exploration image.

    Runs the goal predictor over ``exploration_image``, converts the
    argmax attention cell back to world coordinates, and returns the
    predicted (x, z) position together with its L2 distance to
    ``goal_pos``.

    :param exploration_image: stitched panorama (or single view).
    :param instruction: encoded instruction for the predictor.
    :param start_pos: (x, z, angle) of the agent when the image was taken.
    :param goal_pos: true goal (x, z), used only for the error distance.
    :param panaroma: True when the image is a 6-view panorama.
    :return: ((x_pred, z_pred), distance_to_goal).
    """
    observed = AgentObservedState(instruction=instruction,
                                  config=self.config,
                                  constants=self.constants,
                                  start_image=exploration_image,
                                  previous_action=None,
                                  pose=None,
                                  position_orientation=start_pos,
                                  data_point=None)
    attention = self.predictor_model.get_attention_prob(observed,
                                                        model_state=None)
    best_cell = int(
        torch.max(attention["attention_logits"],
                  0)[1].data.cpu().numpy()[0])

    ########################################
    # inst_string = instruction_to_string(instruction, self.config)
    # self.save_attention_prob(exploration_image, volatile["attention_probs"][:-1].view(32, 192), inst_string)
    ########################################

    # The attention map has 32 rows x 192 columns (6 stitched 32-wide views).
    attn_row, attn_col = divmod(best_cell, 192)

    if panaroma:
        # Which of the six stitched views holds the goal, and the column
        # within that view.
        view_ix, attn_col = divmod(attn_col, 32)
        adjusted_angle = GoalPredictionSingle360ImageSupervisedLearningFromDisk.\
            get_new_pos_angle_from_region_index(view_ix, start_pos)
        camera_meta = {
            "x_pos": start_pos[0],
            "z_pos": start_pos[1],
            "y_angle": adjusted_angle
        }
    else:
        camera_meta = {
            "x_pos": start_pos[0],
            "z_pos": start_pos[1],
            "y_angle": start_pos[2]
        }

    # Use the cell center and invert the camera projection.
    center_row = attn_row + 0.5
    center_col = attn_col + 0.5
    camera_pos = current_pos_from_metadata(camera_meta)
    camera_pose = current_pose_from_metadata(camera_meta)
    height_drone = 2.5
    x_gen, z_gen = get_inverse_object_position(
        center_row, center_col, height_drone, 30, 32, 32,
        (camera_pos[0], camera_pos[1], camera_pose))

    x_goal, z_goal = goal_pos
    dx = x_gen - x_goal
    dz = z_gen - z_goal
    return (x_gen, z_gen), math.sqrt(dx * dx + dz * dz)
def _test(self, data_point, tensorboard=None, logger=None):
    """Roll out the policy on a single data point and return the result.

    Runs the learned policy until it emits stop or the horizon is hit,
    accumulating reward and the action sequence.

    :param data_point: episode to evaluate.
    :param tensorboard: optional scalar logger for navigation error.
    :param logger: optional text logger passed to ``self.log``.
    :return: (final server metadata, list of actions taken).
    """
    image, metadata = self.server.reset_receive_feedback(data_point)
    state = AgentObservedState(instruction=data_point.instruction,
                               config=self.config,
                               constants=self.constants,
                               start_image=image,
                               previous_action=None,
                               data_point=data_point)
    num_actions = 0
    max_num_actions = self.constants["horizon"]
    model_state = None
    actions = []
    total_reward = 0.0

    while True:
        # Generate probabilities over actions; dispatch on model type.
        if isinstance(self.model, AbstractModel):
            probabilities = list(
                torch.exp(self.model.get_probs(state).data))
        elif isinstance(self.model, AbstractIncrementalModel):
            log_probabilities, model_state, _, _ = self.model.get_probs(
                state, model_state, volatile=True)
            probabilities = list(torch.exp(log_probabilities.data))[0]
        else:
            log_probabilities, model_state = self.model.get_probs(
                state, model_state)
            probabilities = list(torch.exp(log_probabilities.data))

        # Use test policy to get the action
        action = self.test_policy(probabilities)
        actions.append(action)

        if action == self.action_space.get_stop_action_index(
        ) or num_actions >= max_num_actions:
            # Send the stop action and get the final feedback.
            image, reward, metadata = self.server.halt_and_receive_feedback(
            )
            if tensorboard is not None:
                tensorboard.log_scalar("navigation_error",
                                       metadata["navigation_error"])
            total_reward += reward
            # Update the scores based on meta_data
            self.log("StreetView Metadata: %r" % metadata, logger)
            self.log(
                "Test Example: Num actions %r, Navigation Error %r, Total Reward %r "
                % (num_actions, metadata["navigation_error"],
                   total_reward), logger)
            break
        else:
            # Send the action and get feedback
            image, reward, metadata = self.server.send_action_receive_feedback(
                action)
            total_reward += reward
            # Update the agent state
            state = state.update(image, action, data_point=data_point)
            num_actions += 1

    return metadata, actions
def test_auto_segmented(self, test_dataset, logger=None, tensorboard=None,
                        segmenting_type="oracle"):
    """Evaluate on paragraphs segment-by-segment with a goal predictor.

    For each instruction segment: capture a panorama, predict the 3D
    goal with ``get_3d_location_for_paragraphs``, then roll out the
    policy (re-anchoring ``state.goal`` after every step) until it
    stops or the horizon is hit.

    Fix: the final ``self.meta_data_util.log_results(metadata)`` call
    was duplicated (two identical consecutive calls); one is removed.

    :param test_dataset: paragraph data points to evaluate.
    :param logger: optional text logger passed to ``self.log``.
    :param tensorboard: optional error logger.
    :param segmenting_type: "auto" or "oracle" instruction segmentation.
    """
    assert segmenting_type in ("auto", "oracle")
    self.server.clear_metadata()
    action_counts = [0] * self.action_space.num_actions()
    self.log(
        "Performing testing on paragraphs with segmenting type %r" %
        segmenting_type, logger)
    metadata = {"feedback": ""}

    for data_point in test_dataset:
        if segmenting_type == "auto":
            segmented_instruction = data_point.get_instruction_auto_segmented(
            )
        else:
            segmented_instruction = data_point.get_instruction_oracle_segmented(
            )
        max_num_actions = self.constants["horizon"]
        image, metadata = self.server.reset_receive_feedback(data_point)

        for instruction_i, instruction in enumerate(segmented_instruction):
            pose = int(metadata["y_angle"] / 15.0)
            position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                    metadata["y_angle"])
            state = AgentObservedState(
                instruction=instruction,
                config=self.config,
                constants=self.constants,
                start_image=image,
                previous_action=None,
                pose=pose,
                position_orientation=position_orientation,
                data_point=data_point,
                prev_instruction=data_point.get_prev_instruction(),
                next_instruction=data_point.get_next_instruction())

            # Reset the actions taken and model state
            num_actions = 0
            model_state = None

            # Predict the goal from a panorama built out of the 6 views.
            exploration_image, _, _ = self.server.explore()
            image_slices = []
            for img_ctr in range(0, 6):
                image_slice = exploration_image[
                    img_ctr * 3:(img_ctr + 1) * 3, :, :]  # 3 x height x width
                # Scale the intensity of the image as done by scipy.misc.imsave
                image_slice = scipy.misc.bytescale(
                    image_slice.swapaxes(0, 1).swapaxes(1, 2))
                image_slices.append(image_slice)

            # Reorder and horizontally stitch the images
            reordered_images = [
                image_slices[3], image_slices[4], image_slices[5],
                image_slices[0], image_slices[1], image_slices[2]
            ]
            exploration_image = np.hstack(reordered_images).swapaxes(
                1, 2).swapaxes(0, 1)  # 3 x height x (width*6)

            start_pos = (metadata["x_pos"], metadata["z_pos"],
                         metadata["y_angle"])
            goal_pos = data_point.get_destination_list()[instruction_i]
            predicted_goal, predictor_error = self.get_3d_location_for_paragraphs(
                exploration_image, instruction, start_pos, goal_pos,
                panaroma=True)
            current_bot_location = metadata["x_pos"], metadata["z_pos"]
            current_bot_pose = metadata["y_angle"]
            state.goal = PredictorPlannerAgent.get_goal_location(
                current_bot_location, current_bot_pose, predicted_goal, 32,
                32)
            print("Predicted Error ", predictor_error)

            while True:
                # Generate probabilities over actions
                if isinstance(self.model, AbstractModel):
                    probabilities = list(
                        torch.exp(self.model.get_probs(state).data))
                elif isinstance(self.model, AbstractIncrementalModel):
                    log_probabilities, model_state, _, _ = self.model.get_probs(
                        state, model_state, volatile=True)
                    probabilities = list(
                        torch.exp(log_probabilities.data))[0]
                else:
                    raise AssertionError("Unhandled Model type.")

                # Use test policy to get the action
                action = self.test_policy(probabilities)
                action_counts[action] += 1

                if action == self.action_space.get_stop_action_index(
                ) or num_actions >= max_num_actions:
                    # Log per-segment distance to the intermediate goal.
                    intermediate_goal = data_point.get_destination_list(
                    )[instruction_i]
                    agent_position = metadata["x_pos"], metadata["z_pos"]
                    distance = self._l2_distance(agent_position,
                                                 intermediate_goal)
                    self.log("Instruction is %r " % instruction, logger)
                    self.log(
                        "Predicted Goal is %r, Goal Reached is %r and Real goal is %r "
                        % (predicted_goal, agent_position,
                           intermediate_goal), logger)
                    self.log(
                        "Agent: Position %r got Distance %r " %
                        (instruction_i + 1, distance), logger)
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(
                        action)
                    # Update the agent state
                    pose = int(metadata["y_angle"] / 15.0)
                    position_orientation = (metadata["x_pos"],
                                            metadata["z_pos"],
                                            metadata["y_angle"])
                    state = state.update(
                        image, action, pose=pose,
                        position_orientation=position_orientation,
                        data_point=data_point)
                    # Re-anchor the goal on the new position and angle.
                    current_bot_location = metadata["x_pos"], metadata[
                        "z_pos"]
                    current_bot_pose = metadata["y_angle"]
                    state.goal = PredictorPlannerAgent.get_goal_location(
                        current_bot_location, current_bot_pose,
                        predicted_goal, 32, 32)
                    num_actions += 1

        # Final stop for the whole paragraph.
        image, reward, metadata = self.server.halt_and_receive_feedback()
        if tensorboard is not None:
            tensorboard.log_all_test_errors(metadata["edit_dist_error"],
                                            metadata["closest_dist_error"],
                                            metadata["stop_dist_error"])

        # Update the scores based on meta_data (previously logged twice
        # by an accidental duplicate call).
        self.meta_data_util.log_results(metadata)

    logging.info("Testing data action counts %r", action_counts)
def test_auto_segmented(self, test_dataset, segmenting_type="oracle",
                        tensorboard=None, logger=None,
                        pushover_logger=None):
    """Evaluate on paragraphs by rolling out the policy per segment.

    For each instruction segment the policy runs until it stops or the
    horizon is hit; after all segments a final halt is issued and
    paragraph-level errors are logged.

    :param test_dataset: paragraph data points to evaluate.
    :param segmenting_type: "auto" or "oracle" instruction segmentation.
    :param tensorboard: optional error logger.
    :param logger: optional text logger passed to ``self.log``.
    :param pushover_logger: optional push-notification logger.
    """
    assert segmenting_type in ("auto", "oracle")
    self.server.clear_metadata()
    action_counts = [0] * self.action_space.num_actions()
    self.log(
        "Performing testing on paragraphs with segmenting type %r" %
        segmenting_type, logger)
    metadata = {"feedback": ""}
    task_completion_accuracy = 0

    for data_point in test_dataset:
        if segmenting_type == "auto":
            segmented_instruction = data_point.get_instruction_auto_segmented(
            )
        else:
            segmented_instruction = data_point.get_instruction_oracle_segmented(
            )
        max_num_actions = self.constants["horizon"]
        image, metadata = self.server.reset_receive_feedback(data_point)

        for instruction_i, instruction in enumerate(segmented_instruction):
            pose = int(metadata["y_angle"] / 15.0)
            position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                    metadata["y_angle"])
            state = AgentObservedState(
                instruction=instruction,
                config=self.config,
                constants=self.constants,
                start_image=image,
                previous_action=None,
                pose=pose,
                position_orientation=position_orientation,
                data_point=data_point,
                prev_instruction=data_point.get_prev_instruction(),
                next_instruction=data_point.get_next_instruction())

            # Reset the actions taken and model state
            num_actions = 0
            model_state = None

            while True:
                # Generate probabilities over actions
                if isinstance(self.model, AbstractModel):
                    probabilities = list(
                        torch.exp(self.model.get_probs(state).data))
                elif isinstance(self.model, AbstractIncrementalModel):
                    log_probabilities, model_state, _, _ = self.model.get_probs(
                        state, model_state, volatile=True)
                    probabilities = list(
                        torch.exp(log_probabilities.data))[0]
                else:
                    raise AssertionError("Unhandled Model type.")

                # Use test policy to get the action
                action = self.test_policy(probabilities)
                action_counts[action] += 1

                if action == self.action_space.get_stop_action_index(
                ) or num_actions >= max_num_actions:
                    # Compute the l2 distance
                    # NOTE(review): `distance` is computed but the lines
                    # that logged it are commented out below, so it is
                    # currently unused.
                    intermediate_goal = data_point.get_destination_list(
                    )[instruction_i]
                    agent_position = metadata["x_pos"], metadata["z_pos"]
                    distance = self._l2_distance(agent_position,
                                                 intermediate_goal)
                    # logging.info("Agent: Position %r got Distance %r " % (instruction_i + 1, distance))
                    # self.log("Agent: Position %r got Distance %r " % (instruction_i + 1, distance), logger)
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(
                        action)
                    # Update the agent state
                    pose = int(metadata["y_angle"] / 15.0)
                    position_orientation = (metadata["x_pos"],
                                            metadata["z_pos"],
                                            metadata["y_angle"])
                    state = state.update(
                        image, action, pose=pose,
                        position_orientation=position_orientation,
                        data_point=data_point)
                    num_actions += 1

        # Final stop for the whole paragraph.
        image, reward, metadata = self.server.halt_and_receive_feedback()
        if tensorboard is not None:
            tensorboard.log_all_test_errors(metadata["edit_dist_error"],
                                            metadata["closest_dist_error"],
                                            metadata["stop_dist_error"])

        # Update the scores based on meta_data
        self.meta_data_util.log_results(metadata)

        if metadata["stop_dist_error"] < 5.0:
            task_completion_accuracy += 1

    logging.info("Testing data action counts %r", action_counts)
    task_completion_accuracy = (task_completion_accuracy * 100.0) / float(
        max(len(test_dataset), 1))
    self.log("Overall test results:", logger)
    self.log(
        "Testing: Task completion accuracy is: %r" %
        task_completion_accuracy, logger)
    self.log("Testing: Final Metadata: %r" % metadata, logger)
    self.log("Testing: Action Distribution: %r" % action_counts, logger)
    self.log("Testing data action counts %r" % action_counts, logger)
    self.meta_data_util.log_results(metadata, logger)
    if pushover_logger is not None:
        pushover_feedback = str(metadata["feedback"]) + \
            " --- " + "task_completion_accuracy=%r" % task_completion_accuracy
        pushover_logger.log(pushover_feedback)