Example #1
    def debug_manual_control(self, data_point, vocab):

        self.server.clear_metadata()
        task_completion_accuracy = 0

        image, metadata = self.server.reset_receive_feedback(data_point)
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   data_point=data_point)
        num_actions = 0
        print("Instruction is ",
              " ".join([vocab[index] for index in data_point.instruction]))
        plt.ion()

        while True:
            # Show the goal location
            self.show_goal_location(image, metadata)

            incorrect_action = True
            action_string = None
            while incorrect_action:
                action_string = input(
                    "Take the action. 0: Forward, 1: Left, 2: Right, 3: Stop, 4: Interact\n"
                )
                if action_string in ['0', '1', '2', '3', '4']:
                    incorrect_action = False
                if action_string == '4':
                    interact_values = input(
                        "Enter the row and column in format: row col")
                    row, col = interact_values.split()
                    row, col = int(row), int(col)
                    action_string = 4 + row * 32 + col

            action = int(action_string)
            action_name = self.action_space.get_action_name(action)

            if action == self.action_space.get_stop_action_index():
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback(
                )

                print("Metadata is ", metadata)
                if metadata["navigation-error"] <= 1.0:
                    task_completion_accuracy += 1
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(
                    action)
                # Update the agent state
                state = state.update(image, action, data_point=data_point)
                num_actions += 1

            print("Metadata is ", metadata)
            print("Took action %r, Got reward %r" % (action_name, reward))
Example #2
    def do_train(self, agent, train_dataset, test_dataset, experiment_name):
        """ Perform training """

        dataset_size = len(train_dataset)
        clock = 0
        clock_max = 1  #32

        for epoch in range(1, self.max_epoch + 1):

            logging.info("Starting epoch %d", epoch)
            self.test_classifier(agent, test_dataset)

            for data_point_ix, data_point in enumerate(train_dataset):

                if (data_point_ix + 1) % 100 == 0:
                    logging.info("Done %d out of %d", data_point_ix,
                                 dataset_size)

                batch_replay_items = []

                state = AgentObservedState(
                    instruction=data_point.instruction,
                    config=self.config,
                    constants=self.constants,
                    start_image=None,  # image,
                    previous_action=None)
                segments = data_point.get_instruction_oracle_segmented()
                segment_lens = [len(s) for s in segments]
                for seg_len in segment_lens:
                    num_read = 0
                    while num_read < seg_len:
                        state = state.update_on_read()
                        num_read += 1
                        if num_read < seg_len:
                            batch_replay_items.append((state, 0))
                        else:
                            batch_replay_items.append((state, 1))
                    state = state.update_on_act_halt()

                # add to global memory
                for replay_item in batch_replay_items:
                    self.global_replay_memory.append(replay_item)

                clock += 1
                if clock % clock_max == 0:
                    batch_replay_items = self.sample_from_global_memory()
                    self.global_replay_memory.clear()
                    clock = 0
                    # Perform update
                    loss_val = self.do_update(batch_replay_items)
                    self.tensorboard.log_loglikelihood_position(loss_val)

            # Save the model
            self.model.save_model(experiment_name +
                                  "/mle_segmentation_prediction_epoch_" +
                                  str(epoch))
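The replay items built above label every instruction token with 0 except the final token of each oracle segment, which is labelled 1 (a segment-boundary signal). A standalone sketch of that labelling scheme, with an illustrative function name:

    def segment_boundary_labels(segment_lens):
        # 0 for tokens inside a segment, 1 for the token that closes it
        labels = []
        for seg_len in segment_lens:
            if seg_len > 0:
                labels.extend([0] * (seg_len - 1) + [1])
        return labels

    # e.g. segment_boundary_labels([3, 2]) -> [0, 0, 1, 0, 1]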
Example #3
    def test_baseline(self, test_dataset):

        self.server.clear_metadata()

        metadata = {"feedback": ""}
        num_actions_list = []
        task_completion_accuracy = 0
        for data_point in test_dataset:
            image, metadata = self.server.reset_receive_feedback(data_point)
            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None)

            num_actions = 0
            # max_num_actions = len(data_point.get_trajectory())
            # max_num_actions += self.constants["max_extra_horizon"]
            num_segments = len(data_point.get_instruction_oracle_segmented())
            max_num_actions = self.constants["horizon"] * num_segments

            while True:

                action = self.get_next_action(data_point, num_actions)

                if action == self.action_space.get_stop_action_index(
                ) or num_actions >= max_num_actions:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.halt_and_receive_feedback(
                    )
                    num_actions_list.append(num_actions)
                    self.meta_data_util.log_results(metadata)

                    if metadata["stop_dist_error"] < 5.0:
                        task_completion_accuracy += 1
                    break

                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(
                        action)

                    # Update the agent state
                    state = state.update(image, action)
                    num_actions += 1

                    # self._save_agent_state(state, num_actions)

        self.meta_data_util.log_results(metadata)
        task_completion_accuracy /= float(max(len(test_dataset), 1))
        task_completion_accuracy *= 100.0
        mean_num_actions = float(np.array(num_actions_list).mean())
        logging.info("Task completion accuracy %r", task_completion_accuracy)
        logging.info("Done testing baseline %r, mean num actions is %f",
                     self.baseline_name, mean_num_actions)
Example #4
    def test_human_performance(self, dataset, vocab, logger):

        self.server.clear_metadata()

        for data_point in dataset:

            task_completion_accuracy = 0

            image, metadata = self.server.reset_receive_feedback(data_point)
            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None,
                                       data_point=data_point)
            num_actions = 0
            print("Instruction is ",
                  " ".join([vocab[index] for index in data_point.instruction]))

            while True:

                incorrect_action = True
                action_string = None
                while incorrect_action:
                    action_string = input(
                        "Take the action. 0: Forward, 1: Left, 2: Right, 3: Stop, 4: Interact\n"
                    )
                    if action_string in ['0', '1', '2', '3', '4']:
                        incorrect_action = False
                    if action_string == '4':
                        interact_values = input(
                            "Enter the row and column in format: row col")
                        row, col = interact_values.split()
                        row, col = int(row), int(col)
                        action_string = 4 + row * 32 + col

                action = int(action_string)

                if action == self.action_space.get_stop_action_index():
                    # Send the action and get feedback
                    image, reward, metadata = self.server.halt_and_receive_feedback(
                    )

                    if metadata["navigation-error"] <= 1.0:
                        task_completion_accuracy += 1
                        logger.log("Completed the task")
                    logger.log("Meta data is %r " % metadata)
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(
                        action)
                    # Update the agent state
                    state = state.update(image, action, data_point=data_point)
                    num_actions += 1
Example #5
    def get_3d_location(self, exploration_image, data_point, panaroma=True):

        state = AgentObservedState(
            instruction=data_point.instruction,
            config=self.config,
            constants=self.constants,
            start_image=exploration_image,
            previous_action=None,
            pose=None,
            position_orientation=data_point.get_start_pos(),
            data_point=data_point)

        volatile = self.predictor_model.get_attention_prob(state,
                                                           model_state=None)
        inferred_ix = int(
            torch.max(volatile["attention_logits"],
                      0)[1].data.cpu().numpy()[0])
        # Max pointed out that when the inferred index above is the last value, the calculations are buggy. He is right.

        predicted_row = int(inferred_ix / float(192))
        predicted_col = inferred_ix % 192
        screen_pos = (predicted_row, predicted_col)

        if panaroma:
            # Index of the 6 image where the goal is
            region_index = int(predicted_col / 32)
            predicted_col = predicted_col % 32  # Column within that image where the goal is
            pos = data_point.get_start_pos()
            new_pos_angle = GoalPredictionSingle360ImageSupervisedLearningFromDisk.\
                get_new_pos_angle_from_region_index(region_index, pos)
            metadata = {
                "x_pos": pos[0],
                "z_pos": pos[1],
                "y_angle": new_pos_angle
            }
        else:
            pos = data_point.get_start_pos()
            metadata = {"x_pos": pos[0], "z_pos": pos[1], "y_angle": pos[2]}

        row, col = predicted_row + 0.5, predicted_col + 0.5

        start_pos = current_pos_from_metadata(metadata)
        start_pose = current_pose_from_metadata(metadata)

        goal_pos = data_point.get_destination_list()[-1]
        height_drone = 2.5
        x_gen, z_gen = get_inverse_object_position(
            row, col, height_drone, 30, 32, 32,
            (start_pos[0], start_pos[1], start_pose))
        predicted_goal_pos = (x_gen, z_gen)
        x_goal, z_goal = goal_pos

        x_diff = x_gen - x_goal
        z_diff = z_gen - z_goal

        dist = math.sqrt(x_diff * x_diff + z_diff * z_diff)

        return predicted_goal_pos, dist, screen_pos, volatile[
            "attention_probs"]
Example #6
    def debug_human_control(self, data_point, tensorboard=None):

        image, metadata = self.server.reset_receive_feedback(data_point)
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   data_point=data_point)
        num_actions = 0
        max_num_actions = self.constants["horizon"]
        actions = []

        message = ""
        for action in range(self.action_space.num_actions()):
            message = message + "%d (%s) " % (
                action, self.action_space.get_action_name(action)) + " "

        while True:
            # Use test policy to get the action
            action = input("Take action according to the message: " +
                           str(message))
            action = int(action)
            actions.append(action)

            if action == self.action_space.get_stop_action_index(
            ) or num_actions >= max_num_actions:
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback(
                )
                if tensorboard is not None:
                    tensorboard.log_scalar("navigation_error",
                                           metadata["navigation_error"])

                # Update the scores based on meta_data
                self.meta_data_util.log_results(metadata)
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(
                    action)
                # Update the agent state
                state = state.update(image, action, data_point=data_point)
                num_actions += 1

        return metadata, actions
Example #7
    def calc_log_prob(self, tune_dataset, tune_image, tensorboard):

        total_validation_log_probability = 0
        for data_point_ix, data_point in enumerate(tune_dataset):
            tune_image_example = tune_image[data_point_ix]
            image = tune_image_example[0]

            model_state = None
            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None,
                                       pose=None,
                                       position_orientation=None,
                                       data_point=data_point)
            trajectory = data_point.get_trajectory()

            validation_log_probability = 0

            for action_ix, action in enumerate(trajectory):
                log_probabilities, model_state, image_emb_seq = self.model.get_probs(
                    state, model_state)
                validation_log_probability += float(
                    log_probabilities.data[0][action])
                image = tune_image_example[action_ix + 1]
                state = state.update(image,
                                     action,
                                     pose=None,
                                     position_orientation=None,
                                     data_point=data_point)

            log_probabilities, model_state, image_emb_seq = self.model.get_probs(
                state, model_state)
            validation_log_probability += float(log_probabilities.data[0][
                self.action_space.get_stop_action_index()])
            mean_validation_log_probability = validation_log_probability / float(
                len(trajectory) + 1)
            tensorboard.log_scalar("Validation_Log_Prob",
                                   mean_validation_log_probability)
            total_validation_log_probability += mean_validation_log_probability
        total_validation_log_probability /= float(max(len(tune_dataset), 1))
        logging.info("Mean Validation Log Prob is %r",
                     total_validation_log_probability)
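The validation score computed above is the mean log-probability of the gold actions plus the final stop action. The same reduction written as a pure function (the name is illustrative):

    def mean_trajectory_log_prob(action_log_probs, stop_log_prob):
        # Average over the gold trajectory actions plus the stop action
        return (sum(action_log_probs) + stop_log_prob) / float(len(action_log_probs) + 1)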
Example #8
    def _explore_and_set_tracking(self, server, data_point):

        # Get the panoramic image
        panorama, _ = server.explore()

        # Get the panorama and predict the goal location
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=panorama,
                                   previous_action=None,
                                   pose=None,
                                   position_orientation=None,
                                   data_point=data_point)

        volatile = self.local_predictor_model.get_attention_prob(state, model_state=None)
        attention_prob = list(volatile["attention_probs"].view(-1)[:-1].data.cpu().numpy())
        inferred_ix = gp.sample_action_from_prob(attention_prob)
        sampled_prob = volatile["attention_probs"][inferred_ix]

        if inferred_ix == 6 * self.config["num_manipulation_row"] * self.config["num_manipulation_col"]:
            print("Predicting Out-of-sight")
            return

        assert 0 <= inferred_ix < 6 * self.config["num_manipulation_row"] * self.config["num_manipulation_col"]

        row = int(inferred_ix / (6 * self.config["num_manipulation_col"]))
        col = inferred_ix % (6 * self.config["num_manipulation_col"])
        region_ix = int(col / self.config["num_manipulation_col"])

        if region_ix == 0:
            camera_ix = 3
        elif region_ix == 1:
            camera_ix = 4
        elif region_ix == 2:
            camera_ix = 5
        elif region_ix == 3:
            camera_ix = 0
        elif region_ix == 4:
            camera_ix = 1
        elif region_ix == 5:
            camera_ix = 2
        else:
            raise AssertionError("region ix should be in {0, 1, 2, 3, 4, 5}. Found ", region_ix)

        col = col % self.config["num_manipulation_col"]

        # Set tracking
        row_value = min(1.0, (row + 0.5) / float(self.config["num_manipulation_row"]))
        col_value = min(1.0, (col + 0.5) / float(self.config["num_manipulation_col"]))

        server.set_tracking(camera_ix, row_value, col_value)

        return sampled_prob
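The if/elif chain above maps panorama regions 0-5 to camera indices 3, 4, 5, 0, 1, 2. An equivalent lookup-table sketch; the table and function names are illustrative:

    REGION_TO_CAMERA = [3, 4, 5, 0, 1, 2]

    def camera_for_region(region_ix):
        # Same mapping as the if/elif chain above
        return REGION_TO_CAMERA[region_ix]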
Example #9
    def do_train(self, agent, train_dataset, test_dataset, train_images, test_images, experiment_name):
        """ Perform training """

        dataset_size = len(train_dataset)
        clock = 0
        clock_max = 1

        for epoch in range(1, self.max_epoch + 1):

            logging.info("Starting epoch %d", epoch)
            self.test_classifier(agent, test_dataset, test_images)

            for data_point_ix, data_point in enumerate(train_dataset):

                if (data_point_ix + 1) % 100 == 0:
                    logging.info("Done %d out of %d", data_point_ix, dataset_size)

                batch_replay_items = []

                state = AgentObservedState(instruction=data_point.instruction,
                                           config=self.config,
                                           constants=self.constants,
                                           start_image=train_images[data_point_ix],
                                           previous_action=None,
                                           data_point=data_point)

                # Store it in the replay memory list
                symbolic_form = nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(data_point)
                replay_item = SymbolicTextReplayMemoryItem(state, symbolic_form)
                batch_replay_items.append(replay_item)

                # Global
                for replay_item in batch_replay_items:
                    self.global_replay_memory.append(replay_item)

                clock += 1
                if clock % clock_max == 0:
                    batch_replay_items = self.sample_from_global_memory()
                    self.global_replay_memory.clear()
                    clock = 0
                    # Perform update
                    loss_val = self.do_update(batch_replay_items)
                    self.tensorboard.log_loglikelihood_position(loss_val)

            # Save the model
            self.model.save_model(experiment_name + "/ml_learning_symbolic_text_prediction_epoch_" + str(epoch))
Example #10
    def do_train_(house_id, shared_model, config, action_space, meta_data_util, constants,
                  train_dataset, tune_dataset, experiment, experiment_name, rank, server,
                  logger, model_type, vocab, use_pushover=False):

        logger.log("In Training...")
        launch_k_unity_builds([config["port"]], "./house_" + str(house_id) + "_elmer.x86_64",
                              arg_str="--config ./AssetsHouse/config" + str(house_id) + ".json",
                              cwd="./simulators/house/")
        logger.log("Launched Builds.")
        server.initialize_server()
        logger.log("Server Initialized.")

        # Test policy
        test_policy = gp.get_argmax_action

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
            logger.log('Created Tensorboard Server.')
        else:
            tensorboard = None

        if use_pushover:
            pushover_logger = None
        else:
            pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(config, constants)
        # local_model.train()

        # Create the Agent
        tmp_agent = TmpHouseAgent(server=server,
                                  model=local_model,
                                  test_policy=test_policy,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=config,
                                  constants=constants)
        logger.log("Created Agent.")

        action_counts = [0] * action_space.num_actions()
        max_epochs = 100000 # constants["max_epochs"]
        dataset_size = len(train_dataset)
        tune_dataset_size = len(tune_dataset)

        if tune_dataset_size > 0:
            # Test on tuning data
            tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard,
                           logger=logger, pushover_logger=pushover_logger)

        # Create the learner to compute the loss
        learner = TmpAsynchronousContextualBandit(shared_model, local_model, action_space, meta_data_util,
                                                  config, constants, tensorboard)
        # TODO change 2 --- unity launch moved up
        learner.logger = logger

        for epoch in range(1, max_epochs + 1):

            for data_point_ix, data_point in enumerate(train_dataset):

                # Sync with the shared model
                # local_model.load_state_dict(shared_model.state_dict())
                local_model.load_from_state_dict(shared_model.get_state_dict())

                if (data_point_ix + 1) % 100 == 0:
                    logger.log("Done %d out of %d" %(data_point_ix, dataset_size))
                    logger.log("Training data action counts %r" % action_counts)

                num_actions = 0
                max_num_actions = constants["horizon"]
                max_num_actions += constants["max_extra_horizon"]

                image, metadata = tmp_agent.server.reset_receive_feedback(data_point)
                instruction = data_point.get_instruction()
                # instruction_str = TmpAsynchronousContextualBandit.convert_indices_to_text(instruction, vocab)
                # print("Instruction str is ", instruction_str)

                # Pose and Orientation gone TODO change 3
                state = AgentObservedState(instruction=instruction,
                                           config=config,
                                           constants=constants,
                                           start_image=image,
                                           previous_action=None,
                                           data_point=data_point)
                state.goal = learner.get_goal(metadata)

                model_state = None
                batch_replay_items = []
                total_reward = 0
                forced_stop = True

                while num_actions < max_num_actions:

                    # logger.log("Training: Meta Data %r " % metadata)

                    # Sample action using the policy
                    log_probabilities, model_state, image_emb_seq, state_feature = \
                        local_model.get_probs(state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))[0]

                    # Sample action from the probability
                    action = gp.sample_action_from_prob(probabilities)
                    action_counts[action] += 1

                    if action == action_space.get_stop_action_index():
                        forced_stop = False
                        break

                    # Send the action and get feedback
                    image, reward, metadata = tmp_agent.server.send_action_receive_feedback(action)
                    # logger.log("Action is %r, Reward is %r Probability is %r " % (action, reward, probabilities))

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state, action, reward, log_prob=log_probabilities)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    # Pose and orientation gone, TODO change 4
                    state = state.update(image, action, data_point=data_point)
                    state.goal = learner.get_goal(metadata)

                    num_actions += 1
                    total_reward += reward

                # Send final STOP action and get feedback
                image, reward, metadata = tmp_agent.server.halt_and_receive_feedback()
                total_reward += reward

                # Store it in the replay memory list
                if not forced_stop:
                    # logger.log("Action is Stop, Reward is %r Probability is %r " % (reward, probabilities))
                    replay_item = ReplayMemoryItem(state, action_space.get_stop_action_index(),
                                                   reward, log_prob=log_probabilities)
                    batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                if len(batch_replay_items) > 0:  # 32
                    loss_val = learner.do_update(batch_replay_items)

                    if tensorboard is not None:
                        # cross_entropy = float(learner.cross_entropy.data[0])
                        # tensorboard.log(cross_entropy, loss_val, 0)
                        tensorboard.log_scalar("loss", loss_val)
                        entropy = float(learner.entropy.data[0])/float(num_actions + 1)
                        tensorboard.log_scalar("entropy", entropy)
                        ratio = float(learner.ratio.data[0])
                        tensorboard.log_scalar("Abs_objective_to_entropy_ratio", ratio)
                        tensorboard.log_scalar("total_reward", total_reward)
                        tensorboard.log_scalar("mean navigation error", metadata['mean-navigation-error'])

                        if learner.action_prediction_loss is not None:
                            action_prediction_loss = float(learner.action_prediction_loss.data[0])
                            learner.tensorboard.log_action_prediction_loss(action_prediction_loss)
                        if learner.temporal_autoencoder_loss is not None:
                            temporal_autoencoder_loss = float(learner.temporal_autoencoder_loss.data[0])
                            tensorboard.log_temporal_autoencoder_loss(temporal_autoencoder_loss)
                        if learner.object_detection_loss is not None:
                            object_detection_loss = float(learner.object_detection_loss.data[0])
                            tensorboard.log_object_detection_loss(object_detection_loss)
                        if learner.symbolic_language_prediction_loss is not None:
                            symbolic_language_prediction_loss = float(learner.symbolic_language_prediction_loss.data[0])
                            tensorboard.log_scalar("sym_language_prediction_loss", symbolic_language_prediction_loss)
                        if learner.goal_prediction_loss is not None:
                            goal_prediction_loss = float(learner.goal_prediction_loss.data[0])
                            tensorboard.log_scalar("goal_prediction_loss", goal_prediction_loss)

            # Save the model
            local_model.save_model(experiment + "/contextual_bandit_" + str(rank) + "_epoch_" + str(epoch))
            logger.log("Training data action counts %r" % action_counts)

            if tune_dataset_size > 0:
                # Test on tuning data
                tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard,
                               logger=logger, pushover_logger=pushover_logger)
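The training loop above samples actions with gp.sample_action_from_prob applied to the exponentiated log-probabilities. That helper is not shown here; a minimal sketch of what such categorical sampling could look like, assuming a plain list of non-negative probabilities:

    import random

    def sample_action_from_prob(probabilities):
        # Draw an index with chance proportional to its probability mass
        total = sum(float(p) for p in probabilities)
        threshold = random.random() * total
        acc = 0.0
        for ix, p in enumerate(probabilities):
            acc += float(p)
            if threshold <= acc:
                return ix
        return len(probabilities) - 1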
Example #11
    def test_multi_step_action_types(self, test_dataset, vocab, goal_type=None,
                                     tensorboard=None, logger=None, pushover_logger=None):
        """ Perform a single step testing i.e. the goal prediction module is called only once. """

        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()
        task_completion_accuracy = 0

        metadata = {"feedback": ""}
        text_embedding_model = self.goal_prediction_model.text_module

        for data_point_ix, data_point in enumerate(test_dataset):

            instruction_string = " ".join([vocab[token_id] for token_id in data_point.instruction])
            self.log("Instruction is %r " % instruction_string, logger)

            # Call the action type model to determine the number of steps
            token_indices = self.action_type_model.decoding_from_indices_to_indices(data_point.instruction,
                                                                                    text_embedding_model)

            print("Token indices ", token_indices)
            assert len(token_indices) <= 5

            # Call the navigation model
            image, metadata = self.server.reset_receive_feedback(data_point)

            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None,
                                       data_point=data_point)
            num_actions = 0
            max_num_actions = self.constants["horizon"]
            num_inner_loop_steps = int(max_num_actions/max(1, len(token_indices)))
            model_state = None

            for outer_loop_iter in range(0, len(token_indices)):

                if goal_type == "inferred":
                    # Get the panorama and set tracking
                    self._explore_and_set_tracking(data_point, data_point_ix, instruction_string)

                state.goal = self.get_goal(metadata, goal_type)

                for inner_loop_iter in range(0, num_inner_loop_steps):

                    # Generate probabilities over actions
                    if isinstance(self.navigation_model, AbstractModel):
                        probabilities = list(torch.exp(self.navigation_model.get_probs(state).data))
                    elif isinstance(self.navigation_model, AbstractIncrementalModel):
                        log_probabilities, model_state, _, _ = self.navigation_model.get_probs(state, model_state, volatile=True)
                        probabilities = list(torch.exp(log_probabilities.data))[0]
                    else:
                        log_probabilities, model_state = self.navigation_model.get_probs(state, model_state)
                        probabilities = list(torch.exp(log_probabilities.data))

                    # Use test policy to get the action
                    action = self.test_policy(probabilities)
                    action_counts[action] += 1

                    if token_indices[outer_loop_iter] == 1:
                        print("Performing interaction")
                        row, col, row_real, col_real = state.goal
                        if row is not None and col is not None:
                            act_name = "interact %r %r" % (row, col)
                            interact_action = self.action_space.get_action_index(act_name)
                            image, reward, metadata = self.server.send_action_receive_feedback(interact_action)

                    if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
                        break
                    else:
                        # Send the action and get feedback
                        image, reward, metadata = self.server.send_action_receive_feedback(action)

                        # Update the agent state
                        state = state.update(image, action, data_point=data_point)
                        state.goal = self.get_goal(metadata, goal_type)
                        num_actions += 1

                if num_actions >= max_num_actions:
                    break

            # Send the action and get feedback
            image, reward, metadata = self.server.halt_and_receive_feedback()

            if metadata["navigation-error"] <= 1.0:
                task_completion_accuracy += 1

            # Update the scores based on meta_data
            # self.meta_data_util.log_results(metadata, logger)
            self.log("Overall test results: %r " % metadata, logger)

        task_completion_accuracy = (task_completion_accuracy * 100.0)/float(max(len(test_dataset), 1))
        self.log("Overall test results:", logger)
        self.log("Testing: Final Metadata: %r" % metadata, logger)
        self.log("Testing: Action Distribution: %r" % action_counts, logger)
        self.log("Testing: Task Completion Accuracy: %r " % task_completion_accuracy, logger)
        # self.meta_data_util.log_results(metadata, logger)
        self.log("Testing data action counts %r" % action_counts, logger)
        if pushover_logger is not None:
            pushover_feedback = str(metadata["feedback"])
            pushover_logger.log(pushover_feedback)
Example #12
    def test_single_step(self, test_dataset, vocab, goal_type="gold",
                         tensorboard=None, logger=None, pushover_logger=None):
        """ Perform a single step testing i.e. the goal prediction module is called only once. """

        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()
        task_completion_accuracy = 0

        metadata = {"feedback": ""}

        for data_point_ix, data_point in enumerate(test_dataset):

            instruction_string = " ".join([vocab[token_id] for token_id in data_point.instruction])
            self.log("Instruction is %r " % instruction_string, logger)

            # Call the navigation model
            image, metadata = self.server.reset_receive_feedback(data_point)

            if goal_type == "inferred":
                # Get the panorama and set tracking
                self._explore_and_set_tracking(data_point, data_point_ix, instruction_string)

            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None,
                                       data_point=data_point)
            state.goal = self.get_goal(metadata, goal_type)
            num_actions = 0
            max_num_actions = self.constants["horizon"]
            model_state = None

            while True:

                # Generate probabilities over actions
                if isinstance(self.navigation_model, AbstractModel):
                    probabilities = list(torch.exp(self.navigation_model.get_probs(state).data))
                elif isinstance(self.navigation_model, AbstractIncrementalModel):
                    log_probabilities, model_state, _, _ = self.navigation_model.get_probs(state, model_state, volatile=True)
                    probabilities = list(torch.exp(log_probabilities.data))[0]
                else:
                    log_probabilities, model_state = self.navigation_model.get_probs(state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))
                    # raise AssertionError("Unhandled Model type.")

                # Use test policy to get the action
                action = self.test_policy(probabilities)
                action_counts[action] += 1

                if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:

                    # Send the action and get feedback
                    image, reward, metadata = self.server.halt_and_receive_feedback()
                    # if tensorboard is not None:
                    #     tensorboard.log_all_test_errors(
                    #         metadata["edit_dist_error"],
                    #         metadata["closest_dist_error"],
                    #         metadata["stop_dist_error"])

                    # self.log("Testing: Taking stop action and got reward %r " % reward, logger)

                    if metadata["navigation-error"] <= 1.0:
                        task_completion_accuracy += 1

                    # Update the scores based on meta_data
                    # self.meta_data_util.log_results(metadata, logger)
                    self.log("Overall test results: %r " % metadata, logger)

                    #############################################
                    # Take a dummy manipulation action
                    # row, col, row_real, col_real = state.goal
                    # if row is not None and col is not None:
                    #     act_name = "interact %r %r" % (row, col)
                    #     interact_action = self.action_space.get_action_index(act_name)
                    #     image, reward, metadata = self.server.send_action_receive_feedback(interact_action)
                    #############################################

                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(action)
                    # self.log("Testing: Taking action %r and got reward %r " % (action, reward), logger)
                    # time.sleep(0.5)
                    # Update the agent state
                    state = state.update(image, action, data_point=data_point)
                    state.goal = self.get_goal(metadata, goal_type)
                    num_actions += 1

        task_completion_accuracy = (task_completion_accuracy * 100.0)/float(max(len(test_dataset), 1))
        self.log("Overall test results:", logger)
        self.log("Testing: Final Metadata: %r" % metadata, logger)
        self.log("Testing: Action Distribution: %r" % action_counts, logger)
        self.log("Testing: Task Completion Accuracy: %r " % task_completion_accuracy, logger)
        # self.meta_data_util.log_results(metadata, logger)
        self.log("Testing data action counts %r" % action_counts, logger)
        if pushover_logger is not None:
            pushover_feedback = str(metadata["feedback"])
            pushover_logger.log(pushover_feedback)
Example #13
    def _explore_and_set_tracking(self, data_point, data_point_ix, instruction_string):

        # Get the panoramic image
        panorama, _ = self.server.explore()

        ###########################################
        # original_large_panorama = panorama.copy()
        # panorama = scipy.misc.imresize(panorama.swapaxes(0, 1).swapaxes(1, 2), (128, 128*6, 3)).swapaxes(1, 2).swapaxes(0, 1)
        ###########################################

        # Get the panorama and predict the goal location
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=panorama,
                                   previous_action=None,
                                   pose=None,
                                   position_orientation=None,
                                   data_point=data_point)
        volatile = self.goal_prediction_model.get_attention_prob(state, model_state=None)
        inferred_ix = int(torch.max(volatile["attention_logits"], 0)[1].data.cpu().numpy()[0])

        ##########################################
        # self.save_large_panorama_heat_maps(data_point_ix, original_large_panorama,
        #                                    volatile["attention_probs"], instruction_string, scale=5)
        ##########################################

        if inferred_ix == 6 * self.config["num_manipulation_row"] * self.config["num_manipulation_col"]:
            print("Predicting Out-of-sight")
            return None

        assert 0 <= inferred_ix < 6 * self.config["num_manipulation_row"] * self.config["num_manipulation_col"]

        row = int(inferred_ix / (6 * self.config["num_manipulation_col"]))
        col = inferred_ix % (6 * self.config["num_manipulation_col"])
        region_ix = int(col / self.config["num_manipulation_col"])

        if region_ix == 0:
            camera_ix = 3
        elif region_ix == 1:
            camera_ix = 4
        elif region_ix == 2:
            camera_ix = 5
        elif region_ix == 3:
            camera_ix = 0
        elif region_ix == 4:
            camera_ix = 1
        elif region_ix == 5:
            camera_ix = 2
        else:
            raise AssertionError("region ix should be in {0, 1, 2, 3, 4, 5}. Found ", region_ix)

        col = col % self.config["num_manipulation_col"]

        # Set tracking
        row_value = min(1.0, (row + 0.5) / float(self.config["num_manipulation_row"]))
        col_value = min(1.0, (col + 0.5) / float(self.config["num_manipulation_col"]))

        message = self.server.set_tracking(camera_ix, row_value, col_value)

        # self.save_panorama_heat_maps(data_point_ix, panorama, region_ix, row, col, instruction_string)
        return message.decode("utf-8")
Example #14
    def do_train_(shared_model,
                  config,
                  action_space,
                  meta_data_util,
                  constants,
                  train_dataset,
                  tune_dataset,
                  experiment,
                  experiment_name,
                  rank,
                  server,
                  logger,
                  model_type,
                  vocab,
                  use_pushover=False):

        print("In training...")

        launch_k_unity_builds([config["port"]],
                              "./simulators/house_3_elmer.x86_64")
        server.initialize_server()
        print("launched builds")

        # Test policy
        test_policy = gp.get_argmax_action

        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        if use_pushover:
            # pushover_logger = PushoverLogger(experiment_name)
            pushover_logger = None
        else:
            pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(config, constants)
        # local_model.train()

        # Create the Agent
        logger.log("STARTING AGENT")
        tmp_agent = TmpHouseAgent(server=server,
                                  model=local_model,
                                  test_policy=test_policy,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=config,
                                  constants=constants)
        logger.log("Created Agent...")

        action_counts = [0] * action_space.num_actions()
        max_epochs = constants["max_epochs"]
        dataset_size = len(train_dataset)
        tune_dataset_size = len(tune_dataset)

        # Create the learner to compute the loss
        learner = TmpSupervisedLearning(shared_model, local_model,
                                        action_space, meta_data_util, config,
                                        constants, tensorboard)
        # TODO change 2 --- unity launch moved up

        for epoch in range(1, max_epochs + 1):

            for data_point_ix, data_point in enumerate(train_dataset):

                # Sync with the shared model
                # local_model.load_state_dict(shared_model.state_dict())
                local_model.load_from_state_dict(shared_model.get_state_dict())

                if (data_point_ix + 1) % 100 == 0:
                    logger.log("Done %d out of %d" %
                               (data_point_ix, dataset_size))
                    logger.log("Training data action counts %r" %
                               action_counts)

                image, metadata = tmp_agent.server.reset_receive_feedback(
                    data_point)
                # instruction = TmpSupervisedLearning.convert_text_to_indices(metadata["instruction"], vocab)
                instruction = data_point.get_instruction()

                # Pose and Orientation gone TODO change 3
                state = AgentObservedState(instruction=instruction,
                                           config=config,
                                           constants=constants,
                                           start_image=image,
                                           previous_action=None,
                                           data_point=data_point)

                model_state = None
                batch_replay_items = []
                total_reward = 0

                # trajectory = metadata["trajectory"]
                trajectory = data_point.get_trajectory()[0:300]

                for action in trajectory:

                    # Sample action using the policy
                    log_probabilities, model_state, image_emb_seq, state_feature = \
                        local_model.get_probs(state, model_state)

                    # Sample action from the probability
                    action_counts[action] += 1

                    # Send the action and get feedback
                    image, reward, metadata = tmp_agent.server.send_action_receive_feedback(
                        action)

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state,
                                                   action,
                                                   reward,
                                                   log_prob=log_probabilities)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    # Pose and orientation gone, TODO change 4
                    state = state.update(image, action, data_point=data_point)

                    total_reward += reward

                # Send final STOP action and get feedback
                # Sample action using the policy
                log_probabilities, model_state, image_emb_seq, state_feature = \
                    local_model.get_probs(state, model_state)
                image, reward, metadata = tmp_agent.server.halt_and_receive_feedback(
                )
                total_reward += reward

                # if tensorboard is not None:
                #     tensorboard.log_all_train_errors(
                #         metadata["edit_dist_error"], metadata["closest_dist_error"], metadata["stop_dist_error"])

                # Store it in the replay memory list
                replay_item = ReplayMemoryItem(
                    state,
                    action_space.get_stop_action_index(),
                    reward,
                    log_prob=log_probabilities)
                batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                if len(batch_replay_items) > 0:  # 32
                    loss_val = learner.do_update(batch_replay_items)
                    # self.action_prediction_loss_calculator.predict_action(batch_replay_items)
                    # del batch_replay_items[:]  # in place list clear

                    if tensorboard is not None:
                        # cross_entropy = float(learner.cross_entropy.data[0])
                        # tensorboard.log(cross_entropy, loss_val, 0)
                        num_actions = len(trajectory) + 1
                        tensorboard.log_scalar(
                            "loss_val", loss_val)  # /float(num_actions))
                        entropy = float(
                            learner.entropy.data[0])  # /float(num_actions)
                        tensorboard.log_scalar("entropy", entropy)
                        ratio = float(learner.ratio.data[0])
                        tensorboard.log_scalar(
                            "Abs_objective_to_entropy_ratio", ratio)

                        if learner.action_prediction_loss is not None:
                            action_prediction_loss = float(
                                learner.action_prediction_loss.data[0])
                            learner.tensorboard.log_action_prediction_loss(
                                action_prediction_loss)
                        if learner.temporal_autoencoder_loss is not None:
                            temporal_autoencoder_loss = float(
                                learner.temporal_autoencoder_loss.data[0])
                            tensorboard.log_temporal_autoencoder_loss(
                                temporal_autoencoder_loss)
                        if learner.object_detection_loss is not None:
                            object_detection_loss = float(
                                learner.object_detection_loss.data[0])
                            tensorboard.log_object_detection_loss(
                                object_detection_loss)
                        if learner.symbolic_language_prediction_loss is not None:
                            symbolic_language_prediction_loss = float(
                                learner.symbolic_language_prediction_loss.
                                data[0])
                            tensorboard.log_scalar(
                                "sym_language_prediction_loss",
                                symbolic_language_prediction_loss)
                        if learner.goal_prediction_loss is not None:
                            goal_prediction_loss = float(
                                learner.goal_prediction_loss.data[0])
                            tensorboard.log_scalar("goal_prediction_loss",
                                                   goal_prediction_loss)
                        if learner.mean_factor_entropy is not None:
                            mean_factor_entropy = float(
                                learner.mean_factor_entropy.data[0])
                            tensorboard.log_factor_entropy_loss(
                                mean_factor_entropy)

            # Save the model
            local_model.save_model(experiment + "/contextual_bandit_" +
                                   str(rank) + "_epoch_" + str(epoch))
            logger.log("Training data action counts %r" % action_counts)

            if tune_dataset_size > 0:
                # Test on tuning data
                print("Going for testing")
                tmp_agent.test(tune_dataset,
                               vocab,
                               tensorboard=tensorboard,
                               logger=logger,
                               pushover_logger=pushover_logger)
                print("Done testing")
Example #15
    def try_to_progress(self):

        # If in state (1) or (2) then return immediately
        if self.status == Client.WAITING_FOR_EXAMPLE or self.status == Client.WAITING_FOR_ACTION:
            return self.status

        assert self.status == Client.WAITING_TO_RECEIVE

        # If in state (3) then see if the message is available. If the message
        # is available then return to waiting for an action or a new example.
        if self.state is None:
            feedback = self.server.receive_reset_feedback_nonblocking()
        else:
            feedback = self.server.receive_feedback_nonblocking()

        if feedback is None:
            return self.status
        else:
            if self.state is None:
                # assert False, "state should not be none"
                # Feedback is in response to reset
                image, metadata = feedback

                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                self.state = AgentObservedState(
                    instruction=self.current_data_point.instruction,
                    config=self.config,
                    constants=self.constants,
                    start_image=image,
                    previous_action=None,
                    data_point=self.current_data_point)

                # Waiting for action
                self.status = Client.WAITING_FOR_ACTION
            else:
                # Feedback is in response to an action
                image, reward, metadata = feedback
                self.total_reward += reward

                # Create a replay item unless it is forced
                if not self.forced_stop:
                    all_rewards = self._get_all_rewards(metadata)
                    replay_item = ReplayMemoryItem(
                        self.state,
                        self.last_action,
                        reward,
                        log_prob=self.last_log_prob,
                        image_emb_seq=self.image_emb_seq,
                        factor_entropy=self.factor_entropy,
                        all_rewards=all_rewards)
                    self.batch_replay_items.append(replay_item)

                # Update the agent state
                self.state = self.state.update(
                    image,
                    self.last_action,
                    data_point=self.current_data_point)

                if self.last_action == self.agent.action_space.get_stop_action_index(
                ):
                    # Update the scores based on meta_data
                    # self.meta_data_util.log_results(metadata)

                    if self.tensorboard is not None:
                        self.tensorboard.log_all_train_errors(
                            metadata["edit_dist_error"],
                            metadata["closest_dist_error"],
                            metadata["stop_dist_error"])
                    self.status = Client.WAITING_FOR_EXAMPLE
                else:

                    if self.num_action >= self.max_num_actions:
                        # Send forced stop action and wait to receive
                        self._take_forced_stop()
                        self.status = Client.WAITING_TO_RECEIVE
                    else:
                        # Wait to take another action
                        self.status = Client.WAITING_FOR_ACTION

            self.metadata = metadata
            return self.status
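try_to_progress above cycles between three client states. The actual constants live on the project's Client class; the sketch below only mirrors the protocol shape, with illustrative names and values:

    class ClientStatus(object):
        WAITING_FOR_EXAMPLE = 0   # needs a new data point to start an episode
        WAITING_FOR_ACTION = 1    # has an AgentObservedState, waiting for the policy to act
        WAITING_TO_RECEIVE = 2    # action sent, waiting on simulator feedback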
Example #16
    def do_train_forced_reading(self, agent, train_dataset, tune_dataset,
                                experiment_name):
        """ Perform training """

        assert isinstance(
            agent, ReadPointerAgent
        ), "This learning algorithm works only with READPointerAgent"

        dataset_size = len(train_dataset)

        for epoch in range(1, self.max_epoch + 1):

            logging.info("Starting epoch %d", epoch)
            action_counts = dict()
            action_counts[ReadPointerAgent.READ_MODE] = [0] * 2
            action_counts[ReadPointerAgent.
                          ACT_MODE] = [0] * self.action_space.num_actions()

            # Test on tuning data
            agent.test_forced_reading(tune_dataset,
                                      tensorboard=self.tensorboard)

            batch_replay_items = []
            total_reward = 0
            episodes_in_batch = 0

            for data_point_ix, data_point in enumerate(train_dataset):

                if (data_point_ix + 1) % 100 == 0:
                    logging.info("Done %d out of %d", data_point_ix,
                                 dataset_size)
                    logging.info("Training data action counts %r",
                                 action_counts)

                num_actions = 0
                max_num_actions = len(data_point.get_trajectory())
                max_num_actions += self.constants["max_extra_horizon"]

                image, metadata = agent.server.reset_receive_feedback(
                    data_point)
                oracle_segments = data_point.get_instruction_oracle_segmented()
                pose = int(metadata["y_angle"] / 15.0)
                state = AgentObservedState(instruction=data_point.instruction,
                                           config=self.config,
                                           constants=self.constants,
                                           start_image=image,
                                           previous_action=None,
                                           pose=pose)

                per_segment_budget = int(max_num_actions /
                                         len(oracle_segments))
                num_segment_actions = 0

                mode = ReadPointerAgent.READ_MODE
                current_segment_ix = 0

                while True:

                    if mode == ReadPointerAgent.READ_MODE:
                        # Find the number of tokens to read for the gold segment
                        num_segment_size = len(
                            oracle_segments[current_segment_ix])
                        current_segment_ix += 1
                        for i in range(0, num_segment_size):
                            state = state.update_on_read()
                        mode = ReadPointerAgent.ACT_MODE

                    elif mode == ReadPointerAgent.ACT_MODE:

                        # Sample action using the policy
                        # Generate probabilities over actions
                        probabilities = list(
                            torch.exp(self.model.get_probs(state, mode).data))

                        # Use test policy to get the action
                        action = gp.sample_action_from_prob(probabilities)
                        action_counts[mode][action] += 1

                        # deal with act mode boundary conditions
                        if num_actions >= max_num_actions:
                            forced_stop = True
                            break

                        elif action == agent.action_space.get_stop_action_index(
                        ) or num_segment_actions > per_segment_budget:
                            if state.are_tokens_left_to_be_read():
                                # reward = self._calc_reward_act_halt(state)
                                if metadata["error"] < 5.0:
                                    reward = 1.0
                                else:
                                    reward = -1.0

                                # Add to replay memory
                                replay_item = ReplayMemoryItem(
                                    state,
                                    agent.action_space.get_stop_action_index(),
                                    reward, mode)
                                if action == agent.action_space.get_stop_action_index(
                                ):
                                    batch_replay_items.append(replay_item)

                                mode = ReadPointerAgent.READ_MODE
                                agent.server.force_goal_update()
                                state = state.update_on_act_halt()
                                num_segment_actions = 0
                            else:
                                if action == agent.action_space.get_stop_action_index(
                                ):
                                    forced_stop = False
                                else:  # stopping due to per segment budget exhaustion
                                    forced_stop = True
                                break

                        else:
                            image, reward, metadata = agent.server.send_action_receive_feedback(
                                action)

                            # Store it in the replay memory list
                            replay_item = ReplayMemoryItem(state,
                                                           action,
                                                           reward,
                                                           mode=mode)
                            batch_replay_items.append(replay_item)

                            # Update the agent state
                            pose = int(metadata["y_angle"] / 15.0)
                            state = state.update(image, action, pose=pose)

                            num_actions += 1
                            num_segment_actions += 1
                            total_reward += reward

                    else:
                        raise AssertionError(
                            "Mode should be either read or act. Unhandled mode: "
                            + str(mode))

                assert mode == ReadPointerAgent.ACT_MODE, "Agent should end on Act Mode"

                # Send final STOP action and get feedback
                image, reward, metadata = agent.server.halt_and_receive_feedback(
                )
                total_reward += reward

                # Store it in the replay memory list
                if not forced_stop:
                    replay_item = ReplayMemoryItem(
                        state, agent.action_space.get_stop_action_index(),
                        reward, mode)
                    batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                episodes_in_batch += 1
                if episodes_in_batch == 1:
                    loss_val = self.do_update(batch_replay_items)
                    batch_replay_items = []
                    entropy_val = float(self.entropy.data[0])
                    self.tensorboard.log(entropy_val, loss_val, total_reward)
                    total_reward = 0
                    episodes_in_batch = 0

                self.tensorboard.log_train_error(metadata["error"])

            # Save the model
            self.model.save_model(
                experiment_name +
                "/read_pointer_forced_reading_contextual_bandit_resnet_epoch_"
                + str(epoch))

            logging.info("Training data action counts %r", action_counts)
    def _test(self,
              data_point_ix,
              data_point,
              test_image,
              tensorboard=None,
              debug=False):

        image, metadata = self.server.reset_receive_feedback(data_point)
        pose = int(metadata["y_angle"] / 15.0)
        position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                metadata["y_angle"])
        state = AgentObservedState(
            instruction=data_point.instruction,
            config=self.config,
            constants=self.constants,
            start_image=image,
            previous_action=None,
            pose=pose,
            position_orientation=position_orientation,
            data_point=data_point,
            prev_instruction=data_point.get_prev_instruction(),
            next_instruction=data_point.get_next_instruction())

        # Test image
        high_quality_test_image_example = self.get_exploration_image()
        print("Image shape is ", high_quality_test_image_example.shape)
        test_image_example = test_image[data_point_ix][0]

        # Predict the goal
        predicted_goal, predictor_error, predicted_pixel, attention_prob = self.get_3d_location(
            test_image_example, data_point, panaroma=True)
        current_bot_location = metadata["x_pos"], metadata["z_pos"]
        current_bot_pose = metadata["y_angle"]
        state.goal = PredictorPlannerAgent.get_goal_location(
            current_bot_location, current_bot_pose, predicted_goal, 32, 32)
        print("Predicted Error ", predictor_error)

        num_actions = 0
        actions = []
        info = dict()

        # Dictionary to contain key results
        info["instruction_string"] = instruction_to_string(
            data_point.instruction, self.config)
        info["datapoint_id"] = data_point.get_scene_name()
        info["stop_dist_error"] = metadata["stop_dist_error"]
        info["closest_dist_error"] = metadata["closest_dist_error"]
        info["edit_dist_error"] = metadata["edit_dist_error"]
        info["num_actions_taken"] = num_actions
        info["predicted_goal"] = predicted_goal
        info["predicted_error"] = predictor_error
        info["gold_goal"] = data_point.get_destination_list()[-1]
        info["final_location"] = (metadata["x_pos"], metadata["z_pos"])
        info["predicted_screen_pixels"] = predicted_pixel

        self.save_attention_prob(high_quality_test_image_example,
                                 attention_prob, info["instruction_string"],
                                 info["datapoint_id"])

        # self.save_example(image, info["instruction_string"], info["datapoint_id"], scale=5)

        self.server.halt_and_receive_feedback()

        return metadata, actions, predictor_error, info
Beispiel #18
0
    def _test(self,
              data_point_ix,
              data_point,
              test_image,
              tensorboard=None,
              debug=False):

        image, metadata = self.server.reset_receive_feedback(data_point)
        pose = int(metadata["y_angle"] / 15.0)
        position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                metadata["y_angle"])
        state = AgentObservedState(
            instruction=data_point.instruction,
            config=self.config,
            constants=self.constants,
            start_image=image,
            previous_action=None,
            pose=pose,
            position_orientation=position_orientation,
            data_point=data_point,
            prev_instruction=data_point.get_prev_instruction(),
            next_instruction=data_point.get_next_instruction())

        # Test image
        if test_image is None:
            test_image_example = self.get_exploration_image()
        else:
            test_image_example = test_image[data_point_ix][0]

        # Predict the goal
        predicted_goal, predictor_error, predicted_pixel, attention_prob = self.get_3d_location(
            test_image_example, data_point, panaroma=True)
        current_bot_location = metadata["x_pos"], metadata["z_pos"]
        current_bot_pose = metadata["y_angle"]
        state.goal = PredictorPlannerAgent.get_goal_location(
            current_bot_location, current_bot_pose, predicted_goal, 32, 32)
        print("Predicted Error ", predictor_error)

        num_actions = 0
        max_num_actions = self.constants["horizon"]
        model_state = None
        actions = []
        info = dict()

        while True:

            # Generate probabilities over actions
            if isinstance(self.model, AbstractModel):
                probabilities = list(
                    torch.exp(self.model.get_probs(state).data))
            elif isinstance(self.model, AbstractIncrementalModel):
                log_probabilities, model_state, _, _ = self.model.get_probs(
                    state, model_state, volatile=True)
                probabilities = list(torch.exp(log_probabilities.data))[0]
            else:
                raise AssertionError("Unhandled Model type.")

            # Use test policy to get the action
            action = self.test_policy(probabilities)
            actions.append(action)

            if action == self.action_space.get_stop_action_index(
            ) or num_actions >= max_num_actions:
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback(
                )
                if tensorboard is not None:
                    tensorboard.log_all_test_errors(
                        metadata["edit_dist_error"],
                        metadata["closest_dist_error"],
                        metadata["stop_dist_error"])

                # Update the scores based on meta_data
                self.meta_data_util.log_results(metadata)

                if debug:
                    # Dictionary to contain key results
                    info["instruction_string"] = instruction_to_string(
                        data_point.instruction, self.config)
                    info["datapoint_id"] = data_point.get_scene_name()
                    info["stop_dist_error"] = metadata["stop_dist_error"]
                    info["closest_dist_error"] = metadata["closest_dist_error"]
                    info["edit_dist_error"] = metadata["edit_dist_error"]
                    info["num_actions_taken"] = num_actions
                    info["predicted_goal"] = predicted_goal
                    info["predicted_error"] = predictor_error
                    info["gold_goal"] = data_point.get_destination_list()[-1]
                    info["final_location"] = (metadata["x_pos"],
                                              metadata["z_pos"])
                    info["predicted_screen_pixels"] = predicted_pixel

                    self.save_attention_prob(test_image_example,
                                             attention_prob,
                                             info["instruction_string"],
                                             info["datapoint_id"])
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(
                    action)
                # Update the agent state
                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                state = state.update(image,
                                     action,
                                     pose=pose,
                                     position_orientation=position_orientation,
                                     data_point=data_point)

                # Set the goal based on the current position and angle
                current_bot_location = metadata["x_pos"], metadata["z_pos"]
                current_bot_pose = metadata["y_angle"]
                state.goal = PredictorPlannerAgent.get_goal_location(
                    current_bot_location, current_bot_pose, predicted_goal, 32,
                    32)
                num_actions += 1

        # logging.info("Error, Start-Distance, Turn-Angle,  %r %r %r", metadata["stop_dist_error"], distance, angle)
        return metadata, actions, predictor_error, info
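
Both _test variants above discretize the agent's yaw into a pose index with int(metadata["y_angle"] / 15.0), i.e. 15-degree bins. A small illustrative sketch of that discretization is shown below; the modulo guard is an addition for angles outside [0, 360) and is not part of the original code:

# Illustrative sketch (not from the repository): yaw in degrees mapped to one
# of 24 orientation bins of 15 degrees each.
def discretize_pose(y_angle_degrees, bin_size=15.0):
    return int((y_angle_degrees % 360.0) / bin_size)

assert discretize_pose(0.0) == 0
assert discretize_pose(44.9) == 2
assert discretize_pose(359.0) == 23
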
Beispiel #19
0
class Client:
    """ Client can be in one of the following state:
    1. Free and Waiting for new example
    2. Waiting to take the next action
    3. Waiting to receive the next image and message.

    Client operates in an automaton following the transitions below:
    Wait for a new example -> repeat [Take an action -> Wait to receive next image and message ] -> Go back to (1) """

    WAITING_FOR_EXAMPLE, WAITING_FOR_ACTION, WAITING_TO_RECEIVE = range(3)

    def __init__(self, agent, config, constants, tensorboard, client_ix,
                 batch_replay_items):
        self.agent = agent
        self.config = config
        self.constants = constants
        self.tensorboard = tensorboard

        # Client specific information
        self.status = Client.WAITING_FOR_EXAMPLE
        self.client_ix = client_ix
        self.server = agent.server  # agent.servers[client_ix]
        self.metadata = None

        # Datapoint specific variable
        self.max_num_actions = None
        self.state = None
        self.model_state = None
        self.image_emb_seq = None
        self.current_data_point = None
        self.last_action = None
        self.last_log_prob = None
        self.factor_entropy = None
        self.num_action = 0
        self.total_reward = 0
        self.forced_stop = False
        self.batch_replay_items = batch_replay_items

    def get_state(self):
        return self.state

    def get_status(self):
        return self.status

    def get_model_state(self):
        return self.model_state

    def _get_all_rewards(self, metadata):
        rewards = []
        for i in range(0, self.config["num_actions"]):
            reward = metadata["reward_dict"][
                self.agent.action_space.get_action_name(i)]
            rewards.append(reward)
        return rewards

    def try_to_progress(self):

        # If in state (1) or (2) then return immediately
        if self.status == Client.WAITING_FOR_EXAMPLE or self.status == Client.WAITING_FOR_ACTION:
            return self.status

        assert self.status == Client.WAITING_TO_RECEIVE

        # If in state (3) then see if the message is available. If the message
        # is available then return to waiting for an action or a new example.
        if self.state is None:
            feedback = self.server.receive_reset_feedback_nonblocking()
        else:
            feedback = self.server.receive_feedback_nonblocking()

        if feedback is None:
            return self.status
        else:
            if self.state is None:
                # assert False, "state should not be none"
                # Feedback is in response to reset
                image, metadata = feedback

                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                self.state = AgentObservedState(
                    instruction=self.current_data_point.instruction,
                    config=self.config,
                    constants=self.constants,
                    start_image=image,
                    previous_action=None,
                    data_point=self.current_data_point)

                # Waiting for action
                self.status = Client.WAITING_FOR_ACTION
            else:
                # Feedback is in response to an action
                image, reward, metadata = feedback
                self.total_reward += reward

                # Create a replay item unless the last action was a forced stop
                if not self.forced_stop:
                    all_rewards = self._get_all_rewards(metadata)
                    replay_item = ReplayMemoryItem(
                        self.state,
                        self.last_action,
                        reward,
                        log_prob=self.last_log_prob,
                        image_emb_seq=self.image_emb_seq,
                        factor_entropy=self.factor_entropy,
                        all_rewards=all_rewards)
                    self.batch_replay_items.append(replay_item)

                # Update the agent state
                self.state = self.state.update(
                    image,
                    self.last_action,
                    data_point=self.current_data_point)

                if self.last_action == self.agent.action_space.get_stop_action_index(
                ):
                    # Update the scores based on meta_data
                    # self.meta_data_util.log_results(metadata)

                    if self.tensorboard is not None:
                        self.tensorboard.log_all_train_errors(
                            metadata["edit_dist_error"],
                            metadata["closest_dist_error"],
                            metadata["stop_dist_error"])
                    self.status = Client.WAITING_FOR_EXAMPLE
                else:

                    if self.num_action >= self.max_num_actions:
                        # Send forced stop action and wait to receive
                        self._take_forced_stop()
                        self.status = Client.WAITING_TO_RECEIVE
                    else:
                        # Wait to take another action
                        self.status = Client.WAITING_FOR_ACTION

            self.metadata = metadata
            return self.status

    def accept_new_example(self, data_point, max_num_actions):
        assert self.status == Client.WAITING_FOR_EXAMPLE
        self.state = None
        self.metadata = None
        self.model_state = None
        self.image_emb_seq = None
        self.factor_entropy = None
        self.max_num_actions = max_num_actions
        self.server.reset_nonblocking(data_point)
        self.current_data_point = data_point
        self.last_action = None
        self.last_log_prob = None
        self.num_action = 0
        self.total_reward = 0
        self.forced_stop = False
        self.status = Client.WAITING_TO_RECEIVE

    def take_action(self, log_probabilities, new_model_state, image_emb_seq,
                    factor_entropy):
        assert self.status == Client.WAITING_FOR_ACTION

        probability = list(torch.exp(log_probabilities.data))[0]

        self.model_state = new_model_state
        self.last_log_prob = log_probabilities
        self.image_emb_seq = image_emb_seq
        self.factor_entropy = factor_entropy

        # Use test policy to get the action
        self.last_action = gp.sample_action_from_prob(probability)
        self.num_action += 1

        # if self.metadata["goal_dist"] < 5:
        #     # Add a forced stop action to replay items
        #     imp_weight = float(probability[3])
        #     reward = 1.0
        #     replay_item = ReplayMemoryItem(
        #         self.state, self.agent.action_space.get_stop_action_index(), reward * imp_weight,
        #         log_prob=self.last_log_prob, image_emb_seq=self.image_emb_seq, factor_entropy=self.factor_entropy)
        #     self.batch_replay_items.append(replay_item)

        if self.last_action == self.agent.action_space.get_stop_action_index():
            self.server.halt_nonblocking()
        else:
            self.server.send_action_nonblocking(self.last_action)

        self.status = Client.WAITING_TO_RECEIVE

    def reset_datapoint_blocking(self, datapoint):
        """ Resets to the given datapoint and returns starting image """
        image, metadata = self.server.reset_receive_feedback(datapoint)
        return image, metadata

    def take_action_blocking(self, action):
        """ Takes an action and returns image, reward and metadata """

        if action == self.agent.action_space.get_stop_action_index():
            image, reward, metadata = self.server.halt_and_receive_feedback()
            done = True
        else:
            image, reward, metadata = self.server.send_action_receive_feedback(
                action)
            done = False

        return image, reward, metadata, done

    def _take_forced_stop(self):
        # Use test policy to get the action
        self.last_action = self.agent.action_space.get_stop_action_index()
        self.forced_stop = True
        self.server.halt_nonblocking()
        self.status = Client.WAITING_TO_RECEIVE
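
The Client above is a three-state automaton that is meant to be polled: try_to_progress() moves it out of WAITING_TO_RECEIVE once the simulator responds, accept_new_example() starts a new episode, and take_action() submits the next action. A hypothetical driver loop, assuming the caller owns the model and the dataset iterator (the model call's return values are an assumption, not the repository's exact API), could look like this:

# Hypothetical polling loop around the Client automaton shown above; the model
# call and dataset handling are placeholders, not the repository's API.
def drive_clients(clients, dataset_iter, model, horizon):
    while True:
        for client in clients:
            status = client.try_to_progress()
            if status == Client.WAITING_FOR_EXAMPLE:
                data_point = next(dataset_iter, None)
                if data_point is None:
                    return  # no more examples to hand out
                client.accept_new_example(data_point, max_num_actions=horizon)
            elif status == Client.WAITING_FOR_ACTION:
                # Assumed return signature: log probs, new model state,
                # image embedding sequence and factor entropy.
                log_probs, model_state, image_emb_seq, factor_entropy = \
                    model.get_probs(client.get_state(), client.get_model_state())
                client.take_action(log_probs, model_state, image_emb_seq,
                                   factor_entropy)
            # WAITING_TO_RECEIVE: nothing to do, poll again on the next round.
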
Beispiel #20
0
    def test_auto_segmented(self,
                            test_dataset,
                            tensorboard=None,
                            segmenting_type="auto"):
        assert segmenting_type in ("auto", "oracle")
        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()

        metadata = ""

        for data_point in test_dataset:
            if segmenting_type == "auto":
                segmented_instruction = data_point.get_instruction_auto_segmented(
                )
            else:
                segmented_instruction = data_point.get_instruction_oracle_segmented(
                )
            num_segments = len(segmented_instruction)
            gold_num_actions = len(data_point.get_trajectory())
            horizon = gold_num_actions // num_segments
            horizon += self.constants["max_extra_horizon_auto_segmented"]

            image, metadata = self.server.reset_receive_feedback(data_point)

            instruction = instruction_to_string(data_point.get_instruction(),
                                                self.config)
            print("TEST INSTRUCTION: %r" % instruction)
            print("")

            for instruction_i, instruction in enumerate(segmented_instruction):

                state = AgentObservedState(
                    instruction=instruction,
                    config=self.config,
                    constants=self.constants,
                    start_image=image,
                    previous_action=None,
                    prev_instruction=data_point.get_prev_instruction(),
                    next_instruction=data_point.get_next_instruction())

                num_actions = 0
                # self._save_agent_state(state, num_actions)

                while True:

                    # Generate probabilities over actions
                    probabilities = list(
                        torch.exp(self.model.get_probs(state).data))
                    # print "test probs:", probabilities

                    # Use test policy to get the action
                    action = self.test_policy(probabilities)
                    action_counts[action] += 1

                    # logging.info("Taking action-num=%d horizon=%d action=%s from %s",
                    #              num_actions, max_num_actions, str(action), str(probabilities))

                    if action == self.action_space.get_stop_action_index(
                    ) or num_actions >= horizon:
                        break

                    else:
                        # Send the action and get feedback
                        image, reward, metadata = self.server.send_action_receive_feedback(
                            action)

                        # Update the agent state
                        state = state.update(image, action)
                        num_actions += 1

            _, _, metadata = self.server.halt_and_receive_feedback()
            if tensorboard is not None:
                tensorboard.log_test_error(metadata["error"])

        self.meta_data_util.log_results(metadata)
        logging.info("Testing data action counts %r", action_counts)
Beispiel #21
0
    def test(self,
             test_dataset,
             vocab,
             tensorboard=None,
             logger=None,
             pushover_logger=None):

        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()
        task_completion_accuracy = 0

        metadata = {"feedback": ""}
        for data_point_ix, data_point in enumerate(test_dataset):
            image, metadata = self.server.reset_receive_feedback(data_point)
            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None,
                                       data_point=data_point)
            state.goal = self.get_goal(metadata)
            # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()
            num_actions = 0
            max_num_actions = self.constants["horizon"]
            model_state = None

            while True:

                # Generate probabilities over actions
                if isinstance(self.model, AbstractModel):
                    probabilities = list(
                        torch.exp(self.model.get_probs(state).data))
                elif isinstance(self.model, AbstractIncrementalModel):
                    log_probabilities, model_state, _, _ = self.model.get_probs(
                        state, model_state, volatile=True)
                    probabilities = list(torch.exp(log_probabilities.data))[0]
                else:
                    log_probabilities, model_state = self.model.get_probs(
                        state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))
                    # raise AssertionError("Unhandled Model type.")

                # Use test policy to get the action
                action = self.test_policy(probabilities)
                action_counts[action] += 1

                if action == self.action_space.get_stop_action_index(
                ) or num_actions >= max_num_actions:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.halt_and_receive_feedback(
                    )
                    # if tensorboard is not None:
                    #     tensorboard.log_all_test_errors(
                    #         metadata["edit_dist_error"],
                    #         metadata["closest_dist_error"],
                    #         metadata["stop_dist_error"])

                    # self.log("Testing: Taking stop action and got reward %r " % reward, logger)

                    if metadata["navigation-error"] <= 1.0:
                        task_completion_accuracy += 1

                    # Update the scores based on meta_data
                    # self.meta_data_util.log_results(metadata, logger)
                    # self.log("Overall test results: %r " % metadata, logger)
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(
                        action)
                    # self.log("Testing: Taking action %r and got reward %r " % (action, reward), logger)
                    # time.sleep(0.5)
                    # Update the agent state
                    state = state.update(image, action, data_point=data_point)
                    state.goal = self.get_goal(metadata)
                    num_actions += 1

        task_completion_accuracy = (task_completion_accuracy * 100.0) / float(
            max(len(test_dataset), 1))
        self.log("Overall test results:", logger)
        self.log("Testing: Final Metadata: %r" % metadata, logger)
        self.log("Testing: Action Distribution: %r" % action_counts, logger)
        self.log(
            "Testing: Task Completion Accuracy: %r " %
            task_completion_accuracy, logger)
        # self.meta_data_util.log_results(metadata, logger)
        self.log("Testing data action counts %r" % action_counts, logger)
        if pushover_logger is not None:
            pushover_feedback = str(metadata["feedback"])
            pushover_logger.log(pushover_feedback)
Beispiel #22
0
    def debug_tracking(self, data_point, vocab):

        self.server.clear_metadata()
        task_completion_accuracy = 0

        image, metadata = self.server.reset_receive_feedback(data_point)
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   data_point=data_point)
        num_actions = 0
        print("Instruction is ",
              " ".join([vocab[index] for index in data_point.instruction]))
        plt.ion()

        # Get the panoramic image
        panorama, _ = self.server.explore()

        # Show the goal location
        self.show_goal_location(panorama, metadata, size=6)

        tracking_values = input(
            "Enter the region, row and column for tracking.")
        region_ix, row, col = [int(w) for w in tracking_values.split()]
        if region_ix == 0:
            camera_ix = 3
        elif region_ix == 1:
            camera_ix = 4
        elif region_ix == 2:
            camera_ix = 5
        elif region_ix == 3:
            camera_ix = 0
        elif region_ix == 4:
            camera_ix = 1
        elif region_ix == 5:
            camera_ix = 2
        else:
            raise AssertionError("Region ix should be in {0, 1, 2, 3, 4, 5}")

        row_value = row / 32.0
        col_value = col / 32.0
        self.server.set_tracking(camera_ix, row_value, col_value)
        input("Tracking done. Enter to continue")

        while True:

            # Show the goal location
            self.show_goal_location(image,
                                    metadata,
                                    goal_type="inferred",
                                    size=1)

            incorrect_action = True
            action_string = None
            while incorrect_action:
                action_string = input(
                    "Take the action. 0: Forward, 1: Left, 2: Right, 3: Stop, 4: Interact\n"
                )
                if action_string in ['0', '1', '2', '3', '4']:
                    incorrect_action = False
                if action_string == '4':
                    interact_values = input(
                        "Enter the row and column in format: row col")
                    row, col = interact_values.split()
                    row, col = int(row), int(col)
                    action_string = 4 + row * 32 + col

            action = int(action_string)
            action_name = self.action_space.get_action_name(action)

            if action == self.action_space.get_stop_action_index():
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback(
                )

                print("Metadata is ", metadata)
                if metadata["navigation-error"] <= 1.0:
                    task_completion_accuracy += 1
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(
                    action)
                # Update the agent state
                state = state.update(image, action, data_point=data_point)
                num_actions += 1

            print("Metadata is ", metadata)
            print("Took action %r, Got reward %r" % (action_name, reward))
Beispiel #23
0
    def do_train(self, agent, train_dataset, tune_dataset, experiment_name):
        """ Perform training """

        assert isinstance(
            agent, ReadPointerAgent
        ), "This learning algorithm works only with READPointerAgent"

        dataset_size = len(train_dataset)

        for epoch in range(1, self.max_epoch + 1):

            logging.info("Starting epoch %d", epoch)
            action_counts = dict()
            action_counts[ReadPointerAgent.READ_MODE] = [0] * 2
            action_counts[ReadPointerAgent.
                          ACT_MODE] = [0] * self.action_space.num_actions()

            # Test on tuning data
            agent.test(tune_dataset, tensorboard=self.tensorboard)

            batch_replay_items = []
            total_reward = 0
            episodes_in_batch = 0

            for data_point_ix, data_point in enumerate(train_dataset):

                if (data_point_ix + 1) % 100 == 0:
                    logging.info("Done %d out of %d", data_point_ix,
                                 dataset_size)
                    logging.info("Training data action counts %r",
                                 action_counts)

                num_actions = 0
                max_num_actions = len(data_point.get_trajectory())
                max_num_actions += self.constants["max_extra_horizon"]

                image, metadata = agent.server.reset_receive_feedback(
                    data_point)
                state = AgentObservedState(instruction=data_point.instruction,
                                           config=self.config,
                                           constants=self.constants,
                                           start_image=image,
                                           previous_action=None)

                mode = ReadPointerAgent.READ_MODE
                last_action_was_halt = False

                instruction = instruction_to_string(
                    data_point.get_instruction(), self.config)
                print "TRAIN INSTRUCTION: %r" % instruction
                print ""

                while True:

                    # Sample action using the policy
                    # Generate probabilities over actions
                    probabilities = list(
                        torch.exp(self.model.get_probs(state, mode).data))

                    # Use test policy to get the action
                    action = gp.sample_action_from_prob(probabilities)
                    action_counts[mode][action] += 1

                    if mode == ReadPointerAgent.READ_MODE:
                        # read mode boundary conditions
                        forced_action = False
                        if not state.are_tokens_left_to_be_read():
                            # force halt
                            action = 1
                            forced_action = True
                        elif num_actions >= max_num_actions or last_action_was_halt:
                            # force read
                            action = 0
                            forced_action = True

                        if not forced_action:
                            # Store reward in the replay memory list
                            reward = self._calc_reward_read_mode(state, action)
                            replay_item = ReplayMemoryItem(state,
                                                           action,
                                                           reward,
                                                           mode=mode)
                            batch_replay_items.append(replay_item)

                        if action == 0:
                            last_action_was_halt = False
                            state = state.update_on_read()
                        elif action == 1:
                            last_action_was_halt = True
                            mode = ReadPointerAgent.ACT_MODE
                        else:
                            raise AssertionError(
                                "Read mode only supports two actions: read(0) and halt(1). "
                                + "Found " + str(action))

                    elif mode == ReadPointerAgent.ACT_MODE:
                        # deal with act mode boundary conditions
                        if num_actions >= max_num_actions:
                            forced_stop = True
                            break

                        elif action == agent.action_space.get_stop_action_index(
                        ):
                            if state.are_tokens_left_to_be_read():
                                reward = self._calc_reward_act_halt(state)

                                # Add to replay memory
                                replay_item = ReplayMemoryItem(
                                    state,
                                    agent.action_space.get_stop_action_index(),
                                    reward, mode)
                                batch_replay_items.append(replay_item)

                                mode = ReadPointerAgent.READ_MODE
                                last_action_was_halt = True
                                state = state.update_on_act_halt()
                            else:
                                forced_stop = False
                                break

                        else:
                            image, reward, metadata = agent.server.send_action_receive_feedback(
                                action)

                            # Store it in the replay memory list
                            replay_item = ReplayMemoryItem(state,
                                                           action,
                                                           reward,
                                                           mode=mode)
                            batch_replay_items.append(replay_item)

                            # Update the agent state
                            state = state.update(image, action)

                            num_actions += 1
                            total_reward += reward
                            last_action_was_halt = False

                    else:
                        raise AssertionError(
                            "Mode should be either read or act. Unhandled mode: "
                            + str(mode))

                assert mode == ReadPointerAgent.ACT_MODE, "Agent should end on Act Mode"

                # Send final STOP action and get feedback
                image, reward, metadata = agent.server.halt_and_receive_feedback(
                )
                total_reward += reward

                # Store it in the replay memory list
                if not forced_stop:
                    replay_item = ReplayMemoryItem(
                        state, agent.action_space.get_stop_action_index(),
                        reward, mode)
                    batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                episodes_in_batch += 1
                if episodes_in_batch == 1:
                    loss_val = self.do_update(batch_replay_items)
                    batch_replay_items = []
                    entropy_val = float(self.entropy.data[0])
                    self.tensorboard.log(entropy_val, loss_val, total_reward)
                    total_reward = 0
                    episodes_in_batch = 0

                self.tensorboard.log_train_error(metadata["error"])

            # Save the model
            self.model.save_model(
                experiment_name +
                "/read_pointer_contextual_bandit_resnet_epoch_" + str(epoch))

            logging.info("Training data action counts %r", action_counts)
Beispiel #24
0
    def test_goal_prediction(self,
                             test_dataset,
                             tensorboard=None,
                             logger=None,
                             pushover_logger=None):

        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()
        task_completion_accuracy = 0

        sum_loss, count, sum_prob, goal_prob_count = 0, 0, 0, 0

        metadata = {"feedback": ""}
        for data_point_ix, data_point in enumerate(test_dataset):
            print("Datapoint index ", data_point_ix)
            image, metadata = self.server.reset_receive_feedback(data_point)
            pose = int(metadata["y_angle"] / 15.0)
            position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                    metadata["y_angle"])
            state = AgentObservedState(
                instruction=data_point.instruction,
                config=self.config,
                constants=self.constants,
                start_image=image,
                previous_action=None,
                pose=pose,
                position_orientation=position_orientation,
                data_point=data_point,
                prev_instruction=data_point.get_prev_instruction(),
                next_instruction=data_point.get_next_instruction())

            ##################################
            state.goal = GoalPrediction.get_goal_location(
                metadata, data_point, 8, 8)
            print("Instruction is ",
                  instruction_to_string(data_point.instruction, self.config))
            ##################################

            # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()
            num_actions = 0
            max_num_actions = self.constants["horizon"]
            model_state = None
            trajectory = data_point.get_trajectory()[0:1]
            trajectory_len = len(trajectory)

            while True:

                if num_actions == trajectory_len:
                    action = self.action_space.get_stop_action_index()
                else:
                    action = trajectory[num_actions]

                # Generate probabilities over actions
                if isinstance(self.model, AbstractModel):
                    raise NotImplementedError()
                elif isinstance(self.model, AbstractIncrementalModel):
                    log_probabilities, model_state, _, volatile = self.model.get_probs(
                        state, model_state, volatile=True)
                    probabilities = list(torch.exp(log_probabilities.data))[0]
                    # Compute goal prediction accuracy
                    goal_loss, prob, _ = self.goal_prediction_accuracy(
                        state.goal, volatile)
                    sum_loss += goal_loss
                    count += 1
                    if prob is not None:
                        sum_prob += prob
                        goal_prob_count += 1
                else:
                    raise NotImplementedError()
                    # log_probabilities, model_state = self.model.get_probs(state, model_state)
                    # probabilities = list(torch.exp(log_probabilities.data))

                action_counts[action] += 1

                if action == self.action_space.get_stop_action_index(
                ) or num_actions >= max_num_actions:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.halt_and_receive_feedback(
                    )
                    if tensorboard is not None:
                        tensorboard.log_all_test_errors(
                            metadata["edit_dist_error"],
                            metadata["closest_dist_error"],
                            metadata["stop_dist_error"])

                    if metadata["stop_dist_error"] < 5.0:
                        task_completion_accuracy += 1

                    # Update the scores based on meta_data
                    self.meta_data_util.log_results(metadata)
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(
                        action)
                    # Update the agent state
                    pose = int(metadata["y_angle"] / 15.0)
                    position_orientation = (metadata["x_pos"],
                                            metadata["z_pos"],
                                            metadata["y_angle"])
                    state = state.update(
                        image,
                        action,
                        pose=pose,
                        position_orientation=position_orientation,
                        data_point=data_point)
                    ##################################
                    state.goal = GoalPrediction.get_goal_location(
                        metadata, data_point, 8, 8)
                    ##################################
                    num_actions += 1

        print("Finished testing. Now logging.")
        task_completion_accuracy = (task_completion_accuracy * 100.0) / float(
            max(len(test_dataset), 1))
        self.log("Overall test results:", logger)
        self.log(
            "Testing: Task completion accuracy is: %r" %
            task_completion_accuracy, logger)
        self.log("Testing: Final Metadata: %r" % metadata, logger)
        self.log("Testing: Action Distribution: %r" % action_counts, logger)
        self.log(
            "Goal Count %r, Mean Goal Loss %r" %
            (count, sum_loss / float(count)), logger)
        self.log(
            "Goal Prob Count %r, Mean Goal Prob %r" %
            (goal_prob_count, sum_prob / float(goal_prob_count)), logger)

        self.meta_data_util.log_results(metadata, logger)
        self.log("Testing data action counts %r" % action_counts, logger)
        if pushover_logger is not None:
            pushover_feedback = str(
                metadata["feedback"]
            ) + " --- " + "task_completion_accuracy=%r" % task_completion_accuracy
            pushover_logger.log(pushover_feedback)
Beispiel #25
0
    def do_train(self, agent, train_dataset, tune_dataset, experiment_name):
        """ Perform training """

        dataset_size = len(train_dataset)

        for epoch in range(1, self.max_epoch + 1):

            logging.info("Starting epoch %d", epoch)
            action_counts = [0] * self.action_space.num_actions()

            # Test on tuning data
            agent.test(tune_dataset, tensorboard=self.tensorboard)

            batch_replay_items = []
            total_reward = 0
            episodes_in_batch = 0

            for data_point_ix, data_point in enumerate(train_dataset):

                if (data_point_ix + 1) % 100 == 0:
                    logging.info("Done %d out of %d", data_point_ix,
                                 dataset_size)
                    logging.info("Training data action counts %r",
                                 action_counts)

                # instruction = instruction_to_string(
                #     data_point.get_instruction(), self.config)
                # print "TRAIN INSTRUCTION: %r" % instruction
                # print ""

                instruction = data_point.get_paragraph_instruction()

                num_actions = 0
                max_num_actions = len(data_point.get_trajectory())
                max_num_actions += self.constants["max_extra_horizon"]

                image, metadata = agent.server.reset_receive_feedback(
                    data_point)
                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                state = AgentObservedState(
                    instruction=data_point.get_paragraph_instruction(),
                    config=self.config,
                    constants=self.constants,
                    start_image=image,
                    previous_action=None,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)
                state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices(
                )

                forced_stop = True

                while num_actions < max_num_actions:

                    # Sample action using the policy
                    # Generate probabilities over actions
                    probabilities = list(
                        torch.exp(self.model.get_probs(state).data))

                    # Use test policy to get the action
                    action = gp.sample_action_from_prob(probabilities)
                    action_counts[action] += 1

                    if action == agent.action_space.get_stop_action_index():
                        forced_stop = False
                        break

                    # Send the action and get feedback
                    image, reward, metadata = agent.server.send_action_receive_feedback(
                        action)

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state, action, reward)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    pose = int(metadata["y_angle"] / 15.0)
                    position_orientation = (metadata["x_pos"],
                                            metadata["z_pos"],
                                            metadata["y_angle"])
                    state = state.update(
                        image,
                        action,
                        pose=pose,
                        position_orientation=position_orientation,
                        data_point=data_point)

                    num_actions += 1
                    total_reward += reward

                # Send final STOP action and get feedback
                image, reward, metadata = agent.server.halt_and_receive_feedback(
                )
                total_reward += reward

                # Store it in the replay memory list
                if not forced_stop:
                    replay_item = ReplayMemoryItem(
                        state, agent.action_space.get_stop_action_index(),
                        reward)
                    batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                episodes_in_batch += 1
                if episodes_in_batch == 1:
                    loss_val = self.do_update(batch_replay_items)
                    batch_replay_items = []
                    # entropy_val = float(self.entropy.data[0])
                    # self.tensorboard.log(entropy_val, loss_val, total_reward)
                    cross_entropy = float(self.cross_entropy.data[0])
                    self.tensorboard.log(cross_entropy, loss_val, total_reward)
                    total_reward = 0
                    episodes_in_batch = 0

                if self.tensorboard is not None:
                    self.tensorboard.log_all_train_errors(
                        metadata["edit_dist_error"],
                        metadata["closest_dist_error"],
                        metadata["stop_dist_error"])

            # Save the model
            self.model.save_model(experiment_name +
                                  "/contextual_bandit_resnet_epoch_" +
                                  str(epoch))

            logging.info("Training data action counts %r", action_counts)
Beispiel #26
0
    def test(self,
             test_dataset,
             vocab,
             tensorboard=None,
             logger=None,
             pushover_logger=None):

        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()
        task_completion_accuracy = 0

        metadata = {"feedback": ""}
        sum_bisk_metric = 0
        for data_point_ix, data_point in enumerate(test_dataset):
            image, metadata = self.server.reset_receive_feedback(data_point)
            sum_bisk_metric += metadata["metric"]
            instruction = self.convert_text_to_indices(metadata["instruction"],
                                                       vocab)
            state = AgentObservedState(instruction=instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None,
                                       data_point=data_point)
            # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()
            num_actions = 0
            max_num_actions = self.constants["horizon"]
            model_state = None

            while True:

                # Generate probabilities over actions
                if isinstance(self.model, AbstractModel):
                    probabilities = list(
                        torch.exp(self.model.get_probs(state).data))
                elif isinstance(self.model, AbstractIncrementalModel):
                    log_probabilities, model_state, _, _ = self.model.get_probs(
                        state, model_state, volatile=True)
                    probabilities = list(torch.exp(log_probabilities.data))[0]
                else:
                    # print "Num action is " + str(num_actions) + " and max is " + str(max_num_actions)
                    log_probabilities, model_state = self.model.get_probs(
                        state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))
                    # raise AssertionError("Unhandled Model type.")

                # Use test policy to get the action
                action = self.test_policy(probabilities)
                action_counts[action] += 1

                if action == self.action_space.get_stop_action_index(
                ) or num_actions >= max_num_actions:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.halt_and_receive_feedback(
                    )
                    # if tensorboard is not None:
                    #     tensorboard.log_all_test_errors(
                    #         metadata["edit_dist_error"],
                    #         metadata["closest_dist_error"],
                    #         metadata["stop_dist_error"])

                    # if metadata["stop_dist_error"] < 5.0:
                    #     task_completion_accuracy += 1

                    # Update the scores based on meta_data
                    # self.meta_data_util.log_results(metadata, logger)
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(
                        action)
                    # Update the agent state
                    state = state.update(image, action, data_point=data_point)
                    num_actions += 1

        self.log("Overall test results:", logger)
        self.log(
            "Mean Bisk Metric %r" %
            (sum_bisk_metric / float(len(test_dataset))), logger)
        # self.log("Testing: Final Metadata: %r" % metadata, logger)
        self.log("Testing: Action Distribution: %r" % action_counts, logger)
        # self.meta_data_util.log_results(metadata, logger)
        self.log("Testing data action counts %r" % action_counts, logger)
        if pushover_logger is not None:
            pushover_feedback = str(metadata["feedback"])
            pushover_logger.log(pushover_feedback)
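The loop above delegates the actual choice of action to self.test_policy, which maps the per-action probabilities to a single index. The repository's policy is not shown in this excerpt; a plausible greedy version, given here only as a sketch, would be:

import numpy as np

def greedy_test_policy(probabilities):
    """Pick the most probable action (greedy decoding). The actual test_policy
    may instead sample from the distribution; this is an assumption."""
    return int(np.argmax([float(p) for p in probabilities]))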
    def get_3d_location_for_paragraphs(self,
                                       exploration_image,
                                       instruction,
                                       start_pos,
                                       goal_pos,
                                       panaroma=True):

        state = AgentObservedState(instruction=instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=exploration_image,
                                   previous_action=None,
                                   pose=None,
                                   position_orientation=start_pos,
                                   data_point=None)

        volatile = self.predictor_model.get_attention_prob(state,
                                                           model_state=None)
        inferred_ix = int(
            torch.max(volatile["attention_logits"],
                      0)[1].data.cpu().numpy()[0])

        ########################################
        # inst_string = instruction_to_string(instruction, self.config)
        # self.save_attention_prob(exploration_image, volatile["attention_probs"][:-1].view(32, 192), inst_string)
        ########################################

        predicted_row = int(inferred_ix / float(192))
        predicted_col = inferred_ix % 192

        if panaroma:
            # Index of which of the 6 panorama images contains the goal
            region_index = int(predicted_col / 32)
            predicted_col = predicted_col % 32  # Column within that image where the goal is
            pos = start_pos
            new_pos_angle = GoalPredictionSingle360ImageSupervisedLearningFromDisk.\
                get_new_pos_angle_from_region_index(region_index, pos)
            metadata = {
                "x_pos": pos[0],
                "z_pos": pos[1],
                "y_angle": new_pos_angle
            }
        else:
            pos = start_pos
            metadata = {"x_pos": pos[0], "z_pos": pos[1], "y_angle": pos[2]}

        row, col = predicted_row + 0.5, predicted_col + 0.5

        start_pos = current_pos_from_metadata(metadata)
        start_pose = current_pose_from_metadata(metadata)

        height_drone = 2.5
        x_gen, z_gen = get_inverse_object_position(
            row, col, height_drone, 30, 32, 32,
            (start_pos[0], start_pos[1], start_pose))
        predicted_goal_pos = (x_gen, z_gen)
        x_goal, z_goal = goal_pos

        x_diff = x_gen - x_goal
        z_diff = z_gen - z_goal

        dist = math.sqrt(x_diff * x_diff + z_diff * z_diff)

        return predicted_goal_pos, dist
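The flat attention index above is first decoded into a (row, column) cell of a 32 x 192 attention grid, and the column is then mapped onto one of the six panorama cameras. A small worked example of that arithmetic, with a hypothetical index value:

inferred_ix = 3200                   # hypothetical flat index into the 32 * 192 grid
predicted_row = inferred_ix // 192   # -> 16
predicted_col = inferred_ix % 192    # -> 128
region_index = predicted_col // 32   # -> 4, i.e. the fifth of the six stitched images
col_in_image = predicted_col % 32    # -> 0, column inside that camera image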
Example #28
0
    def _test(self, data_point, tensorboard=None, logger=None):

        image, metadata = self.server.reset_receive_feedback(data_point)
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   data_point=data_point)
        num_actions = 0
        max_num_actions = self.constants["horizon"]
        model_state = None
        actions = []
        total_reward = 0.0

        while True:

            # Generate probabilities over actions
            if isinstance(self.model, AbstractModel):
                probabilities = list(
                    torch.exp(self.model.get_probs(state).data))
            elif isinstance(self.model, AbstractIncrementalModel):
                log_probabilities, model_state, _, _ = self.model.get_probs(
                    state, model_state, volatile=True)
                probabilities = list(torch.exp(log_probabilities.data))[0]
            else:
                log_probabilities, model_state = self.model.get_probs(
                    state, model_state)
                probabilities = list(torch.exp(log_probabilities.data))

            # Use test policy to get the action
            action = self.test_policy(probabilities)
            actions.append(action)

            if action == self.action_space.get_stop_action_index(
            ) or num_actions >= max_num_actions:
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback(
                )

                if tensorboard is not None:
                    tensorboard.log_scalar("navigation_error",
                                           metadata["navigation_error"])

                total_reward += reward

                # Update the scores based on meta_data
                self.log("StreetView Metadata: %r" % metadata, logger)
                self.log(
                    "Test Example: Num actions %r, Navigation Error %r, Total Reward %r "
                    %
                    (num_actions, metadata["navigation_error"], total_reward),
                    logger)
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(
                    action)

                total_reward += reward

                # Update the agent state
                state = state.update(image, action, data_point=data_point)
                num_actions += 1

        return metadata, actions
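_test returns the final metadata and the action sequence for a single example. A hypothetical driver that averages the reported navigation error over a dataset (evaluate_navigation_error and its aggregation are assumptions, not part of the repository) might look like this:

def evaluate_navigation_error(agent, test_dataset, logger=None):
    """Run agent._test on every data point and average the final navigation error."""
    errors = []
    for data_point in test_dataset:
        metadata, _actions = agent._test(data_point, logger=logger)
        errors.append(float(metadata["navigation_error"]))
    return sum(errors) / max(len(errors), 1)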
    def test_auto_segmented(self,
                            test_dataset,
                            logger=None,
                            tensorboard=None,
                            segmenting_type="oracle"):

        assert segmenting_type in ("auto", "oracle")
        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()

        self.log(
            "Performing testing on paragraphs with segmenting type %r" %
            segmenting_type, logger)
        metadata = {"feedback": ""}

        for data_point in test_dataset:
            if segmenting_type == "auto":
                segmented_instruction = data_point.get_instruction_auto_segmented(
                )
            else:
                segmented_instruction = data_point.get_instruction_oracle_segmented(
                )

            max_num_actions = self.constants["horizon"]
            image, metadata = self.server.reset_receive_feedback(data_point)

            for instruction_i, instruction in enumerate(segmented_instruction):

                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                state = AgentObservedState(
                    instruction=instruction,
                    config=self.config,
                    constants=self.constants,
                    start_image=image,
                    previous_action=None,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point,
                    prev_instruction=data_point.get_prev_instruction(),
                    next_instruction=data_point.get_next_instruction())

                # Reset the actions taken and model state
                num_actions = 0
                model_state = None

                # Predict the goal by taking an exploration (panorama) image and then finding the next suitable place to visit
                exploration_image, _, _ = self.server.explore()
                image_slices = []
                for img_ctr in range(0, 6):
                    image_slice = exploration_image[
                        img_ctr * 3:(img_ctr + 1) *
                        3, :, :]  # 3 x height x width
                    # Scale the intensity of the image as done by scipy.misc.imsave
                    image_slice = scipy.misc.bytescale(
                        image_slice.swapaxes(0, 1).swapaxes(1, 2))
                    image_slices.append(image_slice)

                # Reorder and horizontally stitch the images
                reordered_images = [
                    image_slices[3], image_slices[4], image_slices[5],
                    image_slices[0], image_slices[1], image_slices[2]
                ]
                exploration_image = np.hstack(reordered_images).swapaxes(
                    1, 2).swapaxes(0, 1)  # 3 x height x (width*6)

                start_pos = (metadata["x_pos"], metadata["z_pos"],
                             metadata["y_angle"])
                goal_pos = data_point.get_destination_list()[instruction_i]
                predicted_goal, predictor_error = self.get_3d_location_for_paragraphs(
                    exploration_image,
                    instruction,
                    start_pos,
                    goal_pos,
                    panaroma=True)
                current_bot_location = metadata["x_pos"], metadata["z_pos"]
                current_bot_pose = metadata["y_angle"]
                state.goal = PredictorPlannerAgent.get_goal_location(
                    current_bot_location, current_bot_pose, predicted_goal, 32,
                    32)
                print("Predicted Error ", predictor_error)

                while True:

                    # Generate probabilities over actions
                    if isinstance(self.model, AbstractModel):
                        probabilities = list(
                            torch.exp(self.model.get_probs(state).data))
                    elif isinstance(self.model, AbstractIncrementalModel):
                        log_probabilities, model_state, _, _ = self.model.get_probs(
                            state, model_state, volatile=True)
                        probabilities = list(torch.exp(
                            log_probabilities.data))[0]
                    else:
                        raise AssertionError("Unhandled Model type.")

                    # Use test policy to get the action
                    action = self.test_policy(probabilities)
                    action_counts[action] += 1

                    if action == self.action_space.get_stop_action_index(
                    ) or num_actions >= max_num_actions:

                        intermediate_goal = data_point.get_destination_list(
                        )[instruction_i]
                        agent_position = metadata["x_pos"], metadata["z_pos"]
                        distance = self._l2_distance(agent_position,
                                                     intermediate_goal)
                        self.log("Instruction is %r " % instruction, logger)
                        self.log(
                            "Predicted Goal is %r, Goal Reached is %r and Real goal is %r "
                            % (predicted_goal, agent_position,
                               intermediate_goal), logger)
                        self.log(
                            "Agent: Position %r got Distance %r " %
                            (instruction_i + 1, distance), logger)
                        break

                    else:
                        # Send the action and get feedback
                        image, reward, metadata = self.server.send_action_receive_feedback(
                            action)

                        # Update the agent state
                        pose = int(metadata["y_angle"] / 15.0)
                        position_orientation = (metadata["x_pos"],
                                                metadata["z_pos"],
                                                metadata["y_angle"])
                        state = state.update(
                            image,
                            action,
                            pose=pose,
                            position_orientation=position_orientation,
                            data_point=data_point)

                        # Set the goal based on the current position and angle
                        current_bot_location = metadata["x_pos"], metadata[
                            "z_pos"]
                        current_bot_pose = metadata["y_angle"]
                        state.goal = PredictorPlannerAgent.get_goal_location(
                            current_bot_location, current_bot_pose,
                            predicted_goal, 32, 32)

                        num_actions += 1

            image, reward, metadata = self.server.halt_and_receive_feedback()
            if tensorboard is not None:
                tensorboard.log_all_test_errors(metadata["edit_dist_error"],
                                                metadata["closest_dist_error"],
                                                metadata["stop_dist_error"])

            # Update the scores based on meta_data
            self.meta_data_util.log_results(metadata)

        self.meta_data_util.log_results(metadata)
        logging.info("Testing data action counts %r", action_counts)
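The tester above scores each segment with self._l2_distance, which is not shown in this excerpt. A plausible definition, assuming (x, z) position tuples, is:

import math

def _l2_distance(pos_a, pos_b):
    """Euclidean distance between two (x, z) positions (assumed signature)."""
    return math.sqrt((pos_a[0] - pos_b[0]) ** 2 + (pos_a[1] - pos_b[1]) ** 2)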
Example #30
0
    def test_auto_segmented(self,
                            test_dataset,
                            segmenting_type="oracle",
                            tensorboard=None,
                            logger=None,
                            pushover_logger=None):

        assert segmenting_type in ("auto", "oracle")
        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()

        self.log(
            "Performing testing on paragraphs with segmenting type %r" %
            segmenting_type, logger)
        metadata = {"feedback": ""}

        task_completion_accuracy = 0

        for data_point in test_dataset:
            if segmenting_type == "auto":
                segmented_instruction = data_point.get_instruction_auto_segmented(
                )
            else:
                segmented_instruction = data_point.get_instruction_oracle_segmented(
                )

            max_num_actions = self.constants["horizon"]
            image, metadata = self.server.reset_receive_feedback(data_point)

            for instruction_i, instruction in enumerate(segmented_instruction):

                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                state = AgentObservedState(
                    instruction=instruction,
                    config=self.config,
                    constants=self.constants,
                    start_image=image,
                    previous_action=None,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point,
                    prev_instruction=data_point.get_prev_instruction(),
                    next_instruction=data_point.get_next_instruction())

                # Reset the actions taken and model state
                num_actions = 0
                model_state = None

                while True:

                    # Generate probabilities over actions
                    if isinstance(self.model, AbstractModel):
                        probabilities = list(
                            torch.exp(self.model.get_probs(state).data))
                    elif isinstance(self.model, AbstractIncrementalModel):
                        log_probabilities, model_state, _, _ = self.model.get_probs(
                            state, model_state, volatile=True)
                        probabilities = list(torch.exp(
                            log_probabilities.data))[0]
                    else:
                        raise AssertionError("Unhandled Model type.")

                    # Use test policy to get the action
                    action = self.test_policy(probabilities)
                    action_counts[action] += 1

                    if action == self.action_space.get_stop_action_index(
                    ) or num_actions >= max_num_actions:
                        # Compute the l2 distance

                        intermediate_goal = data_point.get_destination_list(
                        )[instruction_i]
                        agent_position = metadata["x_pos"], metadata["z_pos"]
                        distance = self._l2_distance(agent_position,
                                                     intermediate_goal)
                        # logging.info("Agent: Position %r got Distance %r " % (instruction_i + 1, distance))
                        # self.log("Agent: Position %r got Distance %r " % (instruction_i + 1, distance), logger)
                        break

                    else:
                        # Send the action and get feedback
                        image, reward, metadata = self.server.send_action_receive_feedback(
                            action)

                        # Update the agent state
                        pose = int(metadata["y_angle"] / 15.0)
                        position_orientation = (metadata["x_pos"],
                                                metadata["z_pos"],
                                                metadata["y_angle"])
                        state = state.update(
                            image,
                            action,
                            pose=pose,
                            position_orientation=position_orientation,
                            data_point=data_point)
                        num_actions += 1

            image, reward, metadata = self.server.halt_and_receive_feedback()
            if tensorboard is not None:
                tensorboard.log_all_test_errors(metadata["edit_dist_error"],
                                                metadata["closest_dist_error"],
                                                metadata["stop_dist_error"])

            # Update the scores based on meta_data
            self.meta_data_util.log_results(metadata)

            if metadata["stop_dist_error"] < 5.0:
                task_completion_accuracy += 1

        logging.info("Testing data action counts %r", action_counts)
        task_completion_accuracy = (task_completion_accuracy * 100.0) / float(
            max(len(test_dataset), 1))
        self.log("Overall test results:", logger)
        self.log(
            "Testing: Task completion accuracy is: %r" %
            task_completion_accuracy, logger)
        self.log("Testing: Final Metadata: %r" % metadata, logger)
        self.log("Testing: Action Distribution: %r" % action_counts, logger)
        self.log("Testing data action counts %r" % action_counts, logger)
        self.meta_data_util.log_results(metadata, logger)
        if pushover_logger is not None:
            pushover_feedback = str(metadata["feedback"]) + \
                                " --- " + "task_completion_accuracy=%r" % task_completion_accuracy
            pushover_logger.log(pushover_feedback)
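For reference, the accuracy reported above counts a paragraph as completed when its final stop_dist_error is below 5.0. A small worked example with hypothetical error values:

stop_dist_errors = [2.1, 7.8, 4.9, 0.4]   # hypothetical per-paragraph stop errors
completed = sum(1 for e in stop_dist_errors if e < 5.0)
task_completion_accuracy = (completed * 100.0) / max(len(stop_dist_errors), 1)
print(task_completion_accuracy)           # -> 75.0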