    def get_3d_location(self, exploration_image, data_point, panorama=True):

        state = AgentObservedState(
            instruction=data_point.instruction,
            config=self.config,
            constants=self.constants,
            start_image=exploration_image,
            previous_action=None,
            pose=None,
            position_orientation=data_point.get_start_pos(),
            data_point=data_point)

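        # Predict an attention distribution over the panorama for the goal location and
        # take the argmax cell as the predicted goal pixel.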
        volatile = self.predictor_model.get_attention_prob(state,
                                                           model_state=None)
        inferred_ix = int(
            torch.max(volatile["attention_logits"],
                      0)[1].data.cpu().numpy()[0])
        # Max pointed out that when inferred_ix above is the last value, the calculations below are buggy.

        # The attention map is laid out row-major with 192 columns (6 camera images, 32 columns each).
        predicted_row = inferred_ix // 192
        predicted_col = inferred_ix % 192
        screen_pos = (predicted_row, predicted_col)

        if panorama:
            # Index of the image (out of the 6 panorama images) that contains the goal
            region_index = predicted_col // 32
            predicted_col = predicted_col % 32  # Column within that image where the goal is
            pos = data_point.get_start_pos()
            new_pos_angle = GoalPredictionSingle360ImageSupervisedLearningFromDisk.\
                get_new_pos_angle_from_region_index(region_index, pos)
            metadata = {
                "x_pos": pos[0],
                "z_pos": pos[1],
                "y_angle": new_pos_angle
            }
        else:
            pos = data_point.get_start_pos()
            metadata = {"x_pos": pos[0], "z_pos": pos[1], "y_angle": pos[2]}

        row, col = predicted_row + 0.5, predicted_col + 0.5

        start_pos = current_pos_from_metadata(metadata)
        start_pose = current_pose_from_metadata(metadata)

        goal_pos = data_point.get_destination_list()[-1]
        height_drone = 2.5
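        # Invert the camera projection to recover the (x, z) world coordinates of the predicted cell.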
        x_gen, z_gen = get_inverse_object_position(
            row, col, height_drone, 30, 32, 32,
            (start_pos[0], start_pos[1], start_pose))
        predicted_goal_pos = (x_gen, z_gen)
        x_goal, z_goal = goal_pos

        x_diff = x_gen - x_goal
        z_diff = z_gen - z_goal

        dist = math.sqrt(x_diff * x_diff + z_diff * z_diff)

        return predicted_goal_pos, dist, screen_pos, volatile[
            "attention_probs"]
    def debug_manual_control(self, data_point, vocab):

        self.server.clear_metadata()
        task_completion_accuracy = 0

        image, metadata = self.server.reset_receive_feedback(data_point)
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   data_point=data_point)
        num_actions = 0
        print("Instruction is ",
              " ".join([vocab[index] for index in data_point.instruction]))
        plt.ion()

        while True:
            # Show the goal location
            self.show_goal_location(image, metadata)

            incorrect_action = True
            action_string = None
            while incorrect_action:
                action_string = input(
                    "Take the action. 0: Forward, 1: Left, 2: Right, 3: Stop, 4: Interact\n"
                )
                if action_string in ['0', '1', '2', '3', '4']:
                    incorrect_action = False
                if action_string == '4':
                    interact_values = input(
                        "Enter the row and column in format: row col")
                    row, col = interact_values.split()
                    row, col = int(row), int(col)
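                    # Interact actions are indexed after the 4 base actions: index = 4 + row * 32 + col.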
                    action_string = 4 + row * 32 + col

            action = int(action_string)
            action_name = self.action_space.get_action_name(action)

            if action == self.action_space.get_stop_action_index():
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback(
                )

                print("Metadata is ", metadata)
                if metadata["navigation-error"] <= 1.0:
                    task_completion_accuracy += 1
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(
                    action)
                # Update the agent state
                state = state.update(image, action, data_point=data_point)
                num_actions += 1

            print("Metadata is ", metadata)
            print("Took action %r, Got reward %r" % (action_name, reward))
    def do_train(self, agent, train_dataset, test_dataset, experiment_name):
        """ Perform training """

        dataset_size = len(train_dataset)
        clock = 0
        clock_max = 1  #32

        for epoch in range(1, self.max_epoch + 1):

            logging.info("Starting epoch %d", epoch)
            self.test_classifier(agent, test_dataset)

            for data_point_ix, data_point in enumerate(train_dataset):

                if (data_point_ix + 1) % 100 == 0:
                    logging.info("Done %d out of %d", data_point_ix,
                                 dataset_size)

                batch_replay_items = []

                state = AgentObservedState(
                    instruction=data_point.instruction,
                    config=self.config,
                    constants=self.constants,
                    start_image=None,  # image,
                    previous_action=None)
                segments = data_point.get_instruction_oracle_segmented()
                segment_lens = [len(s) for s in segments]
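                # Label each read step: 0 while inside a segment, 1 on the token that ends the segment.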
                for seg_len in segment_lens:
                    num_read = 0
                    while num_read < seg_len:
                        state = state.update_on_read()
                        num_read += 1
                        if num_read < seg_len:
                            batch_replay_items.append((state, 0))
                        else:
                            batch_replay_items.append((state, 1))
                    state = state.update_on_act_halt()

                # add to global memory
                for replay_item in batch_replay_items:
                    self.global_replay_memory.append(replay_item)

                clock += 1
                if clock % clock_max == 0:
                    batch_replay_items = self.sample_from_global_memory()
                    self.global_replay_memory.clear()
                    clock = 0
                    # Perform update
                    loss_val = self.do_update(batch_replay_items)
                    self.tensorboard.log_loglikelihood_position(loss_val)

            # Save the model
            self.model.save_model(experiment_name +
                                  "/mle_segmentation_prediction_epoch_" +
                                  str(epoch))
    def test_baseline(self, test_dataset):

        self.server.clear_metadata()

        metadata = {"feedback": ""}
        num_actions_list = []
        task_completion_accuracy = 0
        for data_point in test_dataset:
            image, metadata = self.server.reset_receive_feedback(data_point)
            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None)

            num_actions = 0
            # max_num_actions = len(data_point.get_trajectory())
            # max_num_actions += self.constants["max_extra_horizon"]
            num_segments = len(data_point.get_instruction_oracle_segmented())
            max_num_actions = self.constants["horizon"] * num_segments

            while True:

                action = self.get_next_action(data_point, num_actions)

                if action == self.action_space.get_stop_action_index(
                ) or num_actions >= max_num_actions:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.halt_and_receive_feedback(
                    )
                    num_actions_list.append(num_actions)
                    self.meta_data_util.log_results(metadata)

                    if metadata["stop_dist_error"] < 5.0:
                        task_completion_accuracy += 1
                    break

                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(
                        action)

                    # Update the agent state
                    state = state.update(image, action)
                    num_actions += 1

                    # self._save_agent_state(state, num_actions)

        self.meta_data_util.log_results(metadata)
        task_completion_accuracy /= float(max(len(test_dataset), 1))
        task_completion_accuracy *= 100.0
        mean_num_actions = float(np.array(num_actions_list).mean())
        logging.info("Task completion accuracy %r", task_completion_accuracy)
        logging.info("Done testing baseline %r, mean num actions is %f",
                     self.baseline_name, mean_num_actions)
    def test_human_performance(self, dataset, vocab, logger):

        self.server.clear_metadata()

        for data_point in dataset:

            task_completion_accuracy = 0

            image, metadata = self.server.reset_receive_feedback(data_point)
            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None,
                                       data_point=data_point)
            num_actions = 0
            print("Instruction is ",
                  " ".join([vocab[index] for index in data_point.instruction]))

            while True:

                incorrect_action = True
                action_string = None
                while incorrect_action:
                    action_string = input(
                        "Take the action. 0: Forward, 1: Left, 2: Right, 3: Stop, 4: Interact\n"
                    )
                    if action_string in ['0', '1', '2', '3', '4']:
                        incorrect_action = False
                    if action_string == '4':
                        interact_values = input(
                            "Enter the row and column in format: row col")
                        row, col = interact_values.split()
                        row, col = int(row), int(col)
                        action_string = 4 + row * 32 + col

                action = int(action_string)

                if action == self.action_space.get_stop_action_index():
                    # Send the action and get feedback
                    image, reward, metadata = self.server.halt_and_receive_feedback(
                    )

                    if metadata["navigation-error"] <= 1.0:
                        task_completion_accuracy += 1
                        logger.log("Completed the task")
                    logger.log("Meta data is %r " % metadata)
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(
                        action)
                    # Update the agent state
                    state = state.update(image, action, data_point=data_point)
                    num_actions += 1
    def _explore_and_set_tracking(self, server, data_point):

        # Get the panoramic image
        panorama, _ = server.explore()

        # Get the panorama and predict the goal location
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=panorama,
                                   previous_action=None,
                                   pose=None,
                                   position_orientation=None,
                                   data_point=data_point)

        volatile = self.local_predictor_model.get_attention_prob(state, model_state=None)
        attention_prob = list(volatile["attention_probs"].view(-1)[:-1].data.cpu().numpy())
        inferred_ix = gp.sample_action_from_prob(attention_prob)
        sampled_prob = volatile["attention_probs"][inferred_ix]

        if inferred_ix == 6 * self.config["num_manipulation_row"] * self.config["num_manipulation_col"]:
            print("Predicting Out-of-sight")
            return None

        assert 0 <= inferred_ix < 6 * self.config["num_manipulation_row"] * self.config["num_manipulation_col"]

        row = int(inferred_ix / (6 * self.config["num_manipulation_col"]))
        col = inferred_ix % (6 * self.config["num_manipulation_col"])
        region_ix = int(col / self.config["num_manipulation_col"])

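        # The panorama concatenates the six camera views in the order 3, 4, 5, 0, 1, 2,
        # so map the panorama region index back to the camera index used by the server.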
        if region_ix == 0:
            camera_ix = 3
        elif region_ix == 1:
            camera_ix = 4
        elif region_ix == 2:
            camera_ix = 5
        elif region_ix == 3:
            camera_ix = 0
        elif region_ix == 4:
            camera_ix = 1
        elif region_ix == 5:
            camera_ix = 2
        else:
            raise AssertionError("region ix should be in {0, 1, 2, 3, 4, 5}. Found ", region_ix)

        col = col % self.config["num_manipulation_col"]

        # Set tracking
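        # Normalize the cell center to [0, 1] coordinates within the selected camera image.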
        row_value = min(1.0, (row + 0.5) / float(self.config["num_manipulation_row"]))
        col_value = min(1.0, (col + 0.5) / float(self.config["num_manipulation_col"]))

        server.set_tracking(camera_ix, row_value, col_value)

        return sampled_prob
    def do_train(self, agent, train_dataset, test_dataset, train_images, test_images, experiment_name):
        """ Perform training """

        dataset_size = len(train_dataset)
        clock = 0
        clock_max = 1

        for epoch in range(1, self.max_epoch + 1):

            logging.info("Starting epoch %d", epoch)
            self.test_classifier(agent, test_dataset, test_images)

            for data_point_ix, data_point in enumerate(train_dataset):

                if (data_point_ix + 1) % 100 == 0:
                    logging.info("Done %d out of %d", data_point_ix, dataset_size)

                batch_replay_items = []

                state = AgentObservedState(instruction=data_point.instruction,
                                           config=self.config,
                                           constants=self.constants,
                                           start_image=train_images[data_point_ix],
                                           previous_action=None,
                                           data_point=data_point)

                # Store it in the replay memory list
                symbolic_form = nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(data_point)
                replay_item = SymbolicTextReplayMemoryItem(state, symbolic_form)
                batch_replay_items.append(replay_item)

                # Global
                for replay_item in batch_replay_items:
                    self.global_replay_memory.append(replay_item)

                clock += 1
                if clock % clock_max == 0:
                    batch_replay_items = self.sample_from_global_memory()
                    self.global_replay_memory.clear()
                    clock = 0
                    # Perform update
                    loss_val = self.do_update(batch_replay_items)
                    self.tensorboard.log_loglikelihood_position(loss_val)

            # Save the model
            self.model.save_model(experiment_name + "/ml_learning_symbolic_text_prediction_epoch_" + str(epoch))
    def debug_human_control(self, data_point, tensorboard=None):

        image, metadata = self.server.reset_receive_feedback(data_point)
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   data_point=data_point)
        num_actions = 0
        max_num_actions = self.constants["horizon"]
        actions = []

        message = ""
        for action in range(self.action_space.num_actions()):
            message = message + "%d (%s) " % (
                action, self.action_space.get_action_name(action)) + " "

        while True:
            # Use test policy to get the action
            action = input("Take action according to the message: " +
                           str(message))
            action = int(action)
            actions.append(action)

            if action == self.action_space.get_stop_action_index(
            ) or num_actions >= max_num_actions:
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback(
                )
                if tensorboard is not None:
                    tensorboard.log_scalar("navigation_error",
                                           metadata["navigation_error"])

                # Update the scores based on meta_data
                self.meta_data_util.log_results(metadata)
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(
                    action)
                # Update the agent state
                state = state.update(image, action, data_point=data_point)
                num_actions += 1

        return metadata, actions
    def calc_log_prob(self, tune_dataset, tune_image, tensorboard):

        total_validation_log_probability = 0
        for data_point_ix, data_point in enumerate(tune_dataset):
            tune_image_example = tune_image[data_point_ix]
            image = tune_image_example[0]

            model_state = None
            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None,
                                       pose=None,
                                       position_orientation=None,
                                       data_point=data_point)
            trajectory = data_point.get_trajectory()

            validation_log_probability = 0

            for action_ix, action in enumerate(trajectory):
                log_probabilities, model_state, image_emb_seq = self.model.get_probs(
                    state, model_state)
                validation_log_probability += float(
                    log_probabilities.data[0][action])
                image = tune_image_example[action_ix + 1]
                state = state.update(image,
                                     action,
                                     pose=None,
                                     position_orientation=None,
                                     data_point=data_point)

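            # Add the log probability of the final STOP action after the trajectory ends.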
            log_probabilities, model_state, image_emb_seq = self.model.get_probs(
                state, model_state)
            validation_log_probability += float(log_probabilities.data[0][
                self.action_space.get_stop_action_index()])
            mean_validation_log_probability = validation_log_probability / float(
                len(trajectory) + 1)
            tensorboard.log_scalar("Validation_Log_Prob",
                                   mean_validation_log_probability)
            total_validation_log_probability += mean_validation_log_probability
        total_validation_log_probability /= float(max(len(tune_dataset), 1))
        logging.info("Mean Validation Log Prob is %r",
                     total_validation_log_probability)
    def test(self,
             test_dataset,
             tensorboard=None,
             logger=None,
             pushover_logger=None):

        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()
        task_completion_accuracy = 0
        print("Reached Test")
        test_dataset_size = len(test_dataset)

        metadata = {"feedback": ""}
        data_point = random.sample(test_dataset, 1)[0]
        while True:

            print("Please enter an instruction. For sample see:")
            # data_point = random.sample(test_dataset, 1)[0]
            image, metadata = self.server.reset_receive_feedback(data_point)
            print(
                "Sample instruction: ",
                instruction_to_string(data_point.get_instruction(),
                                      self.config))
            input_instruction = input(
                "Enter an instruction or enter q to quit ")
            if input_instruction == "q" or input_instruction == "quit":
                break
            input_instruction_ids = self.convert_to_id(input_instruction)

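            # Discretize the agent's yaw (y_angle) into 15-degree buckets to get the pose index.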
            pose = int(metadata["y_angle"] / 15.0)
            position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                    metadata["y_angle"])
            state = AgentObservedState(
                instruction=input_instruction_ids,
                config=self.config,
                constants=self.constants,
                start_image=image,
                previous_action=None,
                pose=pose,
                position_orientation=position_orientation,
                data_point=data_point,
                prev_instruction=data_point.get_prev_instruction(),
                next_instruction=data_point.get_next_instruction())
            # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()
            num_actions = 0
            max_num_actions = self.constants["horizon"]
            model_state = None
            # print "Model state is new "
            while True:

                time.sleep(0.3)

                # Generate probabilities over actions
                if isinstance(self.model, AbstractModel):
                    probabilities = list(
                        torch.exp(self.model.get_probs(state).data))
                elif isinstance(self.model, AbstractIncrementalModel):
                    log_probabilities, model_state, _, _ = self.model.get_probs(
                        state, model_state, volatile=True)
                    probabilities = list(torch.exp(log_probabilities.data))[0]
                else:
                    # print "Num action is " + str(num_actions) + " and max is " + str(max_num_actions)
                    log_probabilities, model_state = self.model.get_probs(
                        state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))
                    # raise AssertionError("Unhandled Model type.")

                # Use test policy to get the action
                action = self.test_policy(probabilities)
                # DONT FORGET TO REMOVE
                # action = np.random.randint(0, 2)
                action_counts[action] += 1

                if action == self.action_space.get_stop_action_index(
                ) or num_actions >= max_num_actions:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.halt_and_receive_feedback(
                    )
                    if tensorboard is not None:
                        tensorboard.log_all_test_errors(
                            metadata["edit_dist_error"],
                            metadata["closest_dist_error"],
                            metadata["stop_dist_error"])

                    if metadata["stop_dist_error"] < 5.0:
                        task_completion_accuracy += 1

                    # Update the scores based on meta_data
                    self.meta_data_util.log_results(metadata)
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(
                        action)
                    # Update the agent state
                    pose = int(metadata["y_angle"] / 15.0)
                    position_orientation = (metadata["x_pos"],
                                            metadata["z_pos"],
                                            metadata["y_angle"])
                    state = state.update(
                        image,
                        action,
                        pose=pose,
                        position_orientation=position_orientation,
                        data_point=data_point)
                    num_actions += 1

        print("Finished testing. Now logging.")
        task_completion_accuracy = (task_completion_accuracy * 100.0) / float(
            max(len(test_dataset), 1))
        self.log("Overall test results:", logger)
        self.log(
            "Testing: Task completion accuracy is: %r" %
            task_completion_accuracy, logger)
        self.log("Testing: Final Metadata: %r" % metadata, logger)
        self.log("Testing: Action Distribution: %r" % action_counts, logger)
        self.meta_data_util.log_results(metadata, logger)
        self.log("Testing data action counts %r" % action_counts, logger)
        if pushover_logger is not None:
            pushover_feedback = str(
                metadata["feedback"]
            ) + " --- " + "task_completion_accuracy=%r" % task_completion_accuracy
            pushover_logger.log(pushover_feedback)
    def do_train(self, agent, train_dataset, tune_dataset, experiment_name):
        """ Perform training """

        assert isinstance(
            agent, ReadPointerAgent
        ), "This learning algorithm works only with READPointerAgent"

        dataset_size = len(train_dataset)

        for epoch in range(1, self.max_epoch + 1):

            logging.info("Starting epoch %d", epoch)
            action_counts = dict()
            action_counts[ReadPointerAgent.READ_MODE] = [0] * 2
            action_counts[ReadPointerAgent.
                          ACT_MODE] = [0] * self.action_space.num_actions()

            # Test on tuning data
            agent.test(tune_dataset, tensorboard=self.tensorboard)

            batch_replay_items = []
            total_reward = 0
            episodes_in_batch = 0

            for data_point_ix, data_point in enumerate(train_dataset):

                if (data_point_ix + 1) % 100 == 0:
                    logging.info("Done %d out of %d", data_point_ix,
                                 dataset_size)
                    logging.info("Training data action counts %r",
                                 action_counts)

                num_actions = 0
                max_num_actions = len(data_point.get_trajectory())
                max_num_actions += self.constants["max_extra_horizon"]

                image, metadata = agent.server.reset_receive_feedback(
                    data_point)
                state = AgentObservedState(instruction=data_point.instruction,
                                           config=self.config,
                                           constants=self.constants,
                                           start_image=image,
                                           previous_action=None)

                mode = ReadPointerAgent.READ_MODE
                last_action_was_halt = False

                instruction = instruction_to_string(
                    data_point.get_instruction(), self.config)
                print "TRAIN INSTRUCTION: %r" % instruction
                print ""

                while True:

                    # Sample action using the policy
                    # Generate probabilities over actions
                    probabilities = list(
                        torch.exp(self.model.get_probs(state, mode).data))

                    # Use test policy to get the action
                    action = gp.sample_action_from_prob(probabilities)
                    action_counts[mode][action] += 1

                    if mode == ReadPointerAgent.READ_MODE:
                        # read mode boundary conditions
                        forced_action = False
                        if not state.are_tokens_left_to_be_read():
                            # force halt
                            action = 1
                            forced_action = True
                        elif num_actions >= max_num_actions or last_action_was_halt:
                            # force read
                            action = 0
                            forced_action = True

                        if not forced_action:
                            # Store reward in the replay memory list
                            reward = self._calc_reward_read_mode(state, action)
                            replay_item = ReplayMemoryItem(state,
                                                           action,
                                                           reward,
                                                           mode=mode)
                            batch_replay_items.append(replay_item)

                        if action == 0:
                            last_action_was_halt = False
                            state = state.update_on_read()
                        elif action == 1:
                            last_action_was_halt = True
                            mode = ReadPointerAgent.ACT_MODE
                        else:
                            raise AssertionError(
                                "Read mode only supports two actions: read(0) and halt(1). "
                                + "Found " + str(action))

                    elif mode == ReadPointerAgent.ACT_MODE:
                        # deal with act mode boundary conditions
                        if num_actions >= max_num_actions:
                            forced_stop = True
                            break

                        elif action == agent.action_space.get_stop_action_index(
                        ):
                            if state.are_tokens_left_to_be_read():
                                reward = self._calc_reward_act_halt(state)

                                # Add to replay memory
                                replay_item = ReplayMemoryItem(
                                    state,
                                    agent.action_space.get_stop_action_index(),
                                    reward, mode)
                                batch_replay_items.append(replay_item)

                                mode = ReadPointerAgent.READ_MODE
                                last_action_was_halt = True
                                state = state.update_on_act_halt()
                            else:
                                forced_stop = False
                                break

                        else:
                            image, reward, metadata = agent.server.send_action_receive_feedback(
                                action)

                            # Store it in the replay memory list
                            replay_item = ReplayMemoryItem(state,
                                                           action,
                                                           reward,
                                                           mode=mode)
                            batch_replay_items.append(replay_item)

                            # Update the agent state
                            state = state.update(image, action)

                            num_actions += 1
                            total_reward += reward
                            last_action_was_halt = False

                    else:
                        raise AssertionError(
                            "Mode should be either read or act. Unhandled mode: "
                            + str(mode))

                assert mode == ReadPointerAgent.ACT_MODE, "Agent should end on Act Mode"

                # Send final STOP action and get feedback
                image, reward, metadata = agent.server.halt_and_receive_feedback(
                )
                total_reward += reward

                # Store it in the replay memory list
                if not forced_stop:
                    replay_item = ReplayMemoryItem(
                        state, agent.action_space.get_stop_action_index(),
                        reward, mode)
                    batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                episodes_in_batch += 1
                if episodes_in_batch == 1:
                    loss_val = self.do_update(batch_replay_items)
                    batch_replay_items = []
                    entropy_val = float(self.entropy.data[0])
                    self.tensorboard.log(entropy_val, loss_val, total_reward)
                    total_reward = 0
                    episodes_in_batch = 0

                self.tensorboard.log_train_error(metadata["error"])

            # Save the model
            self.model.save_model(
                experiment_name +
                "/read_pointer_contextual_bandit_resnet_epoch_" + str(epoch))

            logging.info("Training data action counts %r", action_counts)
    def test_multi_step_action_types(self, test_dataset, vocab, goal_type=None,
                                     tensorboard=None, logger=None, pushover_logger=None):
        """ Perform a single step testing i.e. the goal prediction module is called only once. """

        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()
        task_completion_accuracy = 0

        metadata = {"feedback": ""}
        text_embedding_model = self.goal_prediction_model.text_module

        for data_point_ix, data_point in enumerate(test_dataset):

            instruction_string = " ".join([vocab[token_id] for token_id in data_point.instruction])
            self.log("Instruction is %r " % instruction_string, logger)

            # Call the action type model to determine the number of steps
            token_indices = self.action_type_model.decoding_from_indices_to_indices(data_point.instruction,
                                                                                    text_embedding_model)

            print("Token indices ", token_indices)
            assert len(token_indices) <= 5

            # Call the navigation model
            image, metadata = self.server.reset_receive_feedback(data_point)

            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None,
                                       data_point=data_point)
            num_actions = 0
            max_num_actions = self.constants["horizon"]
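            # Split the action horizon evenly across the steps predicted by the action type model.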
            num_inner_loop_steps = max_num_actions // max(1, len(token_indices))
            model_state = None

            for outer_loop_iter in range(0, len(token_indices)):

                if goal_type == "inferred":
                    # Get the panorama and set tracking
                    self._explore_and_set_tracking(data_point, data_point_ix, instruction_string)

                state.goal = self.get_goal(metadata, goal_type)

                for inner_loop_iter in range(0, num_inner_loop_steps):

                    # Generate probabilities over actions
                    if isinstance(self.navigation_model, AbstractModel):
                        probabilities = list(torch.exp(self.navigation_model.get_probs(state).data))
                    elif isinstance(self.navigation_model, AbstractIncrementalModel):
                        log_probabilities, model_state, _, _ = self.navigation_model.get_probs(state, model_state, volatile=True)
                        probabilities = list(torch.exp(log_probabilities.data))[0]
                    else:
                        log_probabilities, model_state = self.navigation_model.get_probs(state, model_state)
                        probabilities = list(torch.exp(log_probabilities.data))

                    # Use test policy to get the action
                    action = self.test_policy(probabilities)
                    action_counts[action] += 1

                    if token_indices[outer_loop_iter] == 1:
                        print("Performing interaction")
                        row, col, row_real, col_real = state.goal
                        if row is not None and col is not None:
                            act_name = "interact %r %r" % (row, col)
                            interact_action = self.action_space.get_action_index(act_name)
                            image, reward, metadata = self.server.send_action_receive_feedback(interact_action)

                    if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
                        break
                    else:
                        # Send the action and get feedback
                        image, reward, metadata = self.server.send_action_receive_feedback(action)

                        # Update the agent state
                        state = state.update(image, action, data_point=data_point)
                        state.goal = self.get_goal(metadata, goal_type)
                        num_actions += 1

                if num_actions >= max_num_actions:
                    break

            # Send the action and get feedback
            image, reward, metadata = self.server.halt_and_receive_feedback()

            if metadata["navigation-error"] <= 1.0:
                task_completion_accuracy += 1

            # Update the scores based on meta_data
            # self.meta_data_util.log_results(metadata, logger)
            self.log("Overall test results: %r " % metadata, logger)

        task_completion_accuracy = (task_completion_accuracy * 100.0)/float(max(len(test_dataset), 1))
        self.log("Overall test results:", logger)
        self.log("Testing: Final Metadata: %r" % metadata, logger)
        self.log("Testing: Action Distribution: %r" % action_counts, logger)
        self.log("Testing: Task Completion Accuracy: %r " % task_completion_accuracy, logger)
        # self.meta_data_util.log_results(metadata, logger)
        self.log("Testing data action counts %r" % action_counts, logger)
        if pushover_logger is not None:
            pushover_feedback = str(metadata["feedback"])
            pushover_logger.log(pushover_feedback)
    def do_train_(house_id, shared_model, config, action_space, meta_data_util, constants,
                  train_dataset, tune_dataset, experiment, experiment_name, rank, server,
                  logger, model_type, vocab, use_pushover=False):

        logger.log("In Training...")
        launch_k_unity_builds([config["port"]], "./house_" + str(house_id) + "_elmer.x86_64",
                              arg_str="--config ./AssetsHouse/config" + str(house_id) + ".json",
                              cwd="./simulators/house/")
        logger.log("Launched Builds.")
        server.initialize_server()
        logger.log("Server Initialized.")

        # Test policy
        test_policy = gp.get_argmax_action

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
            logger.log('Created Tensorboard Server.')
        else:
            tensorboard = None

        # Pushover logging is currently disabled regardless of the use_pushover flag.
        pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(config, constants)
        # local_model.train()

        # Create the Agent
        tmp_agent = TmpHouseAgent(server=server,
                                  model=local_model,
                                  test_policy=test_policy,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=config,
                                  constants=constants)
        logger.log("Created Agent.")

        action_counts = [0] * action_space.num_actions()
        max_epochs = 100000 # constants["max_epochs"]
        dataset_size = len(train_dataset)
        tune_dataset_size = len(tune_dataset)

        if tune_dataset_size > 0:
            # Test on tuning data
            tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard,
                           logger=logger, pushover_logger=pushover_logger)

        # Create the learner to compute the loss
        learner = TmpAsynchronousContextualBandit(shared_model, local_model, action_space, meta_data_util,
                                                  config, constants, tensorboard)
        # TODO change 2 --- unity launch moved up
        learner.logger = logger

        for epoch in range(1, max_epochs + 1):

            for data_point_ix, data_point in enumerate(train_dataset):

                # Sync with the shared model
                # local_model.load_state_dict(shared_model.state_dict())
                local_model.load_from_state_dict(shared_model.get_state_dict())

                if (data_point_ix + 1) % 100 == 0:
                    logger.log("Done %d out of %d" %(data_point_ix, dataset_size))
                    logger.log("Training data action counts %r" % action_counts)

                num_actions = 0
                max_num_actions = constants["horizon"]
                max_num_actions += constants["max_extra_horizon"]

                image, metadata = tmp_agent.server.reset_receive_feedback(data_point)
                instruction = data_point.get_instruction()
                # instruction_str = TmpAsynchronousContextualBandit.convert_indices_to_text(instruction, vocab)
                # print("Instruction str is ", instruction_str)

                # Pose and Orientation gone TODO change 3
                state = AgentObservedState(instruction=instruction,
                                           config=config,
                                           constants=constants,
                                           start_image=image,
                                           previous_action=None,
                                           data_point=data_point)
                state.goal = learner.get_goal(metadata)

                model_state = None
                batch_replay_items = []
                total_reward = 0
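                # forced_stop remains True if the horizon runs out before the policy samples the STOP action.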
                forced_stop = True

                while num_actions < max_num_actions:

                    # logger.log("Training: Meta Data %r " % metadata)

                    # Sample action using the policy
                    log_probabilities, model_state, image_emb_seq, state_feature = \
                        local_model.get_probs(state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))[0]

                    # Sample action from the probability
                    action = gp.sample_action_from_prob(probabilities)
                    action_counts[action] += 1

                    if action == action_space.get_stop_action_index():
                        forced_stop = False
                        break

                    # Send the action and get feedback
                    image, reward, metadata = tmp_agent.server.send_action_receive_feedback(action)
                    # logger.log("Action is %r, Reward is %r Probability is %r " % (action, reward, probabilities))

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state, action, reward, log_prob=log_probabilities)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    # Pose and orientation gone, TODO change 4
                    state = state.update(image, action, data_point=data_point)
                    state.goal = learner.get_goal(metadata)

                    num_actions += 1
                    total_reward += reward

                # Send final STOP action and get feedback
                image, reward, metadata = tmp_agent.server.halt_and_receive_feedback()
                total_reward += reward

                # Store it in the replay memory list
                if not forced_stop:
                    # logger.log("Action is Stop, Reward is %r Probability is %r " % (reward, probabilities))
                    replay_item = ReplayMemoryItem(state, action_space.get_stop_action_index(),
                                                   reward, log_prob=log_probabilities)
                    batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                if len(batch_replay_items) > 0:  # 32
                    loss_val = learner.do_update(batch_replay_items)

                    if tensorboard is not None:
                        # cross_entropy = float(learner.cross_entropy.data[0])
                        # tensorboard.log(cross_entropy, loss_val, 0)
                        tensorboard.log_scalar("loss", loss_val)
                        entropy = float(learner.entropy.data[0])/float(num_actions + 1)
                        tensorboard.log_scalar("entropy", entropy)
                        ratio = float(learner.ratio.data[0])
                        tensorboard.log_scalar("Abs_objective_to_entropy_ratio", ratio)
                        tensorboard.log_scalar("total_reward", total_reward)
                        tensorboard.log_scalar("mean navigation error", metadata['mean-navigation-error'])

                        if learner.action_prediction_loss is not None:
                            action_prediction_loss = float(learner.action_prediction_loss.data[0])
                            learner.tensorboard.log_action_prediction_loss(action_prediction_loss)
                        if learner.temporal_autoencoder_loss is not None:
                            temporal_autoencoder_loss = float(learner.temporal_autoencoder_loss.data[0])
                            tensorboard.log_temporal_autoencoder_loss(temporal_autoencoder_loss)
                        if learner.object_detection_loss is not None:
                            object_detection_loss = float(learner.object_detection_loss.data[0])
                            tensorboard.log_object_detection_loss(object_detection_loss)
                        if learner.symbolic_language_prediction_loss is not None:
                            symbolic_language_prediction_loss = float(learner.symbolic_language_prediction_loss.data[0])
                            tensorboard.log_scalar("sym_language_prediction_loss", symbolic_language_prediction_loss)
                        if learner.goal_prediction_loss is not None:
                            goal_prediction_loss = float(learner.goal_prediction_loss.data[0])
                            tensorboard.log_scalar("goal_prediction_loss", goal_prediction_loss)

            # Save the model
            local_model.save_model(experiment + "/contextual_bandit_" + str(rank) + "_epoch_" + str(epoch))
            logger.log("Training data action counts %r" % action_counts)

            if tune_dataset_size > 0:
                # Test on tuning data
                tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard,
                               logger=logger, pushover_logger=pushover_logger)
    def _explore_and_set_tracking(self, data_point, data_point_ix, instruction_string):

        # Get the panoramic image
        panorama, _ = self.server.explore()

        ###########################################
        # original_large_panorama = panorama.copy()
        # panorama = scipy.misc.imresize(panorama.swapaxes(0, 1).swapaxes(1, 2), (128, 128*6, 3)).swapaxes(1, 2).swapaxes(0, 1)
        ###########################################

        # Get the panorama and predict the goal location
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=panorama,
                                   previous_action=None,
                                   pose=None,
                                   position_orientation=None,
                                   data_point=data_point)
        volatile = self.goal_prediction_model.get_attention_prob(state, model_state=None)
        inferred_ix = int(torch.max(volatile["attention_logits"], 0)[1].data.cpu().numpy()[0])

        ##########################################
        # self.save_large_panorama_heat_maps(data_point_ix, original_large_panorama,
        #                                    volatile["attention_probs"], instruction_string, scale=5)
        ##########################################

        if inferred_ix == 6 * self.config["num_manipulation_row"] * self.config["num_manipulation_col"]:
            print("Predicting Out-of-sight")
            return None

        assert 0 <= inferred_ix < 6 * self.config["num_manipulation_row"] * self.config["num_manipulation_col"]

        row = int(inferred_ix / (6 * self.config["num_manipulation_col"]))
        col = inferred_ix % (6 * self.config["num_manipulation_col"])
        region_ix = int(col / self.config["num_manipulation_col"])

        if region_ix == 0:
            camera_ix = 3
        elif region_ix == 1:
            camera_ix = 4
        elif region_ix == 2:
            camera_ix = 5
        elif region_ix == 3:
            camera_ix = 0
        elif region_ix == 4:
            camera_ix = 1
        elif region_ix == 5:
            camera_ix = 2
        else:
            raise AssertionError("region ix should be in {0, 1, 2, 3, 4, 5}. Found ", region_ix)

        col = col % self.config["num_manipulation_col"]

        # Set tracking
        row_value = min(1.0, (row + 0.5) / float(self.config["num_manipulation_row"]))
        col_value = min(1.0, (col + 0.5) / float(self.config["num_manipulation_col"]))

        message = self.server.set_tracking(camera_ix, row_value, col_value)

        # self.save_panorama_heat_maps(data_point_ix, panorama, region_ix, row, col, instruction_string)
        return message.decode("utf-8")
    def test_single_step(self, test_dataset, vocab, goal_type="gold",
                         tensorboard=None, logger=None, pushover_logger=None):
        """ Perform a single step testing i.e. the goal prediction module is called only once. """

        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()
        task_completion_accuracy = 0

        metadata = {"feedback": ""}

        for data_point_ix, data_point in enumerate(test_dataset):

            instruction_string = " ".join([vocab[token_id] for token_id in data_point.instruction])
            self.log("Instruction is %r " % instruction_string, logger)

            # Call the navigation model
            image, metadata = self.server.reset_receive_feedback(data_point)

            if goal_type == "inferred":
                # Get the panorama and set tracking
                self._explore_and_set_tracking(data_point, data_point_ix, instruction_string)

            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None,
                                       data_point=data_point)
            state.goal = self.get_goal(metadata, goal_type)
            num_actions = 0
            max_num_actions = self.constants["horizon"]
            model_state = None

            while True:

                # Generate probabilities over actions
                if isinstance(self.navigation_model, AbstractModel):
                    probabilities = list(torch.exp(self.navigation_model.get_probs(state).data))
                elif isinstance(self.navigation_model, AbstractIncrementalModel):
                    log_probabilities, model_state, _, _ = self.navigation_model.get_probs(state, model_state, volatile=True)
                    probabilities = list(torch.exp(log_probabilities.data))[0]
                else:
                    log_probabilities, model_state = self.navigation_model.get_probs(state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))
                    # raise AssertionError("Unhandled Model type.")

                # Use test policy to get the action
                action = self.test_policy(probabilities)
                action_counts[action] += 1

                if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:

                    # Send the action and get feedback
                    image, reward, metadata = self.server.halt_and_receive_feedback()
                    # if tensorboard is not None:
                    #     tensorboard.log_all_test_errors(
                    #         metadata["edit_dist_error"],
                    #         metadata["closest_dist_error"],
                    #         metadata["stop_dist_error"])

                    # self.log("Testing: Taking stop action and got reward %r " % reward, logger)

                    if metadata["navigation-error"] <= 1.0:
                        task_completion_accuracy += 1

                    # Update the scores based on meta_data
                    # self.meta_data_util.log_results(metadata, logger)
                    self.log("Overall test results: %r " % metadata, logger)

                    #############################################
                    # Take a dummy manipulation action
                    # row, col, row_real, col_real = state.goal
                    # if row is not None and col is not None:
                    #     act_name = "interact %r %r" % (row, col)
                    #     interact_action = self.action_space.get_action_index(act_name)
                    #     image, reward, metadata = self.server.send_action_receive_feedback(interact_action)
                    #############################################

                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(action)
                    # self.log("Testing: Taking action %r and got reward %r " % (action, reward), logger)
                    # time.sleep(0.5)
                    # Update the agent state
                    state = state.update(image, action, data_point=data_point)
                    state.goal = self.get_goal(metadata, goal_type)
                    num_actions += 1

        task_completion_accuracy = (task_completion_accuracy * 100.0)/float(max(len(test_dataset), 1))
        self.log("Overall test results:", logger)
        self.log("Testing: Final Metadata: %r" % metadata, logger)
        self.log("Testing: Action Distribution: %r" % action_counts, logger)
        self.log("Testing: Task Completion Accuracy: %r " % task_completion_accuracy, logger)
        # self.meta_data_util.log_results(metadata, logger)
        self.log("Testing data action counts %r" % action_counts, logger)
        if pushover_logger is not None:
            pushover_feedback = str(metadata["feedback"])
            pushover_logger.log(pushover_feedback)
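# For reference, a minimal sketch of an argmax test policy compatible with the
# self.test_policy(probabilities) calls above. The name argmax_test_policy is
# illustrative only; elsewhere in this file the role is played by
# gp.get_argmax_action, whose actual implementation may differ.
def argmax_test_policy(probabilities):
    best_action, best_prob = 0, float("-inf")
    for action_ix, prob in enumerate(probabilities):
        if float(prob) > best_prob:
            best_action, best_prob = action_ix, float(prob)
    return best_action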
Example n. 16
    def test_goal_prediction(self,
                             test_dataset,
                             tensorboard=None,
                             logger=None,
                             pushover_logger=None):

        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()
        task_completion_accuracy = 0

        sum_loss, count, sum_prob, goal_prob_count = 0, 0, 0, 0

        metadata = {"feedback": ""}
        for data_point_ix, data_point in enumerate(test_dataset):
            print("Datapoint index ", data_point_ix)
            image, metadata = self.server.reset_receive_feedback(data_point)
            pose = int(metadata["y_angle"] / 15.0)
            position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                    metadata["y_angle"])
            state = AgentObservedState(
                instruction=data_point.instruction,
                config=self.config,
                constants=self.constants,
                start_image=image,
                previous_action=None,
                pose=pose,
                position_orientation=position_orientation,
                data_point=data_point,
                prev_instruction=data_point.get_prev_instruction(),
                next_instruction=data_point.get_next_instruction())

            ##################################
            state.goal = GoalPrediction.get_goal_location(
                metadata, data_point, 8, 8)
            print("Instruction is ",
                  instruction_to_string(data_point.instruction, self.config))
            ##################################

            # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()
            num_actions = 0
            max_num_actions = self.constants["horizon"]
            model_state = None
            trajectory = data_point.get_trajectory()[0:1]
            trajectory_len = len(trajectory)

            while True:

                if num_actions == trajectory_len:
                    action = self.action_space.get_stop_action_index()
                else:
                    action = trajectory[num_actions]

                # Generate probabilities over actions
                if isinstance(self.model, AbstractModel):
                    raise NotImplementedError()
                elif isinstance(self.model, AbstractIncrementalModel):
                    log_probabilities, model_state, _, volatile = self.model.get_probs(
                        state, model_state, volatile=True)
                    probabilities = list(torch.exp(log_probabilities.data))[0]
                    # Compute goal prediction accuracy
                    goal_loss, prob, _ = self.goal_prediction_accuracy(
                        state.goal, volatile)
                    sum_loss += goal_loss
                    count += 1
                    if prob is not None:
                        sum_prob += prob
                        goal_prob_count += 1
                else:
                    raise NotImplementedError()
                    # log_probabilities, model_state = self.model.get_probs(state, model_state)
                    # probabilities = list(torch.exp(log_probabilities.data))

                action_counts[action] += 1

                if action == self.action_space.get_stop_action_index(
                ) or num_actions >= max_num_actions:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.halt_and_receive_feedback(
                    )
                    if tensorboard is not None:
                        tensorboard.log_all_test_errors(
                            metadata["edit_dist_error"],
                            metadata["closest_dist_error"],
                            metadata["stop_dist_error"])

                    if metadata["stop_dist_error"] < 5.0:
                        task_completion_accuracy += 1

                    # Update the scores based on meta_data
                    self.meta_data_util.log_results(metadata)
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(
                        action)
                    # Update the agent state
                    pose = int(metadata["y_angle"] / 15.0)
                    position_orientation = (metadata["x_pos"],
                                            metadata["z_pos"],
                                            metadata["y_angle"])
                    state = state.update(
                        image,
                        action,
                        pose=pose,
                        position_orientation=position_orientation,
                        data_point=data_point)
                    ##################################
                    state.goal = GoalPrediction.get_goal_location(
                        metadata, data_point, 8, 8)
                    ##################################
                    num_actions += 1

        print("Finished testing. Now logging.")
        task_completion_accuracy = (task_completion_accuracy * 100.0) / float(
            max(len(test_dataset), 1))
        self.log("Overall test results:", logger)
        self.log(
            "Testing: Task completion accuracy is: %r" %
            task_completion_accuracy, logger)
        self.log("Testing: Final Metadata: %r" % metadata, logger)
        self.log("Testing: Action Distribution: %r" % action_counts, logger)
        self.log(
            "Goal Count %r, Mean Goal Loss %r" %
            (count, sum_loss / float(max(count, 1))), logger)
        self.log(
            "Goal Prob Count %r, Mean Goal Prob %r" %
            (goal_prob_count, sum_prob / float(max(goal_prob_count, 1))), logger)

        self.meta_data_util.log_results(metadata, logger)
        self.log("Testing data action counts %r" % action_counts, logger)
        if pushover_logger is not None:
            pushover_feedback = str(
                metadata["feedback"]
            ) + " --- " + "task_completion_accuracy=%r" % task_completion_accuracy
            pushover_logger.log(pushover_feedback)
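# Hypothetical sketch (not the project's goal_prediction_accuracy): given a
# gold goal whose first two entries are the (row, col) cell of a height x width
# attention grid, and a flat probability vector over the grid cells, report the
# negative log-likelihood of the gold cell and the probability mass it receives.
# The grid size and the goal layout assumed here are illustrative.
import math

def goal_prediction_accuracy_sketch(goal, attention_probs, height=8, width=8):
    row, col = goal[0], goal[1]
    if row is None or col is None:
        return None, None                      # goal not visible in this frame
    gold_ix = int(row) * width + int(col)
    prob = float(attention_probs[gold_ix])
    loss = -math.log(max(prob, 1e-8))          # clamp to avoid log(0)
    return loss, prob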
Example n. 17
    def test(self,
             test_dataset,
             vocab,
             tensorboard=None,
             logger=None,
             pushover_logger=None):

        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()
        task_completion_accuracy = 0

        metadata = {"feedback": ""}
        for data_point_ix, data_point in enumerate(test_dataset):
            image, metadata = self.server.reset_receive_feedback(data_point)
            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None,
                                       data_point=data_point)
            state.goal = self.get_goal(metadata)
            # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()
            num_actions = 0
            max_num_actions = self.constants["horizon"]
            model_state = None

            while True:

                # Generate probabilities over actions
                if isinstance(self.model, AbstractModel):
                    probabilities = list(
                        torch.exp(self.model.get_probs(state).data))
                elif isinstance(self.model, AbstractIncrementalModel):
                    log_probabilities, model_state, _, _ = self.model.get_probs(
                        state, model_state, volatile=True)
                    probabilities = list(torch.exp(log_probabilities.data))[0]
                else:
                    log_probabilities, model_state = self.model.get_probs(
                        state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))
                    # raise AssertionError("Unhandled Model type.")

                # Use test policy to get the action
                action = self.test_policy(probabilities)
                action_counts[action] += 1

                if action == self.action_space.get_stop_action_index(
                ) or num_actions >= max_num_actions:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.halt_and_receive_feedback(
                    )
                    # if tensorboard is not None:
                    #     tensorboard.log_all_test_errors(
                    #         metadata["edit_dist_error"],
                    #         metadata["closest_dist_error"],
                    #         metadata["stop_dist_error"])

                    # self.log("Testing: Taking stop action and got reward %r " % reward, logger)

                    if metadata["navigation-error"] <= 1.0:
                        task_completion_accuracy += 1

                    # Update the scores based on meta_data
                    # self.meta_data_util.log_results(metadata, logger)
                    # self.log("Overall test results: %r " % metadata, logger)
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(
                        action)
                    # self.log("Testing: Taking action %r and got reward %r " % (action, reward), logger)
                    # time.sleep(0.5)
                    # Update the agent state
                    state = state.update(image, action, data_point=data_point)
                    state.goal = self.get_goal(metadata)
                    num_actions += 1

        task_completion_accuracy = (task_completion_accuracy * 100.0) / float(
            max(len(test_dataset), 1))
        self.log("Overall test results:", logger)
        self.log("Testing: Final Metadata: %r" % metadata, logger)
        self.log("Testing: Action Distribution: %r" % action_counts, logger)
        self.log(
            "Testing: Task Completion Accuracy: %r " %
            task_completion_accuracy, logger)
        # self.meta_data_util.log_results(metadata, logger)
        self.log("Testing data action counts %r" % action_counts, logger)
        if pushover_logger is not None:
            pushover_feedback = str(metadata["feedback"])
            pushover_logger.log(pushover_feedback)
    def _test(self,
              data_point_ix,
              data_point,
              test_image,
              tensorboard=None,
              debug=False):

        image, metadata = self.server.reset_receive_feedback(data_point)
        pose = int(metadata["y_angle"] / 15.0)
        position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                metadata["y_angle"])
        state = AgentObservedState(
            instruction=data_point.instruction,
            config=self.config,
            constants=self.constants,
            start_image=image,
            previous_action=None,
            pose=pose,
            position_orientation=position_orientation,
            data_point=data_point,
            prev_instruction=data_point.get_prev_instruction(),
            next_instruction=data_point.get_next_instruction())

        # Test image
        high_quality_test_image_example = self.get_exploration_image()
        print("Image shape is ", high_quality_test_image_example.shape)
        test_image_example = test_image[data_point_ix][0]

        # Predict the goal
        predicted_goal, predictor_error, predicted_pixel, attention_prob = self.get_3d_location(
            test_image_example, data_point, panaroma=True)
        current_bot_location = metadata["x_pos"], metadata["z_pos"]
        current_bot_pose = metadata["y_angle"]
        state.goal = PredictorPlannerAgent.get_goal_location(
            current_bot_location, current_bot_pose, predicted_goal, 32, 32)
        print("Predicted Error ", predictor_error)

        num_actions = 0
        actions = []
        info = dict()

        # Dictionary to contain key results
        info["instruction_string"] = instruction_to_string(
            data_point.instruction, self.config)
        info["datapoint_id"] = data_point.get_scene_name()
        info["stop_dist_error"] = metadata["stop_dist_error"]
        info["closest_dist_error"] = metadata["closest_dist_error"]
        info["edit_dist_error"] = metadata["edit_dist_error"]
        info["num_actions_taken"] = num_actions
        info["predicted_goal"] = predicted_goal
        info["predicted_error"] = predictor_error
        info["gold_goal"] = data_point.get_destination_list()[-1]
        info["final_location"] = (metadata["x_pos"], metadata["z_pos"])
        info["predicted_screen_pixels"] = predicted_pixel

        self.save_attention_prob(high_quality_test_image_example,
                                 attention_prob, info["instruction_string"],
                                 info["datapoint_id"])

        # self.save_example(image, info["instruction_string"], info["datapoint_id"], scale=5)

        self.server.halt_and_receive_feedback()

        return metadata, actions, predictor_error, info
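# Illustrative aggregation, not part of the original file: _test above returns
# (metadata, actions, predictor_error, info) per datapoint, so a caller could
# summarise the goal-predictor error over a dataset along these lines.
def mean_predictor_error(results):
    errors = [predictor_error for _, _, predictor_error, _ in results]
    return sum(errors) / float(max(len(errors), 1))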
Example n. 19
    def _test(self,
              data_point_ix,
              data_point,
              test_image,
              tensorboard=None,
              debug=False):

        image, metadata = self.server.reset_receive_feedback(data_point)
        pose = int(metadata["y_angle"] / 15.0)
        position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                metadata["y_angle"])
        state = AgentObservedState(
            instruction=data_point.instruction,
            config=self.config,
            constants=self.constants,
            start_image=image,
            previous_action=None,
            pose=pose,
            position_orientation=position_orientation,
            data_point=data_point,
            prev_instruction=data_point.get_prev_instruction(),
            next_instruction=data_point.get_next_instruction())

        # Test image
        if test_image is None:
            test_image_example = self.get_exploration_image()
        else:
            test_image_example = test_image[data_point_ix][0]

        # Predict the goal
        predicted_goal, predictor_error, predicted_pixel, attention_prob = self.get_3d_location(
            test_image_example, data_point, panaroma=True)
        current_bot_location = metadata["x_pos"], metadata["z_pos"]
        current_bot_pose = metadata["y_angle"]
        state.goal = PredictorPlannerAgent.get_goal_location(
            current_bot_location, current_bot_pose, predicted_goal, 32, 32)
        print("Predicted Error ", predictor_error)

        num_actions = 0
        max_num_actions = self.constants["horizon"]
        model_state = None
        actions = []
        info = dict()

        while True:

            # Generate probabilities over actions
            if isinstance(self.model, AbstractModel):
                probabilities = list(
                    torch.exp(self.model.get_probs(state).data))
            elif isinstance(self.model, AbstractIncrementalModel):
                log_probabilities, model_state, _, _ = self.model.get_probs(
                    state, model_state, volatile=True)
                probabilities = list(torch.exp(log_probabilities.data))[0]
            else:
                raise AssertionError("Unhandled Model type.")

            # Use test policy to get the action
            action = self.test_policy(probabilities)
            actions.append(action)

            if action == self.action_space.get_stop_action_index(
            ) or num_actions >= max_num_actions:
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback(
                )
                if tensorboard is not None:
                    tensorboard.log_all_test_errors(
                        metadata["edit_dist_error"],
                        metadata["closest_dist_error"],
                        metadata["stop_dist_error"])

                # Update the scores based on meta_data
                self.meta_data_util.log_results(metadata)

                if debug:
                    # Dictionary to contain key results
                    info["instruction_string"] = instruction_to_string(
                        data_point.instruction, self.config)
                    info["datapoint_id"] = data_point.get_scene_name()
                    info["stop_dist_error"] = metadata["stop_dist_error"]
                    info["closest_dist_error"] = metadata["closest_dist_error"]
                    info["edit_dist_error"] = metadata["edit_dist_error"]
                    info["num_actions_taken"] = num_actions
                    info["predicted_goal"] = predicted_goal
                    info["predicted_error"] = predictor_error
                    info["gold_goal"] = data_point.get_destination_list()[-1]
                    info["final_location"] = (metadata["x_pos"],
                                              metadata["z_pos"])
                    info["predicted_screen_pixels"] = predicted_pixel

                    self.save_attention_prob(test_image_example,
                                             attention_prob,
                                             info["instruction_string"],
                                             info["datapoint_id"])
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(
                    action)
                # Update the agent state
                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                state = state.update(image,
                                     action,
                                     pose=pose,
                                     position_orientation=position_orientation,
                                     data_point=data_point)

                # Set the goal based on the current position and angle
                current_bot_location = metadata["x_pos"], metadata["z_pos"]
                current_bot_pose = metadata["y_angle"]
                state.goal = PredictorPlannerAgent.get_goal_location(
                    current_bot_location, current_bot_pose, predicted_goal, 32,
                    32)
                num_actions += 1

        # logging.info("Error, Start-Distance, Turn-Angle,  %r %r %r", metadata["stop_dist_error"], distance, angle)
        return metadata, actions, predictor_error, info
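# Hypothetical geometry sketch, not the project's PredictorPlannerAgent code:
# express a predicted world-space goal (x, z) relative to the bot's current
# position and heading, returning the distance to the goal and the signed turn
# angle (degrees, normalised to [-180, 180)) needed to face it. The heading
# convention assumed here (y_angle measured from the +z axis) is an assumption.
import math

def relative_goal(bot_location, bot_pose_degrees, goal):
    (bot_x, bot_z), (goal_x, goal_z) = bot_location, goal
    dx, dz = goal_x - bot_x, goal_z - bot_z
    distance = math.sqrt(dx * dx + dz * dz)
    goal_heading = math.degrees(math.atan2(dx, dz))
    turn_angle = (goal_heading - bot_pose_degrees + 180.0) % 360.0 - 180.0
    return distance, turn_angle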
Example n. 20
    def try_to_progress(self):

        # If in state (1) or (2) then return immediately
        if self.status == Client.WAITING_FOR_EXAMPLE or self.status == Client.WAITING_FOR_ACTION:
            return self.status

        assert self.status == Client.WAITING_TO_RECEIVE

        # If in state (3) then see if the message is available. If the message
        # is available then return to waiting for an action or a new example.
        if self.state is None:
            feedback = self.server.receive_reset_feedback_nonblocking()
        else:
            feedback = self.server.receive_feedback_nonblocking()

        if feedback is None:
            return self.status
        else:
            if self.state is None:
                # assert False, "state should not be none"
                # Feedback is in response to reset
                image, metadata = feedback

                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                self.state = AgentObservedState(
                    instruction=self.current_data_point.instruction,
                    config=self.config,
                    constants=self.constants,
                    start_image=image,
                    previous_action=None,
                    data_point=self.current_data_point)

                # Waiting for action
                self.status = Client.WAITING_FOR_ACTION
            else:
                # Feedback is in response to an action
                image, reward, metadata = feedback
                self.total_reward += reward

                # Create a replay item unless it is forced
                if not self.forced_stop:
                    all_rewards = self._get_all_rewards(metadata)
                    replay_item = ReplayMemoryItem(
                        self.state,
                        self.last_action,
                        reward,
                        log_prob=self.last_log_prob,
                        image_emb_seq=self.image_emb_seq,
                        factor_entropy=self.factor_entropy,
                        all_rewards=all_rewards)
                    self.batch_replay_items.append(replay_item)

                # Update the agent state
                self.state = self.state.update(
                    image,
                    self.last_action,
                    data_point=self.current_data_point)

                if self.last_action == self.agent.action_space.get_stop_action_index(
                ):
                    # Update the scores based on meta_data
                    # self.meta_data_util.log_results(metadata)

                    if self.tensorboard is not None:
                        self.tensorboard.log_all_train_errors(
                            metadata["edit_dist_error"],
                            metadata["closest_dist_error"],
                            metadata["stop_dist_error"])
                    self.status = Client.WAITING_FOR_EXAMPLE
                else:

                    if self.num_action >= self.max_num_actions:
                        # Send forced stop action and wait to receive
                        self._take_forced_stop()
                        self.status = Client.WAITING_TO_RECEIVE
                    else:
                        # Wait to take another action
                        self.status = Client.WAITING_FOR_ACTION

            self.metadata = metadata
            return self.status
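# A minimal sketch of a driver loop that could poll try_to_progress above.
# The Client status constants come from the code shown, but the scheduling
# callbacks (assign_example, take_action) are hypothetical placeholders.
def drive_clients(clients, assign_example, take_action, max_steps=1000):
    for _ in range(max_steps):
        for client in clients:
            status = client.try_to_progress()
            if status == Client.WAITING_FOR_EXAMPLE:
                assign_example(client)   # hand the client a new data point
            elif status == Client.WAITING_FOR_ACTION:
                take_action(client)      # query the policy and send an action
            # Client.WAITING_TO_RECEIVE: nothing to do, poll again next pass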
Example n. 21
    def test(self,
             test_dataset,
             vocab,
             tensorboard=None,
             logger=None,
             pushover_logger=None):

        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()
        task_completion_accuracy = 0

        metadata = {"feedback": ""}
        sum_bisk_metric = 0
        for data_point_ix, data_point in enumerate(test_dataset):
            image, metadata = self.server.reset_receive_feedback(data_point)
            sum_bisk_metric += metadata["metric"]
            instruction = self.convert_text_to_indices(metadata["instruction"],
                                                       vocab)
            state = AgentObservedState(instruction=instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None,
                                       data_point=data_point)
            # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()
            num_actions = 0
            max_num_actions = self.constants["horizon"]
            model_state = None

            while True:

                # Generate probabilities over actions
                if isinstance(self.model, AbstractModel):
                    probabilities = list(
                        torch.exp(self.model.get_probs(state).data))
                elif isinstance(self.model, AbstractIncrementalModel):
                    log_probabilities, model_state, _, _ = self.model.get_probs(
                        state, model_state, volatile=True)
                    probabilities = list(torch.exp(log_probabilities.data))[0]
                else:
                    # print "Num action is " + str(num_actions) + " and max is " + str(max_num_actions)
                    log_probabilities, model_state = self.model.get_probs(
                        state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))
                    # raise AssertionError("Unhandled Model type.")

                # Use test policy to get the action
                action = self.test_policy(probabilities)
                action_counts[action] += 1

                if action == self.action_space.get_stop_action_index(
                ) or num_actions >= max_num_actions:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.halt_and_receive_feedback(
                    )
                    # if tensorboard is not None:
                    #     tensorboard.log_all_test_errors(
                    #         metadata["edit_dist_error"],
                    #         metadata["closest_dist_error"],
                    #         metadata["stop_dist_error"])

                    # if metadata["stop_dist_error"] < 5.0:
                    #     task_completion_accuracy += 1

                    # Update the scores based on meta_data
                    # self.meta_data_util.log_results(metadata, logger)
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(
                        action)
                    # Update the agent state
                    state = state.update(image, action, data_point=data_point)
                    num_actions += 1

        self.log("Overall test results:", logger)
        self.log(
            "Mean Bisk Metric %r" %
            (sum_bisk_metric / float(max(len(test_dataset), 1))), logger)
        # self.log("Testing: Final Metadata: %r" % metadata, logger)
        self.log("Testing: Action Distribution: %r" % action_counts, logger)
        # self.meta_data_util.log_results(metadata, logger)
        self.log("Testing data action counts %r" % action_counts, logger)
        if pushover_logger is not None:
            pushover_feedback = str(metadata["feedback"])
            pushover_logger.log(pushover_feedback)
Example n. 22
    def test_auto_segmented(self,
                            test_dataset,
                            tensorboard=None,
                            segmenting_type="auto"):
        assert segmenting_type in ("auto", "oracle")
        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()

        metadata = ""

        for data_point in test_dataset:
            if segmenting_type == "auto":
                segmented_instruction = data_point.get_instruction_auto_segmented(
                )
            else:
                segmented_instruction = data_point.get_instruction_oracle_segmented(
                )
            num_segments = len(segmented_instruction)
            gold_num_actions = len(data_point.get_trajectory())
            horizon = gold_num_actions // num_segments
            horizon += self.constants["max_extra_horizon_auto_segmented"]

            image, metadata = self.server.reset_receive_feedback(data_point)

            instruction = instruction_to_string(data_point.get_instruction(),
                                                self.config)
            print("TEST INSTRUCTION: %r" % instruction)
            print("")

            for instruction_i, instruction in enumerate(segmented_instruction):

                state = AgentObservedState(
                    instruction=instruction,
                    config=self.config,
                    constants=self.constants,
                    start_image=image,
                    previous_action=None,
                    prev_instruction=data_point.get_prev_instruction(),
                    next_instruction=data_point.get_next_instruction())

                num_actions = 0
                # self._save_agent_state(state, num_actions)

                while True:

                    # Generate probabilities over actions
                    probabilities = list(
                        torch.exp(self.model.get_probs(state).data))
                    # print "test probs:", probabilities

                    # Use test policy to get the action
                    action = self.test_policy(probabilities)
                    action_counts[action] += 1

                    # logging.info("Taking action-num=%d horizon=%d action=%s from %s",
                    #              num_actions, max_num_actions, str(action), str(probabilities))

                    if action == self.action_space.get_stop_action_index(
                    ) or num_actions >= horizon:
                        break

                    else:
                        # Send the action and get feedback
                        image, reward, metadata = self.server.send_action_receive_feedback(
                            action)

                        # Update the agent state
                        state = state.update(image, action)
                        num_actions += 1

            _, _, metadata = self.server.halt_and_receive_feedback()
            if tensorboard is not None:
                tensorboard.log_test_error(metadata["error"])

        self.meta_data_util.log_results(metadata)
        logging.info("Testing data action counts %r", action_counts)
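# Worked example of the per-segment horizon computed above (values illustrative):
gold_num_actions, num_segments = 30, 4
horizon = gold_num_actions // num_segments   # 7 actions per segment from the gold split
horizon += 2                                 # plus an assumed max_extra_horizon_auto_segmented of 2
assert horizon == 9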
Example n. 23
    def do_train_forced_reading(self, agent, train_dataset, tune_dataset,
                                experiment_name):
        """ Perform training """

        assert isinstance(
            agent, ReadPointerAgent
        ), "This learning algorithm works only with READPointerAgent"

        dataset_size = len(train_dataset)

        for epoch in range(1, self.max_epoch + 1):

            logging.info("Starting epoch %d", epoch)
            action_counts = dict()
            action_counts[ReadPointerAgent.READ_MODE] = [0] * 2
            action_counts[ReadPointerAgent.
                          ACT_MODE] = [0] * self.action_space.num_actions()

            # Test on tuning data
            agent.test_forced_reading(tune_dataset,
                                      tensorboard=self.tensorboard)

            batch_replay_items = []
            total_reward = 0
            episodes_in_batch = 0

            for data_point_ix, data_point in enumerate(train_dataset):

                if (data_point_ix + 1) % 100 == 0:
                    logging.info("Done %d out of %d", data_point_ix,
                                 dataset_size)
                    logging.info("Training data action counts %r",
                                 action_counts)

                num_actions = 0
                max_num_actions = len(data_point.get_trajectory())
                max_num_actions += self.constants["max_extra_horizon"]

                image, metadata = agent.server.reset_receive_feedback(
                    data_point)
                oracle_segments = data_point.get_instruction_oracle_segmented()
                pose = int(metadata["y_angle"] / 15.0)
                state = AgentObservedState(instruction=data_point.instruction,
                                           config=self.config,
                                           constants=self.constants,
                                           start_image=image,
                                           previous_action=None,
                                           pose=pose)

                per_segment_budget = int(max_num_actions /
                                         len(oracle_segments))
                num_segment_actions = 0

                mode = ReadPointerAgent.READ_MODE
                current_segment_ix = 0

                while True:

                    if mode == ReadPointerAgent.READ_MODE:
                        # Find the number of tokens to read for the gold segment
                        num_segment_size = len(
                            oracle_segments[current_segment_ix])
                        current_segment_ix += 1
                        for i in range(0, num_segment_size):
                            state = state.update_on_read()
                        mode = ReadPointerAgent.ACT_MODE

                    elif mode == ReadPointerAgent.ACT_MODE:

                        # Sample action using the policy
                        # Generate probabilities over actions
                        probabilities = list(
                            torch.exp(self.model.get_probs(state, mode).data))

                        # Use test policy to get the action
                        action = gp.sample_action_from_prob(probabilities)
                        action_counts[mode][action] += 1

                        # deal with act mode boundary conditions
                        if num_actions >= max_num_actions:
                            forced_stop = True
                            break

                        elif action == agent.action_space.get_stop_action_index(
                        ) or num_segment_actions > per_segment_budget:
                            if state.are_tokens_left_to_be_read():
                                # reward = self._calc_reward_act_halt(state)
                                if metadata["error"] < 5.0:
                                    reward = 1.0
                                else:
                                    reward = -1.0

                                # Add to replay memory
                                replay_item = ReplayMemoryItem(
                                    state,
                                    agent.action_space.get_stop_action_index(),
                                    reward, mode)
                                if action == agent.action_space.get_stop_action_index(
                                ):
                                    batch_replay_items.append(replay_item)

                                mode = ReadPointerAgent.READ_MODE
                                agent.server.force_goal_update()
                                state = state.update_on_act_halt()
                                num_segment_actions = 0
                            else:
                                if action == agent.action_space.get_stop_action_index(
                                ):
                                    forced_stop = False
                                else:  # stopping due to per segment budget exhaustion
                                    forced_stop = True
                                break

                        else:
                            image, reward, metadata = agent.server.send_action_receive_feedback(
                                action)

                            # Store it in the replay memory list
                            replay_item = ReplayMemoryItem(state,
                                                           action,
                                                           reward,
                                                           mode=mode)
                            batch_replay_items.append(replay_item)

                            # Update the agent state
                            pose = int(metadata["y_angle"] / 15.0)
                            state = state.update(image, action, pose=pose)

                            num_actions += 1
                            num_segment_actions += 1
                            total_reward += reward

                    else:
                        raise AssertionError(
                            "Mode should be either read or act. Unhandled mode: "
                            + str(mode))

                assert mode == ReadPointerAgent.ACT_MODE, "Agent should end on Act Mode"

                # Send final STOP action and get feedback
                image, reward, metadata = agent.server.halt_and_receive_feedback(
                )
                total_reward += reward

                # Store it in the replay memory list
                if not forced_stop:
                    replay_item = ReplayMemoryItem(
                        state, agent.action_space.get_stop_action_index(),
                        reward, mode)
                    batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                episodes_in_batch += 1
                if episodes_in_batch == 1:
                    loss_val = self.do_update(batch_replay_items)
                    batch_replay_items = []
                    entropy_val = float(self.entropy.data[0])
                    self.tensorboard.log(entropy_val, loss_val, total_reward)
                    total_reward = 0
                    episodes_in_batch = 0

                self.tensorboard.log_train_error(metadata["error"])

            # Save the model
            self.model.save_model(
                experiment_name +
                "/read_pointer_forced_reading_contextual_bandit_resnet_epoch_"
                + str(epoch))

            logging.info("Training data action counts %r", action_counts)
Example n. 24
    def do_train_(shared_model,
                  config,
                  action_space,
                  meta_data_util,
                  constants,
                  train_dataset,
                  tune_dataset,
                  experiment,
                  experiment_name,
                  rank,
                  server,
                  logger,
                  model_type,
                  vocab,
                  use_pushover=False):

        print("In training...")

        launch_k_unity_builds([config["port"]],
                              "./simulators/house_3_elmer.x86_64")
        server.initialize_server()
        print("launched builds")

        # Test policy
        test_policy = gp.get_argmax_action

        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        # Pushover notifications are currently disabled; both branches fall back to None.
        if use_pushover:
            # pushover_logger = PushoverLogger(experiment_name)
            pushover_logger = None
        else:
            pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(config, constants)
        # local_model.train()

        # Create the Agent
        logger.log("STARTING AGENT")
        tmp_agent = TmpHouseAgent(server=server,
                                  model=local_model,
                                  test_policy=test_policy,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=config,
                                  constants=constants)
        logger.log("Created Agent...")

        action_counts = [0] * action_space.num_actions()
        max_epochs = constants["max_epochs"]
        dataset_size = len(train_dataset)
        tune_dataset_size = len(tune_dataset)

        # Create the learner to compute the loss
        learner = TmpSupervisedLearning(shared_model, local_model,
                                        action_space, meta_data_util, config,
                                        constants, tensorboard)
        # TODO change 2 --- unity launch moved up

        for epoch in range(1, max_epochs + 1):

            for data_point_ix, data_point in enumerate(train_dataset):

                # Sync with the shared model
                # local_model.load_state_dict(shared_model.state_dict())
                local_model.load_from_state_dict(shared_model.get_state_dict())

                if (data_point_ix + 1) % 100 == 0:
                    logger.log("Done %d out of %d" %
                               (data_point_ix, dataset_size))
                    logger.log("Training data action counts %r" %
                               action_counts)

                image, metadata = tmp_agent.server.reset_receive_feedback(
                    data_point)
                # instruction = TmpSupervisedLearning.convert_text_to_indices(metadata["instruction"], vocab)
                instruction = data_point.get_instruction()

                # Pose and Orientation gone TODO change 3
                state = AgentObservedState(instruction=instruction,
                                           config=config,
                                           constants=constants,
                                           start_image=image,
                                           previous_action=None,
                                           data_point=data_point)

                model_state = None
                batch_replay_items = []
                total_reward = 0

                # trajectory = metadata["trajectory"]
                trajectory = data_point.get_trajectory()[0:300]

                for action in trajectory:

                    # Sample action using the policy
                    log_probabilities, model_state, image_emb_seq, state_feature = \
                        local_model.get_probs(state, model_state)

                    # Sample action from the probability
                    action_counts[action] += 1

                    # Send the action and get feedback
                    image, reward, metadata = tmp_agent.server.send_action_receive_feedback(
                        action)

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state,
                                                   action,
                                                   reward,
                                                   log_prob=log_probabilities)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    # Pose and orientation gone, TODO change 4
                    state = state.update(image, action, data_point=data_point)

                    total_reward += reward

                # Send final STOP action and get feedback
                # Sample action using the policy
                log_probabilities, model_state, image_emb_seq, state_feature = \
                    local_model.get_probs(state, model_state)
                image, reward, metadata = tmp_agent.server.halt_and_receive_feedback(
                )
                total_reward += reward

                # if tensorboard is not None:
                #     tensorboard.log_all_train_errors(
                #         metadata["edit_dist_error"], metadata["closest_dist_error"], metadata["stop_dist_error"])

                # Store it in the replay memory list
                replay_item = ReplayMemoryItem(
                    state,
                    action_space.get_stop_action_index(),
                    reward,
                    log_prob=log_probabilities)
                batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                if len(batch_replay_items) > 0:  # 32
                    loss_val = learner.do_update(batch_replay_items)
                    # self.action_prediction_loss_calculator.predict_action(batch_replay_items)
                    # del batch_replay_items[:]  # in place list clear

                    if tensorboard is not None:
                        # cross_entropy = float(learner.cross_entropy.data[0])
                        # tensorboard.log(cross_entropy, loss_val, 0)
                        num_actions = len(trajectory) + 1
                        tensorboard.log_scalar(
                            "loss_val", loss_val)  # /float(num_actions))
                        entropy = float(
                            learner.entropy.data[0])  # /float(num_actions)
                        tensorboard.log_scalar("entropy", entropy)
                        ratio = float(learner.ratio.data[0])
                        tensorboard.log_scalar(
                            "Abs_objective_to_entropy_ratio", ratio)

                        if learner.action_prediction_loss is not None:
                            action_prediction_loss = float(
                                learner.action_prediction_loss.data[0])
                            learner.tensorboard.log_action_prediction_loss(
                                action_prediction_loss)
                        if learner.temporal_autoencoder_loss is not None:
                            temporal_autoencoder_loss = float(
                                learner.temporal_autoencoder_loss.data[0])
                            tensorboard.log_temporal_autoencoder_loss(
                                temporal_autoencoder_loss)
                        if learner.object_detection_loss is not None:
                            object_detection_loss = float(
                                learner.object_detection_loss.data[0])
                            tensorboard.log_object_detection_loss(
                                object_detection_loss)
                        if learner.symbolic_language_prediction_loss is not None:
                            symbolic_language_prediction_loss = float(
                                learner.symbolic_language_prediction_loss.
                                data[0])
                            tensorboard.log_scalar(
                                "sym_language_prediction_loss",
                                symbolic_language_prediction_loss)
                        if learner.goal_prediction_loss is not None:
                            goal_prediction_loss = float(
                                learner.goal_prediction_loss.data[0])
                            tensorboard.log_scalar("goal_prediction_loss",
                                                   goal_prediction_loss)
                        if learner.mean_factor_entropy is not None:
                            mean_factor_entropy = float(
                                learner.mean_factor_entropy.data[0])
                            tensorboard.log_factor_entropy_loss(
                                mean_factor_entropy)

            # Save the model
            local_model.save_model(experiment + "/contextual_bandit_" +
                                   str(rank) + "_epoch_" + str(epoch))
            logger.log("Training data action counts %r" % action_counts)

            if tune_dataset_size > 0:
                # Test on tuning data
                print("Going for testing")
                tmp_agent.test(tune_dataset,
                               vocab,
                               tensorboard=tensorboard,
                               logger=logger,
                               pushover_logger=pushover_logger)
                print("Done testing")
Example n. 25
    def do_train(self, agent, train_dataset, tune_dataset, experiment_name):
        """ Perform training """

        dataset_size = len(train_dataset)

        for epoch in range(1, self.max_epoch + 1):

            logging.info("Starting epoch %d", epoch)
            action_counts = [0] * self.action_space.num_actions()

            # Test on tuning data
            agent.test(tune_dataset, tensorboard=self.tensorboard)

            batch_replay_items = []
            total_reward = 0
            episodes_in_batch = 0

            for data_point_ix, data_point in enumerate(train_dataset):

                if (data_point_ix + 1) % 100 == 0:
                    logging.info("Done %d out of %d", data_point_ix,
                                 dataset_size)
                    logging.info("Training data action counts %r",
                                 action_counts)

                # instruction = instruction_to_string(
                #     data_point.get_instruction(), self.config)
                # print "TRAIN INSTRUCTION: %r" % instruction
                # print ""

                instruction = data_point.get_paragraph_instruction()

                num_actions = 0
                max_num_actions = len(data_point.get_trajectory())
                max_num_actions += self.constants["max_extra_horizon"]

                image, metadata = agent.server.reset_receive_feedback(
                    data_point)
                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                state = AgentObservedState(
                    instruction=data_point.get_paragraph_instruction(),
                    config=self.config,
                    constants=self.constants,
                    start_image=image,
                    previous_action=None,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)
                state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices(
                )

                forced_stop = True

                while num_actions < max_num_actions:

                    # Sample action using the policy
                    # Generate probabilities over actions
                    probabilities = list(
                        torch.exp(self.model.get_probs(state).data))

                    # Use test policy to get the action
                    action = gp.sample_action_from_prob(probabilities)
                    action_counts[action] += 1

                    if action == agent.action_space.get_stop_action_index():
                        forced_stop = False
                        break

                    # Send the action and get feedback
                    image, reward, metadata = agent.server.send_action_receive_feedback(
                        action)

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state, action, reward)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    pose = int(metadata["y_angle"] / 15.0)
                    position_orientation = (metadata["x_pos"],
                                            metadata["z_pos"],
                                            metadata["y_angle"])
                    state = state.update(
                        image,
                        action,
                        pose=pose,
                        position_orientation=position_orientation,
                        data_point=data_point)

                    num_actions += 1
                    total_reward += reward

                # Send final STOP action and get feedback
                image, reward, metadata = agent.server.halt_and_receive_feedback(
                )
                total_reward += reward

                # Store it in the replay memory list
                if not forced_stop:
                    replay_item = ReplayMemoryItem(
                        state, agent.action_space.get_stop_action_index(),
                        reward)
                    batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                episodes_in_batch += 1
                if episodes_in_batch == 1:
                    loss_val = self.do_update(batch_replay_items)
                    batch_replay_items = []
                    # entropy_val = float(self.entropy.data[0])
                    # self.tensorboard.log(entropy_val, loss_val, total_reward)
                    cross_entropy = float(self.cross_entropy.data[0])
                    self.tensorboard.log(cross_entropy, loss_val, total_reward)
                    total_reward = 0
                    episodes_in_batch = 0

                if self.tensorboard is not None:
                    self.tensorboard.log_all_train_errors(
                        metadata["edit_dist_error"],
                        metadata["closest_dist_error"],
                        metadata["stop_dist_error"])

            # Save the model
            self.model.save_model(experiment_name +
                                  "/contextual_bandit_resnet_epoch_" +
                                  str(epoch))

            logging.info("Training data action counts %r", action_counts)
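# A minimal sketch of sampling an action index from a list of probabilities,
# the role gp.sample_action_from_prob plays above; the project's helper may be
# implemented differently (e.g. with torch.multinomial).
import random

def sample_action_from_prob_sketch(probabilities):
    threshold, cumulative = random.random(), 0.0
    for action_ix, prob in enumerate(probabilities):
        cumulative += float(prob)
        if threshold <= cumulative:
            return action_ix
    return len(probabilities) - 1  # guard against floating-point round-off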
Example n. 26
    def debug_tracking(self, data_point, vocab):

        self.server.clear_metadata()
        task_completion_accuracy = 0

        image, metadata = self.server.reset_receive_feedback(data_point)
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   data_point=data_point)
        num_actions = 0
        print("Instruction is ",
              " ".join([vocab[index] for index in data_point.instruction]))
        plt.ion()

        # Get the panoramic image
        panorama, _ = self.server.explore()

        # Show the goal location
        self.show_goal_location(panorama, metadata, size=6)

        tracking_values = input(
            "Enter the region, row and column for tracking.")
        region_ix, row, col = [int(w) for w in tracking_values.split()]
        if region_ix == 0:
            camera_ix = 3
        elif region_ix == 1:
            camera_ix = 4
        elif region_ix == 2:
            camera_ix = 5
        elif region_ix == 3:
            camera_ix = 0
        elif region_ix == 4:
            camera_ix = 1
        elif region_ix == 5:
            camera_ix = 2
        else:
            raise AssertionError("Region ix should be in {0, 1, 2, 3, 4, 5}")

        row_value = row / 32.0
        col_value = col / 32.0
        self.server.set_tracking(camera_ix, row_value, col_value)
        input("Tracking done. Enter to continue")

        while True:

            # Show the goal location
            self.show_goal_location(image,
                                    metadata,
                                    goal_type="inferred",
                                    size=1)

            incorrect_action = True
            action_string = None
            while incorrect_action:
                action_string = input(
                    "Take the action. 0: Forward, 1: Left, 2: Right, 3: Stop, 4: Interact\n"
                )
                if action_string in ['0', '1', '2', '3', '4']:
                    incorrect_action = False
                if action_string == '4':
                    interact_values = input(
                        "Enter the row and column in format: row col")
                    row, col = interact_values.split()
                    row, col = int(row), int(col)
                    # Interact actions are encoded as 4 + row * 32 + col
                    action_string = 4 + row * 32 + col

            action = int(action_string)
            action_name = self.action_space.get_action_name(action)

            if action == self.action_space.get_stop_action_index():
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback(
                )

                print("Metadata is ", metadata)
                if metadata["navigation-error"] <= 1.0:
                    task_completion_accuracy += 1
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(
                    action)
                # Update the agent state
                state = state.update(image, action, data_point=data_point)
                num_actions += 1

            print("Metadata is ", metadata)
            print("Took action %r, Got reward %r" % (action_name, reward))
    def get_3d_location_for_paragraphs(self,
                                       exploration_image,
                                       instruction,
                                       start_pos,
                                       goal_pos,
                                       panaroma=True):

        state = AgentObservedState(instruction=instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=exploration_image,
                                   previous_action=None,
                                   pose=None,
                                   position_orientation=start_pos,
                                   data_point=None)

        volatile = self.predictor_model.get_attention_prob(state,
                                                           model_state=None)
        inferred_ix = int(
            torch.max(volatile["attention_logits"],
                      0)[1].data.cpu().numpy()[0])

        ########################################
        # inst_string = instruction_to_string(instruction, self.config)
        # self.save_attention_prob(exploration_image, volatile["attention_probs"][:-1].view(32, 192), inst_string)
        ########################################

        # Note: attention_probs carries one extra trailing "no goal" entry (see
        # the [:-1] in the commented save call above), so if inferred_ix lands
        # on that final index the row/column arithmetic below goes out of range.
        predicted_row = int(inferred_ix / float(192))
        predicted_col = inferred_ix % 192

        if panaroma:
            # Index of the 6 image where the goal is
            region_index = int(predicted_col / 32)
            predicted_col = predicted_col % 32  # Column within that image where the goal is
            pos = start_pos
            new_pos_angle = GoalPredictionSingle360ImageSupervisedLearningFromDisk.\
                get_new_pos_angle_from_region_index(region_index, pos)
            metadata = {
                "x_pos": pos[0],
                "z_pos": pos[1],
                "y_angle": new_pos_angle
            }
        else:
            pos = start_pos
            metadata = {"x_pos": pos[0], "z_pos": pos[1], "y_angle": pos[2]}

        row, col = predicted_row + 0.5, predicted_col + 0.5

        start_pos = current_pos_from_metadata(metadata)
        start_pose = current_pose_from_metadata(metadata)

        height_drone = 2.5
        x_gen, z_gen = get_inverse_object_position(
            row, col, height_drone, 30, 32, 32,
            (start_pos[0], start_pos[1], start_pose))
        predicted_goal_pos = (x_gen, z_gen)
        x_goal, z_goal = goal_pos

        x_diff = x_gen - x_goal
        z_diff = z_gen - z_goal

        dist = math.sqrt(x_diff * x_diff + z_diff * z_diff)

        return predicted_goal_pos, dist
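
# Illustrative sketch (standalone; the function name is an assumption): the
# index arithmetic above unpacks a flat argmax over a 32 x 192 attention map
# into a panorama row, a camera region, and a column within that region.
def unpack_attention_index(inferred_ix, height=32, width=192, region_width=32):
    """Unpack a flat attention index as done above."""
    # The trailing "no goal" logit at index height * width is not handled here.
    assert 0 <= inferred_ix < height * width
    predicted_row = inferred_ix // width          # row in the stitched panorama
    predicted_col = inferred_ix % width           # column in the stitched panorama
    region_index = predicted_col // region_width  # which of the 6 camera images
    col_in_region = predicted_col % region_width  # column within that image
    return predicted_row, region_index, col_in_region

# Example: index 3260 maps to row 16, region 5, column 28 within that region.
assert unpack_attention_index(3260) == (16, 5, 28)
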
Example 28
    def _test(self, data_point, tensorboard=None, logger=None):

        image, metadata = self.server.reset_receive_feedback(data_point)
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   data_point=data_point)
        num_actions = 0
        max_num_actions = self.constants["horizon"]
        model_state = None
        actions = []
        total_reward = 0.0

        while True:

            # Generate probabilities over actions
            if isinstance(self.model, AbstractModel):
                probabilities = list(
                    torch.exp(self.model.get_probs(state).data))
            elif isinstance(self.model, AbstractIncrementalModel):
                log_probabilities, model_state, _, _ = self.model.get_probs(
                    state, model_state, volatile=True)
                probabilities = list(torch.exp(log_probabilities.data))[0]
            else:
                log_probabilities, model_state = self.model.get_probs(
                    state, model_state)
                probabilities = list(torch.exp(log_probabilities.data))

            # Use test policy to get the action
            action = self.test_policy(probabilities)
            actions.append(action)

            if action == self.action_space.get_stop_action_index(
            ) or num_actions >= max_num_actions:
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback(
                )

                if tensorboard is not None:
                    tensorboard.log_scalar("navigation_error",
                                           metadata["navigation_error"])

                total_reward += reward

                # Update the scores based on meta_data
                self.log("StreetView Metadata: %r" % metadata, logger)
                self.log(
                    "Test Example: Num actions %r, Navigation Error %r, Total Reward %r "
                    %
                    (num_actions, metadata["navigation_error"], total_reward),
                    logger)
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(
                    action)

                total_reward += reward

                # Update the agent state
                state = state.update(image, action, data_point=data_point)
                num_actions += 1

        return metadata, actions
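
# Hedged sketch: test_policy is injected elsewhere in this codebase and its
# exact form is not shown here; a typical greedy choice over the per-action
# probability list produced above would look like this.
def greedy_test_policy(probabilities):
    # Return the index of the highest-probability action.
    return max(range(len(probabilities)), key=lambda ix: float(probabilities[ix]))

# Example: probabilities [0.1, 0.7, 0.2] select action index 1.
assert greedy_test_policy([0.1, 0.7, 0.2]) == 1
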
    def test_auto_segmented(self,
                            test_dataset,
                            logger=None,
                            tensorboard=None,
                            segmenting_type="oracle"):

        assert segmenting_type in ("auto", "oracle")
        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()

        self.log(
            "Performing testing on paragraphs with segmenting type %r" %
            segmenting_type, logger)
        metadata = {"feedback": ""}

        for data_point in test_dataset:
            if segmenting_type == "auto":
                segmented_instruction = data_point.get_instruction_auto_segmented(
                )
            else:
                segmented_instruction = data_point.get_instruction_oracle_segmented(
                )

            max_num_actions = self.constants["horizon"]
            image, metadata = self.server.reset_receive_feedback(data_point)

            for instruction_i, instruction in enumerate(segmented_instruction):

                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                state = AgentObservedState(
                    instruction=instruction,
                    config=self.config,
                    constants=self.constants,
                    start_image=image,
                    previous_action=None,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point,
                    prev_instruction=data_point.get_prev_instruction(),
                    next_instruction=data_point.get_next_instruction())

                # Reset the actions taken and model state
                num_actions = 0
                model_state = None

                # Predict the goal by capturing an exploration panorama and then finding the next suitable place to visit
                exploration_image, _, _ = self.server.explore()
                image_slices = []
                for img_ctr in range(0, 6):
                    image_slice = exploration_image[
                        img_ctr * 3:(img_ctr + 1) *
                        3, :, :]  # 3 x height x width
                    # Scale the intensity of the image as done by scipy.misc.imsave
                    image_slice = scipy.misc.bytescale(
                        image_slice.swapaxes(0, 1).swapaxes(1, 2))
                    image_slices.append(image_slice)

                # Reorder and horizontally stitch the images
                reordered_images = [
                    image_slices[3], image_slices[4], image_slices[5],
                    image_slices[0], image_slices[1], image_slices[2]
                ]
                exploration_image = np.hstack(reordered_images).swapaxes(
                    1, 2).swapaxes(0, 1)  # 3 x height x (width*6)

                start_pos = (metadata["x_pos"], metadata["z_pos"],
                             metadata["y_angle"])
                goal_pos = data_point.get_destination_list()[instruction_i]
                predicted_goal, predictor_error = self.get_3d_location_for_paragraphs(
                    exploration_image,
                    instruction,
                    start_pos,
                    goal_pos,
                    panaroma=True)
                current_bot_location = metadata["x_pos"], metadata["z_pos"]
                current_bot_pose = metadata["y_angle"]
                state.goal = PredictorPlannerAgent.get_goal_location(
                    current_bot_location, current_bot_pose, predicted_goal, 32,
                    32)
                print("Predicted Error ", predictor_error)

                while True:

                    # Generate probabilities over actions
                    if isinstance(self.model, AbstractModel):
                        probabilities = list(
                            torch.exp(self.model.get_probs(state).data))
                    elif isinstance(self.model, AbstractIncrementalModel):
                        log_probabilities, model_state, _, _ = self.model.get_probs(
                            state, model_state, volatile=True)
                        probabilities = list(torch.exp(
                            log_probabilities.data))[0]
                    else:
                        raise AssertionError("Unhandled Model type.")

                    # Use test policy to get the action
                    action = self.test_policy(probabilities)
                    action_counts[action] += 1

                    if action == self.action_space.get_stop_action_index(
                    ) or num_actions >= max_num_actions:

                        intermediate_goal = data_point.get_destination_list(
                        )[instruction_i]
                        agent_position = metadata["x_pos"], metadata["z_pos"]
                        distance = self._l2_distance(agent_position,
                                                     intermediate_goal)
                        self.log("Instruction is %r " % instruction, logger)
                        self.log(
                            "Predicted goal is %r, agent stopped at %r, and real goal is %r "
                            % (predicted_goal, agent_position,
                               intermediate_goal), logger)
                        self.log(
                            "Agent: Position %r got Distance %r " %
                            (instruction_i + 1, distance), logger)
                        break

                    else:
                        # Send the action and get feedback
                        image, reward, metadata = self.server.send_action_receive_feedback(
                            action)

                        # Update the agent state
                        pose = int(metadata["y_angle"] / 15.0)
                        position_orientation = (metadata["x_pos"],
                                                metadata["z_pos"],
                                                metadata["y_angle"])
                        state = state.update(
                            image,
                            action,
                            pose=pose,
                            position_orientation=position_orientation,
                            data_point=data_point)

                        # Set the goal based on the current position and angle
                        current_bot_location = metadata["x_pos"], metadata[
                            "z_pos"]
                        current_bot_pose = metadata["y_angle"]
                        state.goal = PredictorPlannerAgent.get_goal_location(
                            current_bot_location, current_bot_pose,
                            predicted_goal, 32, 32)

                        num_actions += 1

            image, reward, metadata = self.server.halt_and_receive_feedback()
            if tensorboard is not None:
                tensorboard.log_all_test_errors(metadata["edit_dist_error"],
                                                metadata["closest_dist_error"],
                                                metadata["stop_dist_error"])

            # Update the scores based on meta_data
            self.meta_data_util.log_results(metadata)

        self.meta_data_util.log_results(metadata)
        logging.info("Testing data action counts %r", action_counts)
Example 30
    def test_auto_segmented(self,
                            test_dataset,
                            segmenting_type="oracle",
                            tensorboard=None,
                            logger=None,
                            pushover_logger=None):

        assert segmenting_type in ("auto", "oracle")
        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()

        self.log(
            "Performing testing on paragraphs with segmenting type %r" %
            segmenting_type, logger)
        metadata = {"feedback": ""}

        task_completion_accuracy = 0

        for data_point in test_dataset:
            if segmenting_type == "auto":
                segmented_instruction = data_point.get_instruction_auto_segmented(
                )
            else:
                segmented_instruction = data_point.get_instruction_oracle_segmented(
                )

            max_num_actions = self.constants["horizon"]
            image, metadata = self.server.reset_receive_feedback(data_point)

            for instruction_i, instruction in enumerate(segmented_instruction):

                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                state = AgentObservedState(
                    instruction=instruction,
                    config=self.config,
                    constants=self.constants,
                    start_image=image,
                    previous_action=None,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point,
                    prev_instruction=data_point.get_prev_instruction(),
                    next_instruction=data_point.get_next_instruction())

                # Reset the actions taken and model state
                num_actions = 0
                model_state = None

                while True:

                    # Generate probabilities over actions
                    if isinstance(self.model, AbstractModel):
                        probabilities = list(
                            torch.exp(self.model.get_probs(state).data))
                    elif isinstance(self.model, AbstractIncrementalModel):
                        log_probabilities, model_state, _, _ = self.model.get_probs(
                            state, model_state, volatile=True)
                        probabilities = list(torch.exp(
                            log_probabilities.data))[0]
                    else:
                        raise AssertionError("Unhandled Model type.")

                    # Use test policy to get the action
                    action = self.test_policy(probabilities)
                    action_counts[action] += 1

                    if action == self.action_space.get_stop_action_index(
                    ) or num_actions >= max_num_actions:
                        # Compute the l2 distance

                        intermediate_goal = data_point.get_destination_list(
                        )[instruction_i]
                        agent_position = metadata["x_pos"], metadata["z_pos"]
                        distance = self._l2_distance(agent_position,
                                                     intermediate_goal)
                        # logging.info("Agent: Position %r got Distance %r " % (instruction_i + 1, distance))
                        # self.log("Agent: Position %r got Distance %r " % (instruction_i + 1, distance), logger)
                        break

                    else:
                        # Send the action and get feedback
                        image, reward, metadata = self.server.send_action_receive_feedback(
                            action)

                        # Update the agent state
                        pose = int(metadata["y_angle"] / 15.0)
                        position_orientation = (metadata["x_pos"],
                                                metadata["z_pos"],
                                                metadata["y_angle"])
                        state = state.update(
                            image,
                            action,
                            pose=pose,
                            position_orientation=position_orientation,
                            data_point=data_point)
                        num_actions += 1

            image, reward, metadata = self.server.halt_and_receive_feedback()
            if tensorboard is not None:
                tensorboard.log_all_test_errors(metadata["edit_dist_error"],
                                                metadata["closest_dist_error"],
                                                metadata["stop_dist_error"])

            # Update the scores based on meta_data
            self.meta_data_util.log_results(metadata)

            if metadata["stop_dist_error"] < 5.0:
                task_completion_accuracy += 1

        logging.info("Testing data action counts %r", action_counts)
        task_completion_accuracy = (task_completion_accuracy * 100.0) / float(
            max(len(test_dataset), 1))
        self.log("Overall test results:", logger)
        self.log(
            "Testing: Task completion accuracy is: %r" %
            task_completion_accuracy, logger)
        self.log("Testing: Final Metadata: %r" % metadata, logger)
        self.log("Testing: Action Distribution: %r" % action_counts, logger)
        self.log("Testing data action counts %r" % action_counts, logger)
        self.meta_data_util.log_results(metadata, logger)
        if pushover_logger is not None:
            pushover_feedback = str(metadata["feedback"]) + \
                                " --- " + "task_completion_accuracy=%r" % task_completion_accuracy
            pushover_logger.log(pushover_feedback)
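
# Illustrative summary (standalone): an episode above counts as completed when
# its stop distance error is under 5.0, and accuracy is reported as a
# percentage over the dataset, guarding against an empty test set.
def task_completion_accuracy_percent(stop_dist_errors, success_threshold=5.0):
    completed = sum(1 for err in stop_dist_errors if err < success_threshold)
    return (completed * 100.0) / float(max(len(stop_dist_errors), 1))

# Example: errors [2.1, 7.4, 4.9] give 2 successes out of 3, i.e. about 66.7%.
assert abs(task_completion_accuracy_percent([2.1, 7.4, 4.9]) - 200.0 / 3.0) < 1e-9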