Code example #1
    def dump_data(self, train_dataset, test_dataset):

        landmark_distribution = [0] * 63
        theta_1_distribution = [0] * NO_BUCKETS
        theta_2_distribution = [0] * NO_BUCKETS
        r_distribution = [0] * 15

        for data_point in train_dataset:
            symbolic_form = nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(
                data_point)
            landmark, theta_1, theta_2, r = symbolic_form
            landmark_distribution[landmark] += 1
            theta_1_distribution[theta_1] += 1
            theta_2_distribution[theta_2] += 1
            r_distribution[r] += 1
            landmark_string = nav_drone_symbolic_instructions.LANDMARK_NAMES[
                landmark]
            instruction = data_point.get_instruction()
            logging.info("Instruction %r, Symbolic form %r %r %r %r",
                         debug.instruction_to_string(instruction, self.config),
                         landmark_string, theta_1, theta_2, r)

        logging.info("Landmark Distribution %r", landmark_distribution)
        logging.info("Theta 1 Distribution %r", theta_1_distribution)
        logging.info("Theta 2 Distribution %r", theta_2_distribution)
        logging.info("R Distribution %r", r_distribution)
Code example #2
    def get_attention_prob(self,
                           agent_observed_state,
                           model_state,
                           mode=None,
                           volatile=False):

        assert isinstance(agent_observed_state, AgentObservedState)
        agent_observed_state_list = [agent_observed_state]

        image_seqs = [[aos.get_last_image()]
                      for aos in agent_observed_state_list]
        image_batch = cuda_var(
            torch.from_numpy(np.array(image_seqs)).float(), volatile)

        instructions = [
            aos.get_instruction() for aos in agent_observed_state_list
        ]
        instructions_batch = cuda_var(
            torch.from_numpy(np.array(instructions)).long())

        time = agent_observed_state.time_step
        time = cuda_var(torch.from_numpy(np.array([time])).long())

        instruction_string = instruction_to_string(
            agent_observed_state.instruction, self.config)

        state_feature = self.final_module.get_attention_prob(
            image_batch, instructions_batch, instruction_string,
            agent_observed_state.goal)
        return state_feature
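
cuda_var above is a small repository helper; a minimal sketch under the pre-0.4 PyTorch Variable API that this code targets (the real helper in lil-lab/ciff may differ) would be:

import torch
from torch.autograd import Variable

def cuda_var(tensor, volatile=False):
    # Sketch of the assumed helper: wrap the tensor in a Variable (volatile
    # disabled gradient tracking at inference time in old PyTorch) and move
    # it to the GPU when one is available.
    variable = Variable(tensor, volatile=volatile)
    return variable.cuda() if torch.cuda.is_available() else variable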
Code example #3
File: nav_drone_server_py3.py Project: lil-lab/ciff
 def reset(self, data_point, action_space, config):
     assert isinstance(data_point, NavDroneDataPoint)
     assert isinstance(action_space, ActionSpace)
     self.move_list = []
     with self.shared_data_lock:
         self.shared_data["scene_name"] = data_point.get_scene_name()
         end_x, end_z = data_point.get_destination_list()[-1]
         self.shared_data["end_x"], self.shared_data["end_z"] = end_x, end_z
         self.shared_data["dest_list"] = data_point.get_destination_list()
         gold_moves = []
         for seg in data_point.get_sub_trajectory_list():
             moves = [action_space.get_action_name(a) for a in seg]
             gold_moves.extend(moves)
             gold_moves.append(STOP)
         self.shared_data["trajectory"] = [SERVER_MOVE_RESPONSES.index(m)
                                           for m in gold_moves]
     instruction_segments = data_point.get_instruction_oracle_segmented()
     instruction_string = ""
     for i, instruction_seg in enumerate(instruction_segments):
         if i % 2 == 0:
             color = "yellow"
         else:
             color = "magenta"
         instruction_string += "<color=%s>" % color
         instruction_string += instruction_to_string(instruction_seg, config)
         instruction_string += "</color> "
     self.scene_config_queue.put(data_point.get_scene_config())
     self.path_queue.put(data_point.get_scene_path())
     self.instructions_queue.put(instruction_string.strip())
     self.start_pos_queue.put(data_point.get_start_pos())
     self.next_dest_queue.put(data_point.get_destination_list())
     self.move_queue_full = True
Code example #4
    def show_instruction(self, data_point, show_discourse=True):

        if show_discourse:
            paragraph_instruction = data_point.get_paragraph_instruction()
            start_index, end_index = data_point.get_instruction_indices()

            previous_instruction_string = instruction_to_string(
                paragraph_instruction[:start_index], self.config)
            instruction_string = instruction_to_string(
                paragraph_instruction[start_index:end_index], self.config)
            future_instruction_string = instruction_to_string(
                paragraph_instruction[end_index:], self.config)
            return previous_instruction_string + " \n /** " + instruction_string + " **/\n " + future_instruction_string
        else:
            instruction_string = instruction_to_string(
                data_point.get_instruction(), self.config)
            return instruction_string
Code example #5
def tag_dataset(dataset, config):
    noun_set = dict([])

    for data_point in dataset:
        instruction = instruction_to_string(data_point.get_instruction(),
                                            config)

        token_seq = nltk.tokenize.word_tokenize(instruction)
        tagger = nltk.pos_tag(token_seq)
        for tag in tagger:
            if tag[1] == "NN" or tag[1] == "NNP":
                noun = tag[0].lower()
                if noun in noun_set:
                    noun_set[noun] += 1
                else:
                    noun_set[noun] = 1

    sorted_nouns = sorted(noun_set.items(), key=lambda x: -x[1])
    print "Noun set is " + str(sorted_nouns)
Code example #6
    def get_unet_output(self,
                        agent_observed_state,
                        model_state,
                        mode=None,
                        volatile=False):

        assert isinstance(agent_observed_state, AgentObservedState)
        agent_observed_state_list = [agent_observed_state]

        image_seqs = [[aos.get_last_image()]
                      for aos in agent_observed_state_list]
        image_batch = cuda_var(
            torch.from_numpy(np.array(image_seqs)).float(), volatile)

        instructions = [
            aos.get_instruction() for aos in agent_observed_state_list
        ]
        instructions_batch = cuda_var(
            torch.from_numpy(np.array(instructions)).long())

        time = agent_observed_state.time_step
        time = cuda_var(torch.from_numpy(np.array([time])).long())

        instruction_string = instruction_to_string(
            agent_observed_state.instruction, self.config)

        # Embed the text
        _, text_emb_raw = self.text_module(instructions_batch)

        # Embed the image
        image_emb_seq = self.image_module(image_batch)
        image_embedding = image_emb_seq[:, 0, :, :, :]  # 1 x num_channels x height x width

        unet_output = self.final_module(image_embedding, text_emb_raw)
        return unet_output
Code example #7
    def _test(self,
              data_point_ix,
              data_point,
              test_image,
              tensorboard=None,
              debug=False):

        image, metadata = self.server.reset_receive_feedback(data_point)
        pose = int(metadata["y_angle"] / 15.0)
        position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                metadata["y_angle"])
        state = AgentObservedState(
            instruction=data_point.instruction,
            config=self.config,
            constants=self.constants,
            start_image=image,
            previous_action=None,
            pose=pose,
            position_orientation=position_orientation,
            data_point=data_point,
            prev_instruction=data_point.get_prev_instruction(),
            next_instruction=data_point.get_next_instruction())

        # Test image
        high_quality_test_image_example = self.get_exploration_image()
        print("Image shape is ", high_quality_test_image_example.shape)
        test_image_example = test_image[data_point_ix][0]

        # Predict the goal
        predicted_goal, predictor_error, predicted_pixel, attention_prob = self.get_3d_location(
            test_image_example, data_point, panaroma=True)
        current_bot_location = metadata["x_pos"], metadata["z_pos"]
        current_bot_pose = metadata["y_angle"]
        state.goal = PredictorPlannerAgent.get_goal_location(
            current_bot_location, current_bot_pose, predicted_goal, 32, 32)
        print("Predicted Error ", predictor_error)

        num_actions = 0
        actions = []
        info = dict()

        # Dictionary to contain key results
        info["instruction_string"] = instruction_to_string(
            data_point.instruction, self.config)
        info["datapoint_id"] = data_point.get_scene_name()
        info["stop_dist_error"] = metadata["stop_dist_error"]
        info["closest_dist_error"] = metadata["closest_dist_error"]
        info["edit_dist_error"] = metadata["edit_dist_error"]
        info["num_actions_taken"] = num_actions
        info["predicted_goal"] = predicted_goal
        info["predicted_error"] = predictor_error
        info["gold_goal"] = data_point.get_destination_list()[-1]
        info["final_location"] = (metadata["x_pos"], metadata["z_pos"])
        info["predicted_screen_pixels"] = predicted_pixel

        self.save_attention_prob(high_quality_test_image_example,
                                 attention_prob, info["instruction_string"],
                                 info["datapoint_id"])

        # self.save_example(image, info["instruction_string"], info["datapoint_id"], scale=5)

        self.server.halt_and_receive_feedback()

        return metadata, actions, predictor_error, info
Code example #8
    def do_train(self, agent, train_dataset, tune_dataset, experiment_name):
        """ Perform training """

        assert isinstance(
            agent, ReadPointerAgent
        ), "This learning algorithm works only with READPointerAgent"

        dataset_size = len(train_dataset)

        for epoch in range(1, self.max_epoch + 1):

            logging.info("Starting epoch %d", epoch)
            action_counts = dict()
            action_counts[ReadPointerAgent.READ_MODE] = [0] * 2
            action_counts[ReadPointerAgent.ACT_MODE] = [0] * self.action_space.num_actions()

            # Test on tuning data
            agent.test(tune_dataset, tensorboard=self.tensorboard)

            batch_replay_items = []
            total_reward = 0
            episodes_in_batch = 0

            for data_point_ix, data_point in enumerate(train_dataset):

                if (data_point_ix + 1) % 100 == 0:
                    logging.info("Done %d out of %d", data_point_ix,
                                 dataset_size)
                    logging.info("Training data action counts %r",
                                 action_counts)

                num_actions = 0
                max_num_actions = len(data_point.get_trajectory())
                max_num_actions += self.constants["max_extra_horizon"]

                image, metadata = agent.server.reset_receive_feedback(
                    data_point)
                state = AgentObservedState(instruction=data_point.instruction,
                                           config=self.config,
                                           constants=self.constants,
                                           start_image=image,
                                           previous_action=None)

                mode = ReadPointerAgent.READ_MODE
                last_action_was_halt = False

                instruction = instruction_to_string(
                    data_point.get_instruction(), self.config)
                print "TRAIN INSTRUCTION: %r" % instruction
                print ""

                while True:

                    # Sample action using the policy
                    # Generate probabilities over actions
                    probabilities = list(
                        torch.exp(self.model.get_probs(state, mode).data))

                    # Use test policy to get the action
                    action = gp.sample_action_from_prob(probabilities)
                    action_counts[mode][action] += 1

                    if mode == ReadPointerAgent.READ_MODE:
                        # read mode boundary conditions
                        forced_action = False
                        if not state.are_tokens_left_to_be_read():
                            # force halt
                            action = 1
                            forced_action = True
                        elif num_actions >= max_num_actions or last_action_was_halt:
                            # force read
                            action = 0
                            forced_action = True

                        if not forced_action:
                            # Store reward in the replay memory list
                            reward = self._calc_reward_read_mode(state, action)
                            replay_item = ReplayMemoryItem(state,
                                                           action,
                                                           reward,
                                                           mode=mode)
                            batch_replay_items.append(replay_item)

                        if action == 0:
                            last_action_was_halt = False
                            state = state.update_on_read()
                        elif action == 1:
                            last_action_was_halt = True
                            mode = ReadPointerAgent.ACT_MODE
                        else:
                            raise AssertionError(
                                "Read mode only supports two actions: read(0) and halt(1). "
                                + "Found " + str(action))

                    elif mode == ReadPointerAgent.ACT_MODE:
                        # deal with act mode boundary conditions
                        if num_actions >= max_num_actions:
                            forced_stop = True
                            break

                        elif action == agent.action_space.get_stop_action_index(
                        ):
                            if state.are_tokens_left_to_be_read():
                                reward = self._calc_reward_act_halt(state)

                                # Add to replay memory
                                replay_item = ReplayMemoryItem(
                                    state,
                                    agent.action_space.get_stop_action_index(),
                                    reward, mode)
                                batch_replay_items.append(replay_item)

                                mode = ReadPointerAgent.READ_MODE
                                last_action_was_halt = True
                                state = state.update_on_act_halt()
                            else:
                                forced_stop = False
                                break

                        else:
                            image, reward, metadata = agent.server.send_action_receive_feedback(
                                action)

                            # Store it in the replay memory list
                            replay_item = ReplayMemoryItem(state,
                                                           action,
                                                           reward,
                                                           mode=mode)
                            batch_replay_items.append(replay_item)

                            # Update the agent state
                            state = state.update(image, action)

                            num_actions += 1
                            total_reward += reward
                            last_action_was_halt = False

                    else:
                        raise AssertionError(
                            "Mode should be either read or act. Unhandled mode: "
                            + str(mode))

                assert mode == ReadPointerAgent.ACT_MODE, "Agent should end on Act Mode"

                # Send final STOP action and get feedback
                image, reward, metadata = agent.server.halt_and_receive_feedback(
                )
                total_reward += reward

                # Store it in the replay memory list
                if not forced_stop:
                    replay_item = ReplayMemoryItem(
                        state, agent.action_space.get_stop_action_index(),
                        reward, mode)
                    batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                episodes_in_batch += 1
                if episodes_in_batch == 1:
                    loss_val = self.do_update(batch_replay_items)
                    batch_replay_items = []
                    entropy_val = float(self.entropy.data[0])
                    self.tensorboard.log(entropy_val, loss_val, total_reward)
                    total_reward = 0
                    episodes_in_batch = 0

                self.tensorboard.log_train_error(metadata["error"])

            # Save the model
            self.model.save_model(
                experiment_name +
                "/read_pointer_contextual_bandit_resnet_epoch_" + str(epoch))

            logging.info("Training data action counts %r", action_counts)
Code example #9
    def test_goal_prediction(self,
                             test_dataset,
                             tensorboard=None,
                             logger=None,
                             pushover_logger=None):

        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()
        task_completion_accuracy = 0

        sum_loss, count, sum_prob, goal_prob_count = 0, 0, 0, 0

        metadata = {"feedback": ""}
        for data_point_ix, data_point in enumerate(test_dataset):
            print("Datapoint index ", data_point_ix)
            image, metadata = self.server.reset_receive_feedback(data_point)
            pose = int(metadata["y_angle"] / 15.0)
            position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                    metadata["y_angle"])
            state = AgentObservedState(
                instruction=data_point.instruction,
                config=self.config,
                constants=self.constants,
                start_image=image,
                previous_action=None,
                pose=pose,
                position_orientation=position_orientation,
                data_point=data_point,
                prev_instruction=data_point.get_prev_instruction(),
                next_instruction=data_point.get_next_instruction())

            ##################################
            state.goal = GoalPrediction.get_goal_location(
                metadata, data_point, 8, 8)
            print("Instruction is ",
                  instruction_to_string(data_point.instruction, self.config))
            ##################################

            # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()
            num_actions = 0
            max_num_actions = self.constants["horizon"]
            model_state = None
            trajectory = data_point.get_trajectory()[0:1]
            trajectory_len = len(trajectory)

            while True:

                if num_actions == trajectory_len:
                    action = self.action_space.get_stop_action_index()
                else:
                    action = trajectory[num_actions]

                # Generate probabilities over actions
                if isinstance(self.model, AbstractModel):
                    raise NotImplementedError()
                elif isinstance(self.model, AbstractIncrementalModel):
                    log_probabilities, model_state, _, volatile = self.model.get_probs(
                        state, model_state, volatile=True)
                    probabilities = list(torch.exp(log_probabilities.data))[0]
                    # Compute goal prediction accuracy
                    goal_loss, prob, _ = self.goal_prediction_accuracy(
                        state.goal, volatile)
                    sum_loss += goal_loss
                    count += 1
                    if prob is not None:
                        sum_prob += prob
                        goal_prob_count += 1
                else:
                    raise NotImplementedError()
                    # log_probabilities, model_state = self.model.get_probs(state, model_state)
                    # probabilities = list(torch.exp(log_probabilities.data))

                action_counts[action] += 1

                if action == self.action_space.get_stop_action_index(
                ) or num_actions >= max_num_actions:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.halt_and_receive_feedback(
                    )
                    if tensorboard is not None:
                        tensorboard.log_all_test_errors(
                            metadata["edit_dist_error"],
                            metadata["closest_dist_error"],
                            metadata["stop_dist_error"])

                    if metadata["stop_dist_error"] < 5.0:
                        task_completion_accuracy += 1

                    # Update the scores based on meta_data
                    self.meta_data_util.log_results(metadata)
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(
                        action)
                    # Update the agent state
                    pose = int(metadata["y_angle"] / 15.0)
                    position_orientation = (metadata["x_pos"],
                                            metadata["z_pos"],
                                            metadata["y_angle"])
                    state = state.update(
                        image,
                        action,
                        pose=pose,
                        position_orientation=position_orientation,
                        data_point=data_point)
                    ##################################
                    state.goal = GoalPrediction.get_goal_location(
                        metadata, data_point, 8, 8)
                    ##################################
                    num_actions += 1

        print("Finished testing. Now logging.")
        task_completion_accuracy = (task_completion_accuracy * 100.0) / float(
            max(len(test_dataset), 1))
        self.log("Overall test results:", logger)
        self.log(
            "Testing: Task completion accuracy is: %r" %
            task_completion_accuracy, logger)
        self.log("Testing: Final Metadata: %r" % metadata, logger)
        self.log("Testing: Action Distribution: %r" % action_counts, logger)
        self.log(
            "Goal Count %r, Mean Goal Loss %r" %
            (count, sum_loss / float(count)), logger)
        self.log(
            "Goal Prob Count %r, Mean Goal Prob %r" %
            (goal_prob_count, sum_prob / float(goal_prob_count)), logger)

        self.meta_data_util.log_results(metadata, logger)
        self.log("Testing data action counts %r" % action_counts, logger)
        if pushover_logger is not None:
            pushover_feedback = str(
                metadata["feedback"]
            ) + " --- " + "task_completion_accuracy=%r" % task_completion_accuracy
            pushover_logger.log(pushover_feedback)
Code example #10
File: tmp_house_agent.py Project: lil-lab/ciff
    def test_auto_segmented(self,
                            test_dataset,
                            tensorboard=None,
                            segmenting_type="auto"):
        assert segmenting_type in ("auto", "oracle")
        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()

        metadata = ""

        for data_point in test_dataset:
            if segmenting_type == "auto":
                segmented_instruction = data_point.get_instruction_auto_segmented(
                )
            else:
                segmented_instruction = data_point.get_instruction_oracle_segmented(
                )
            num_segments = len(segmented_instruction)
            gold_num_actions = len(data_point.get_trajectory())
            horizon = gold_num_actions // num_segments
            horizon += self.constants["max_extra_horizon_auto_segmented"]

            image, metadata = self.server.reset_receive_feedback(data_point)

            instruction = instruction_to_string(data_point.get_instruction(),
                                                self.config)
            print("TEST INSTRUCTION: %r" % instruction)
            print("")

            for instruction_i, instruction in enumerate(segmented_instruction):

                state = AgentObservedState(
                    instruction=instruction,
                    config=self.config,
                    constants=self.constants,
                    start_image=image,
                    previous_action=None,
                    prev_instruction=data_point.get_prev_instruction(),
                    next_instruction=data_point.get_next_instruction)

                num_actions = 0
                # self._save_agent_state(state, num_actions)

                while True:

                    # Generate probabilities over actions
                    probabilities = list(
                        torch.exp(self.model.get_probs(state).data))
                    # print "test probs:", probabilities

                    # Use test policy to get the action
                    action = self.test_policy(probabilities)
                    action_counts[action] += 1

                    # logging.info("Taking action-num=%d horizon=%d action=%s from %s",
                    #              num_actions, max_num_actions, str(action), str(probabilities))

                    if action == self.action_space.get_stop_action_index(
                    ) or num_actions >= horizon:
                        break

                    else:
                        # Send the action and get feedback
                        image, reward, metadata = self.server.send_action_receive_feedback(
                            action)

                        # Update the agent state
                        state = state.update(image, action)
                        num_actions += 1

            _, _, metadata = self.server.halt_and_receive_feedback()
            if tensorboard is not None:
                tensorboard.log_test_error(metadata["error"])

        self.meta_data_util.log_results(metadata)
        logging.info("Testing data action counts %r", action_counts)
Code example #11
    def _test(self,
              data_point_ix,
              data_point,
              test_image,
              tensorboard=None,
              debug=False):

        image, metadata = self.server.reset_receive_feedback(data_point)
        pose = int(metadata["y_angle"] / 15.0)
        position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                metadata["y_angle"])
        state = AgentObservedState(
            instruction=data_point.instruction,
            config=self.config,
            constants=self.constants,
            start_image=image,
            previous_action=None,
            pose=pose,
            position_orientation=position_orientation,
            data_point=data_point,
            prev_instruction=data_point.get_prev_instruction(),
            next_instruction=data_point.get_next_instruction())

        # Test image
        if test_image is None:
            test_image_example = self.get_exploration_image()
        else:
            test_image_example = test_image[data_point_ix][0]

        # Predict the goal
        predicted_goal, predictor_error, predicted_pixel, attention_prob = self.get_3d_location(
            test_image_example, data_point, panaroma=True)
        current_bot_location = metadata["x_pos"], metadata["z_pos"]
        current_bot_pose = metadata["y_angle"]
        state.goal = PredictorPlannerAgent.get_goal_location(
            current_bot_location, current_bot_pose, predicted_goal, 32, 32)
        print("Predicted Error ", predictor_error)

        num_actions = 0
        max_num_actions = self.constants["horizon"]
        model_state = None
        actions = []
        info = dict()

        while True:

            # Generate probabilities over actions
            if isinstance(self.model, AbstractModel):
                probabilities = list(
                    torch.exp(self.model.get_probs(state).data))
            elif isinstance(self.model, AbstractIncrementalModel):
                log_probabilities, model_state, _, _ = self.model.get_probs(
                    state, model_state, volatile=True)
                probabilities = list(torch.exp(log_probabilities.data))[0]
            else:
                raise AssertionError("Unhandled Model type.")

            # Use test policy to get the action
            action = self.test_policy(probabilities)
            actions.append(action)

            if action == self.action_space.get_stop_action_index(
            ) or num_actions >= max_num_actions:
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback(
                )
                if tensorboard is not None:
                    tensorboard.log_all_test_errors(
                        metadata["edit_dist_error"],
                        metadata["closest_dist_error"],
                        metadata["stop_dist_error"])

                # Update the scores based on meta_data
                self.meta_data_util.log_results(metadata)

                if debug:
                    # Dictionary to contain key results
                    info["instruction_string"] = instruction_to_string(
                        data_point.instruction, self.config)
                    info["datapoint_id"] = data_point.get_scene_name()
                    info["stop_dist_error"] = metadata["stop_dist_error"]
                    info["closest_dist_error"] = metadata["closest_dist_error"]
                    info["edit_dist_error"] = metadata["edit_dist_error"]
                    info["num_actions_taken"] = num_actions
                    info["predicted_goal"] = predicted_goal
                    info["predicted_error"] = predictor_error
                    info["gold_goal"] = data_point.get_destination_list()[-1]
                    info["final_location"] = (metadata["x_pos"],
                                              metadata["z_pos"])
                    info["predicted_screen_pixels"] = predicted_pixel

                    self.save_attention_prob(test_image_example,
                                             attention_prob,
                                             info["instruction_string"],
                                             info["datapoint_id"])
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(
                    action)
                # Update the agent state
                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                state = state.update(image,
                                     action,
                                     pose=pose,
                                     position_orientation=position_orientation,
                                     data_point=data_point)

                # Set the goal based on the current position and angle
                current_bot_location = metadata["x_pos"], metadata["z_pos"]
                current_bot_pose = metadata["y_angle"]
                state.goal = PredictorPlannerAgent.get_goal_location(
                    current_bot_location, current_bot_pose, predicted_goal, 32,
                    32)
                num_actions += 1

        # logging.info("Error, Start-Distance, Turn-Angle,  %r %r %r", metadata["stop_dist_error"], distance, angle)
        return metadata, actions, predictor_error, info
Code example #12
    def test(self,
             test_dataset,
             tensorboard=None,
             logger=None,
             pushover_logger=None):

        self.server.clear_metadata()
        action_counts = [0] * self.action_space.num_actions()
        task_completion_accuracy = 0
        print("Reached Test")
        test_dataset_size = len(test_dataset)

        metadata = {"feedback": ""}
        data_point = random.sample(test_dataset, 1)[0]
        while True:

            print("Please enter an instruction. For sample see:")
            # data_point = random.sample(test_dataset, 1)[0]
            image, metadata = self.server.reset_receive_feedback(data_point)
            print(
                "Sample instruction: ",
                instruction_to_string(data_point.get_instruction(),
                                      self.config))
            input_instruction = input(
                "Enter an instruction or enter q to quit ")
            if input_instruction == "q" or input_instruction == "quit":
                break
            input_instruction_ids = self.convert_to_id(input_instruction)

            pose = int(metadata["y_angle"] / 15.0)
            position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                    metadata["y_angle"])
            state = AgentObservedState(
                instruction=input_instruction_ids,
                config=self.config,
                constants=self.constants,
                start_image=image,
                previous_action=None,
                pose=pose,
                position_orientation=position_orientation,
                data_point=data_point,
                prev_instruction=data_point.get_prev_instruction(),
                next_instruction=data_point.get_next_instruction())
            # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()
            num_actions = 0
            max_num_actions = self.constants["horizon"]
            model_state = None
            # print "Model state is new "
            while True:

                time.sleep(0.3)

                # Generate probabilities over actions
                if isinstance(self.model, AbstractModel):
                    probabilities = list(
                        torch.exp(self.model.get_probs(state).data))
                elif isinstance(self.model, AbstractIncrementalModel):
                    log_probabilities, model_state, _, _ = self.model.get_probs(
                        state, model_state, volatile=True)
                    probabilities = list(torch.exp(log_probabilities.data))[0]
                else:
                    # print "Num action is " + str(num_actions) + " and max is " + str(max_num_actions)
                    log_probabilities, model_state = self.model.get_probs(
                        state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))
                    # raise AssertionError("Unhandled Model type.")

                # Use test policy to get the action
                action = self.test_policy(probabilities)
                # DONT FORGET TO REMOVE
                # action = np.random.randint(0, 2)
                action_counts[action] += 1

                if action == self.action_space.get_stop_action_index(
                ) or num_actions >= max_num_actions:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.halt_and_receive_feedback(
                    )
                    if tensorboard is not None:
                        tensorboard.log_all_test_errors(
                            metadata["edit_dist_error"],
                            metadata["closest_dist_error"],
                            metadata["stop_dist_error"])

                    if metadata["stop_dist_error"] < 5.0:
                        task_completion_accuracy += 1

                    # Update the scores based on meta_data
                    self.meta_data_util.log_results(metadata)
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(
                        action)
                    # Update the agent state
                    pose = int(metadata["y_angle"] / 15.0)
                    position_orientation = (metadata["x_pos"],
                                            metadata["z_pos"],
                                            metadata["y_angle"])
                    state = state.update(
                        image,
                        action,
                        pose=pose,
                        position_orientation=position_orientation,
                        data_point=data_point)
                    num_actions += 1

        print("Finished testing. Now logging.")
        task_completion_accuracy = (task_completion_accuracy * 100.0) / float(
            max(len(test_dataset), 1))
        self.log("Overall test results:", logger)
        self.log(
            "Testing: Task completion accuracy is: %r" %
            task_completion_accuracy, logger)
        self.log("Testing: Final Metadata: %r" % metadata, logger)
        self.log("Testing: Action Distribution: %r" % action_counts, logger)
        self.meta_data_util.log_results(metadata, logger)
        self.log("Testing data action counts %r" % action_counts, logger)
        if pushover_logger is not None:
            pushover_feedback = str(
                metadata["feedback"]
            ) + " --- " + "task_completion_accuracy=%r" % task_completion_accuracy
            pushover_logger.log(pushover_feedback)
Code example #13
    def test_classifier(self, agent, test_dataset):
        fp, fn, tp, tn = 0, 0, 0, 0
        fn_examples = []
        fp_examples = []
        perfect_segmented_examples = []

        for data_point_ix, data_point in enumerate(test_dataset):
            state = AgentObservedState(
                instruction=data_point.instruction,
                config=self.config,
                constants=self.constants,
                start_image=None,  # image,
                previous_action=None)
            segments = data_point.get_instruction_oracle_segmented()
            segment_lens = [len(s) for s in segments]
            num_mistakes = 0
            for i, seg_len in enumerate(segment_lens):
                segment_instruction = debug.instruction_to_string(
                    segments[i], self.config)
                num_read = 0
                while num_read < seg_len:
                    state = state.update_on_read()
                    num_read += 1
                    candidate_instruction = debug.instruction_to_string(
                        segments[i][:num_read], self.config)
                    model_log_probs = list(
                        self.model.get_segmentation_probs([state]).view(-1).data)
                    pred_action = gp.get_argmax_action(model_log_probs)
                    if num_read < seg_len and pred_action == 0:
                        tn += 1
                    elif num_read < seg_len and pred_action == 1:
                        fp += 1
                        num_mistakes += 1
                        fp_examples.append(
                            (candidate_instruction, segment_instruction))
                    elif num_read == seg_len and pred_action == 0:
                        fn += 1
                        num_mistakes += 1
                        fn_examples.append(
                            (candidate_instruction, segment_instruction))
                    elif num_read == seg_len and pred_action == 1:
                        tp += 1
                state = state.update_on_act_halt()

            if num_mistakes == 0:
                instruction_strings = []
                for seg in segments:
                    instruction_strings.append(
                        debug.instruction_to_string(seg, self.config))
                perfect_segmented_examples.append(
                    " ----- ".join(instruction_strings))

        # calculate precision
        if fp + tp > 0:
            precision = (tp * 1.0) / (fp + tp)
        else:
            precision = 1.0

        # calculate recall
        if fn + tp > 0:
            recall = (tp * 1.0) / (fn + tp)
        else:
            recall = 1.0

        if precision + recall > 0:
            f1 = (2.0 * precision * recall) / (precision + recall)
        else:
            f1 = 0.0

        # print FP examples
        random.shuffle(fp_examples)
        logging.info("FP EXAMPLES:")
        for ex in fp_examples[:20]:
            logging.info(ex)

        # print FN examples
        random.shuffle(fn_examples)
        logging.info("FN EXAMPLES:")
        for ex in fn_examples[:20]:
            logging.info(ex)

        # print perfect segmented examples
        random.shuffle(perfect_segmented_examples)
        logging.info("PERFECT SEGMENTED EXAMPLES:")
        for ex in perfect_segmented_examples[:20]:
            logging.info(ex)

        logging.info("testing results: precision=%.2f; recall=%f; f1=%.2f" %
                     (precision, recall, f1))
Code example #14
    def interactive_shell(self, train_dataset, train_images):

        traj_len = len(train_dataset)
        keep = False
        image_id = 1
        while True:

            # Sample a random dataset
            if not keep:
                ix = random.randint(0, traj_len - 1)
            data_point = train_dataset[ix]
            image = train_images[ix][0]

            # Show the image in pyplot
            plt.imshow(image.swapaxes(0, 1).swapaxes(1, 2))
            plt.ion()
            plt.show()

            # Get the instruction
            print("Enter the instruction below (q or quit to quit)\n")
            print("Sample instruction is ",
                  instruction_to_string(data_point.instruction, self.config))
            while True:
                instruction = input()
                if instruction == "q" or instruction == "quit":
                    break
                elif len(instruction) == 0:
                    print("Enter a non-empty instruction (q or quit to quit)")
                else:
                    break

            instruction_id = self.convert_to_id(instruction)
            state = AgentObservedState(instruction=instruction_id,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None,
                                       pose=None,
                                       position_orientation=None,
                                       data_point=data_point)

            # Show the attention mask
            _, _, _, volatile = self.model.get_attention_prob(state, model_state=None)

            attention_prob = volatile["attention_probs"][:-1].view(
                self.final_height, self.final_width)
            attention_prob = attention_prob.cpu().data.numpy()
            resized_kernel = scipy.misc.imresize(
                attention_prob,
                (self.config["image_height"], self.config["image_width"]))
            plt.clf()
            plt.title(instruction)
            plt.imshow(image.swapaxes(0, 1).swapaxes(1, 2))
            plt.imshow(resized_kernel, cmap="jet", alpha=0.5)

            print(
                "Enter s to save, k to keep working on this environment, sk to do both. Other key to simply continue"
            )
            key_ = input()
            if key_ == "s":
                plt.savefig("interactive_image_" + str(image_id) + ".png")
                image_id += 1

            if key_ == "k":
                keep = True
            else:
                keep = False

            if key_ == "sk":
                plt.savefig("image_" + str(image_id) + ".png")
                image_id += 1
                keep = True

            plt.clf()
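
Note that scipy.misc.imresize was removed in SciPy 1.3, so on a current SciPy the attention-map upscaling above needs a replacement. A sketch using Pillow (not part of the original code, and only an approximation of the old bytescale-plus-bilinear behaviour):

import numpy as np
from PIL import Image

def imresize(arr, size):
    # size is (height, width), matching the scipy.misc.imresize call above.
    height, width = size
    scaled = np.uint8(255.0 * arr / max(float(arr.max()), 1e-8))
    return np.array(Image.fromarray(scaled).resize((width, height), Image.BILINEAR))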
Code example #15
    def test(self, tune_dataset, tensorboard):

        total_validation_loss = 0
        total_validation_prob = 0
        total_validation_exact_accuracy = 0
        total_goal_distance = 0
        num_items = 0

        # Next metric measures when the goal is visible and prediction is within a 10% radius
        total_epsilon_accuracy = 0
        num_visible_items = 0

        # Next metric measures distance in real world and only when goal is visible
        total_real_world_distance = 0

        for data_point_ix, data_point in enumerate(tune_dataset):

            model_state = None
            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=data_point.start_image,
                                       previous_action=None,
                                       pose=None,
                                       position_orientation=None,
                                       data_point=data_point)

            num_items_ = 0
            sum_loss = 0
            sum_prob = 0
            sum_acc = 0
            sum_dist = 0
            sum_real_world_distance = 0

            row, col = data_point.goal_pixel
            goal = row, col, row, col
            state.goal = goal
            volatile = self.model.get_attention_prob(state, model_state)

            if not self.ignore_none or row is not None:
                gold_ix = row * self.final_width + col
                loss, prob, meta = GoalPrediction.get_loss_and_prob(
                    volatile, goal, self.final_height, self.final_width)
                num_items_ += 1
                sum_loss = sum_loss + float(loss.data.cpu().numpy()[0])
                sum_prob = sum_prob + float(prob.data.cpu().numpy()[0])

                inferred_ix, row_col = self.get_inferred_value(volatile)

                if gold_ix == inferred_ix:
                    sum_acc = sum_acc + 1.0
                if row is not None and col is not None:
                    sum_dist = sum_dist + abs(row - inferred_ix // self.final_width) \
                               + abs(col - inferred_ix % self.final_width)
                    num_visible_items += 1
                    if self.is_close_enough(inferred_ix, row, col):
                        total_epsilon_accuracy += 1
                    real_world_distance = self.compute_distance_in_real_world(
                        inferred_ix, data_point)
                    sum_real_world_distance += real_world_distance

                    # Save the map
                    instruction_string = instruction_to_string(
                        data_point.instruction, self.config)
                    # goal_x, goal_y = data_point.goal_location
                    # goal_x, goal_y = round(goal_x, 2), round(goal_y, 2)
                    # predicted_goal_x, predicted_goal_y = predicted_goal
                    # predicted_goal_x, predicted_goal_y = round(predicted_goal_x, 2), round(predicted_goal_y, 2)
                    # instruction_string = instruction_string + \
                    #                      "\n (Error: " + str(round(sum_real_world_distance, 2)) + ")" + \
                    #                      "\n %r %r %r %r \n" % (goal_x, goal_y, predicted_goal_x, predicted_goal_y)
                    # self.show_image(data_point.get_destination_list()[-1], predicted_goal, data_point.get_start_pos(),
                    #                 instruction_string)

                    # Save the generated image
                    self.global_id += 1
                    if self.global_id % 25 == 0:
                        goal_prob = GoalPrediction.generate_gold_prob(
                            goal, 32, 32)
                        predicted_goal = (int(inferred_ix / 32),
                                          inferred_ix % 32,
                                          int(inferred_ix / 32),
                                          inferred_ix % 32)
                        predicted_goal_prob = GoalPrediction.generate_gold_prob(
                            predicted_goal, 32, 32)
                        self.save_attention_prob(
                            data_point.start_image,
                            volatile["attention_probs"][:-1].view(32, 32),
                            data_point.instruction_string,
                            goal_prob[:-1].view(32, 32))
                        self.save_attention_prob(
                            data_point.start_image,
                            predicted_goal_prob[:-1].view(32, 32),
                            data_point.instruction_string,
                            goal_prob[:-1].view(32, 32))

            total_validation_loss += sum_loss
            total_validation_prob += sum_prob
            total_goal_distance += sum_dist
            total_validation_exact_accuracy += sum_acc
            total_real_world_distance += sum_real_world_distance
            num_items += num_items_

        mean_total_goal_distance = total_goal_distance / float(
            max(num_items, 1))
        mean_total_validation_loss = total_validation_loss / float(
            max(num_items, 1))
        mean_total_validation_prob = total_validation_prob / float(
            max(num_items, 1))
        mean_total_validation_accuracy = (total_validation_exact_accuracy *
                                          100.0) / float(max(num_items, 1))
        mean_total_epsilon_accuracy = (total_epsilon_accuracy * 100.0) / float(
            max(num_visible_items, 1))
        mean_real_world_distance = total_real_world_distance / float(
            max(num_visible_items, 1))

        logging.info(
            "Mean Test result: L1 Distance is %r, Loss %r, Prob %r, Acc is %r, Epsilon Accuracy is %r"
            % (mean_total_goal_distance, mean_total_validation_loss,
               mean_total_validation_prob, mean_total_validation_accuracy,
               mean_total_epsilon_accuracy))
        logging.info(
            "Num visible items %r, Num Exact Match items is %r, Num epsilon match %r, Num Items is %r "
            % (num_visible_items, total_validation_exact_accuracy,
               total_epsilon_accuracy, num_items))
        logging.info("Num visible items %r, Mean Real World Distance %r " %
                     (num_visible_items, mean_real_world_distance))

        return mean_real_world_distance
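
The inferred index used above is a flattened, row-major position on the final_height x final_width attention grid (gold_ix = row * final_width + col), so the decode back to a pixel is simply:

def decode_pixel(index, width):
    # Inverse of gold_ix = row * width + col used above (row-major order).
    return index // width, index % width

assert decode_pixel(5 * 32 + 7, 32) == (5, 7)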
Code example #16
    def do_train(self, agent, train_dataset, tune_dataset, experiment_name):
        """ Perform training """

        for epoch in range(1, self.max_epoch + 1):

            # Test on tuning data
            agent.test(tune_dataset, tensorboard=self.tensorboard)

            for data_point in train_dataset:

                batch_replay_items = []
                num_actions = 0
                total_reward = 0
                max_num_actions = len(data_point.get_trajectory())
                max_num_actions += self.constants["max_extra_horizon"]

                image, metadata = agent.server.reset_receive_feedback(data_point)
                state = AgentObservedState(instruction=data_point.instruction,
                                           config=self.config,
                                           constants=self.constants,
                                           start_image=image,
                                           previous_action=None)

                forced_stop = True

                instruction = instruction_to_string(
                    data_point.get_instruction(), self.config)
                print "TRAIN INSTRUCTION: %r" % instruction
                print ""

                while num_actions < max_num_actions:

                    # Sample action using the policy
                    # Generate probabilities over actions
                    probabilities = list(torch.exp(self.model.get_probs(state).data))

                    # Use test policy to get the action
                    action = gp.sample_action_from_prob(probabilities)

                    if action == agent.action_space.get_stop_action_index():
                        forced_stop = False
                        break

                    # Send the action and get feedback
                    image, reward, metadata = agent.server.send_action_receive_feedback(action)
                    total_reward += reward

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state, action, reward)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    state = state.update(image, action)

                    num_actions += 1

                # Send final STOP action and get feedback
                image, reward, metadata = agent.server.halt_and_receive_feedback()
                total_reward += reward

                # Store it in the replay memory list
                if not forced_stop:
                    replay_item = ReplayMemoryItem(state, agent.action_space.get_stop_action_index(), reward)
                    batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Compute Q-values using sampled rollout
                ReinforceLearning._set_q_val(batch_replay_items)

                # Perform update
                loss_val = self.do_update(batch_replay_items)
                entropy_val = float(self.entropy.data[0])
                self.tensorboard.log(entropy_val, loss_val, total_reward)
                self.tensorboard.log_train_error(metadata["error"])

            # Save the model
            self.model.save_model(experiment_name + "/reinforce_epoch_" + str(epoch))
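
ReinforceLearning._set_q_val fills in a Q-value for each replay item from the sampled rollout before the policy update. As a rough illustration only (the repository's implementation and the ReplayMemoryItem accessor names below are assumptions), a discounted-return pass over the rollout looks like:

def set_q_values(replay_items, gamma=1.0):
    # Illustrative sketch: assign each item the (discounted) return from its
    # step onward; get_reward() and set_q_val() are assumed accessor names.
    future_return = 0.0
    for item in reversed(replay_items):
        future_return = item.get_reward() + gamma * future_return
        item.set_q_val(future_return)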