Example #1
0
class PolicyRoller:
    """
    Really only a wrapper around the roll_out_policy function, which does the policy rollout in the pomdp
    It collects actions both from the user-provided policy and from the oracle (as labels) and accumulates a dataset
    """
    def __init__(self, instance_id=0):
        """
        Create a roller for the simulator instance identified by `instance_id`.
        The environment and the vocabulary start out unset; roll_out_policy assigns them.
        :param instance_id: id handed to the environment interface when it is created
        """
        # Filled in by roll_out_policy
        self.env = None
        self.word2token = None
        self.all_instructions = None

        self.instance_id = instance_id
        self.presenter = Presenter()

    def reset(self):
        """
        Re-initialize this roller in place.
        The simulator instance id is kept: calling __init__ without arguments would silently fall back to
        instance 0 and make a roller created for another instance talk to the wrong simulator.
        """
        self.__init__(instance_id=self.instance_id)

    def load_all_envs(self):
        """
        Load all instruction sets (train, dev and test merged) together with the vocabulary maps.
        :return: tuple (env_ids, all_instructions, corpus, token2term, word2token), where env_ids is the list of
        keys of the merged instruction dict
        """
        train_set, dev_set, test_set, corpus = get_all_instructions()
        merged = merge_instruction_sets(train_set, dev_set, test_set)
        token2term, word2token = get_word_to_token_map(corpus)
        return list(merged.keys()), merged, corpus, token2term, word2token

    def tokenize_string(self, s):
        """
        Convert a space-separated instruction string to the list of its tokens.
        Empty fragments caused by repeated, leading or trailing spaces are dropped.
        A word that is not in self.word2token raises KeyError.
        :param s: instruction string
        :return: list with one self.word2token entry per word
        """
        return [self.word2token[word] for word in s.split(" ") if word]

    def roll_out_on_segment(self):
        """Placeholder: does nothing and returns None. Segments are rolled out inside roll_out_on_env."""
        return None

    def choose_action(self, params, step, switch_thres, reference_action,
                      policy_action):
        """
        Choose whether to perform the policy action or the reference (oracle) action based on the type of mixture
        policy that is being executed
        :param params: RolloutParams instance; rollout_strategy (and mixture_ref_prob for MIXTURE) are read
        :param step: current control step number
        :param switch_thres: roll-in/roll-out control step number; for POLICY_IN_REF_OUT the policy acts up to and
        including this step and the reference acts afterwards
        :param reference_action: action executed by oracle
        :param policy_action: action executed by policy
        :return: either reference_action or policy_action
        :raises ValueError: if params.rollout_strategy is none of the strategies handled here. Previously this
        case fell through and returned None, which the caller would then try to execute in the environment.
        """
        strategy = params.rollout_strategy
        if strategy == RolloutStrategy.POLICY:
            return policy_action
        if strategy == RolloutStrategy.REFERENCE:
            return reference_action
        if strategy == RolloutStrategy.POLICY_IN_REF_OUT:
            return reference_action if step > switch_thres else policy_action
        if strategy == RolloutStrategy.MIXTURE:
            # Pick the reference action with probability mixture_ref_prob
            use_reference = random.uniform(0, 1) < params.mixture_ref_prob
            return reference_action if use_reference else policy_action
        raise ValueError("Unknown rollout strategy: " + str(strategy))

    def roll_out_on_env(self,
                        params,
                        instructions_set,
                        set_idx,
                        only_seg_idx=None,
                        custom_instr=None):
        """
        Roll out the policy on the segments of one instruction set in one environment and collect the samples.

        Only segments spanning at least 2 path indices are considered. For each of them the oracle and the policy
        are both queried at every step, one of the two actions is executed (see choose_action), and a sample dict
        is recorded. A segment ends when the oracle returns no action, the horizon is reached, the policy returns
        no action, the executed action signals stop, or the oracle has signalled stop for params.steps_to_kill steps.

        :param params: rollout parameters; supplies the policy, the reference (oracle) policy, the horizon, the
        switching / reset options and the display / saving flags read below
        :param instructions_set: dict with the environment id under "env" and the list of segments under
        "instructions"
        :param set_idx: index of this instruction set; stored in every sample and in the run metadata
        :param only_seg_idx: if not None, only the segment with this seg_idx is rolled out
        :param custom_instr: if not None, replaces the instruction string of every rolled-out segment
        :return: if params.isSegmentLevel(), a list (over segments) of lists of samples, otherwise a flat list
        of samples
        """

        env_dataset = []
        failed = False

        env_id = instructions_set["env"]
        self.env.set_environment(
            env_id, instruction_set=instructions_set['instructions'])
        path = load_and_convert_path(env_id)
        params.initPolicyContext(env_id, path)

        import rollout.run_metadata as md
        segments = list(instructions_set['instructions'])

        # all segments with at least length 2
        valid_segments = [
            (segments[i], segments[i]["seg_idx"]) for i in range(len(segments))
            if segments[i]["end_idx"] - segments[i]["start_idx"] >= 2
        ]

        # An environment without valid segments is only reported; the loop below then has nothing to iterate over
        if len(valid_segments) == 0:
            print("Ding dong!")

        first_seg = True

        # For recurrent policy, we need to explicitly start a segment and reset the LSTM state
        # TODO: Make sure this still works for the older non-NL model
        params.policy.start_sequence()

        for segment, seg_idx in valid_segments:
            if only_seg_idx is not None and seg_idx != only_seg_idx:
                print("Skipping seg: " + str(seg_idx) + " as not requested")
                continue

            valid_segment = self.env.set_current_segment(seg_idx)
            if not valid_segment:
                print(
                    f"Skipping segment {seg_idx} as it is empty / invalid for env {env_id}"
                )
                continue

            # NOTE(review): read as an attribute here, but via params.isSegmentLevel() further down - confirm
            # that both refer to the same setting
            if params.segment_level:
                params.policy.start_sequence()

            segment_dataset = []

            # Decide when to switch policies
            switch_threshold = params.horizon + 1  # Never switch policies by default
            do_switch = random.uniform(0, 1) < params.switch_prob
            if do_switch and params.threshold_strategy == SwitchThresholdStrategy.UNIFORM:
                switch_threshold = random.uniform(0, params.horizon)

            string_instruction, end_idx, start_idx = segment[
                "instruction"], segment["end_idx"], segment["start_idx"]

            # Manual instruction override to allow rolling out arbitrary instructions for debugging
            if custom_instr is not None:
                print("REPLACED: ", string_instruction)
                string_instruction = custom_instr
            print("INSTRUCTION:", string_instruction)

            # Set some global parameters that can be accessed by other parts of the system
            md.IS_ROLLOUT = True
            md.REAL_DRONE = params.real_drone
            md.RUN_NAME = params.run_name
            md.ENV_ID = env_id
            md.SET_IDX = set_idx
            md.SEG_IDX = seg_idx
            md.START_IDX = start_idx
            md.END_IDX = end_idx
            md.INSTRUCTION = string_instruction

            # Optional policy hook, called only if the policy defines it
            if hasattr(params.policy, "start_segment_rollout"):
                params.policy.start_segment_rollout(env_id, set_idx, seg_idx)

            token_instruction = self.tokenize_string(string_instruction)

            # At the end of segment N, should we reset drone position to the start of segment N+1 or continue
            # rolling out seamlessly?
            # first_seg is True until the first reset, so `state` is always assigned before the loop below uses it
            if first_seg or params.shouldResetAlways() or (
                    failed and params.shouldResetIfFailed()):
                state = self.env.reset(seg_idx)
                #instr_str = debug_untokenize_instruction(instruction)
                #Presenter().show_instruction(string_instruction.replace("  ", " "))
                failed = False
                first_seg = False
                # NOTE(review): sleepytime is not defined in this method - presumably a module-level
                # constant; confirm
                sleep(sleepytime)

            # Tell the oracle which part of the path is currently being executed
            params.setCurrentSegment(start_idx, end_idx)

            step_num = 0
            total_reward = 0
            # If the path has been finished according to the oracle, allow rolling out STEPS_TO_KILL more steps
            # If we finish the segment, but don't stop, log the position at which we finish the segment
            oracle_finished_countdown = params.steps_to_kill

            # Finally the actual policy roll out on the path segment!
            while True:

                # Get oracle action (labels)
                ref_action, _ = params.ref_policy.get_action(
                    state, token_instruction)

                if ref_action is None or step_num == params.horizon:
                    failed = True  # Either veered off too far, or ran out of time. Either way, we consider it a fail
                    print("Failed segment")
                    break

                # Get the policy action (actions to be rolled out)
                action, _ = params.policy.get_action(
                    state, token_instruction)  #, env_id=env_id)

                # Unlike the oracle case above, this ends the segment without marking it as failed
                if action is None:
                    print("POLICY PRODUCED None ACTION")
                    break

                # Choose which action to execute (reference or policy) based on the selected procedure
                exec_action = self.choose_action(params, step_num,
                                                 switch_threshold, ref_action,
                                                 action)

                # action = [vel_x, vel_y, vel_yaw] vel_y is unused currently. Execute the action in the pomdp
                # NOTE(review): index 3 of the action is read below as the stop signal, so actions appear to
                # have a fourth component that the line above does not mention - confirm
                state, reward, done, exceeded, oob = self.env.step(exec_action)

                total_reward += reward

                # Collect the data into a dataset
                # Note that "state" is the state AFTER executing exec_action, while ref_action / pol_action
                # were computed from the state before it
                sample = {
                    "instruction": string_instruction,
                    "state": state,
                    "ref_action": ref_action,
                    "reward": reward,
                    "done": done,
                    #"metadata": {
                    "seg_path": path[start_idx:end_idx + 1],
                    "path": path,
                    "env_id": env_id,
                    "set_idx": set_idx,
                    "seg_idx": seg_idx,
                    "start_idx": start_idx,
                    "end_idx": end_idx,
                    "action": exec_action,
                    "pol_action": action,
                    #"ref_action": ref_action,
                    #"instruction": string_instruction,
                    "flag": params.getFlag()
                    #}
                }

                segment_dataset.append(sample)
                if not params.isSegmentLevel():
                    env_dataset.append(sample)

                # Do visual feedback and logging
                if params.first_person:
                    self.presenter.show_sample(state, exec_action, reward,
                                               string_instruction)
                if params.plot:
                    self.presenter.plot_paths(segment_dataset,
                                              interactive=True)
                if params.save_samples:
                    file_path = params.getSaveSamplesPath(
                        env_id, set_idx, seg_idx, step_num)
                    self.presenter.save_sample(file_path, state, exec_action,
                                               reward, string_instruction)
                if params.show_action:
                    self.presenter.show_action(ref_action, "ref_action")
                    self.presenter.show_action(exec_action, "exec_action")

                # If the policy is finished, we stop. Otherwise the oracle should just keep outputting
                # examples that say that the policy should output finished at this point
                if exec_action[3] > 0.5 and not params.shouldIgnorePolicyStop(
                ):
                    print("Policy stop!")
                    break
                # If oracle says we're finished, allow a number of steps before terminating.
                if ref_action[3] > 0.5:
                    # Remember where the drone was the first time the oracle signalled stop.
                    # The value is only consumed by the commented-out error tracker call below.
                    if oracle_finished_countdown == params.steps_to_kill:
                        drone_pos_force_stop = state.get_pos_2d()
                    oracle_finished_countdown -= 1
                    if oracle_finished_countdown == 0:
                        print("Oracle forced stop!")
                        break
                step_num += 1

            # Call the rollout end callback, so that the model can save any debugging information, such as feature maps
            if callable(getattr(params.policy, "on_rollout_end", None)):
                params.policy.on_rollout_end(env_id, set_idx, seg_idx)

            if params.isSegmentLevel():
                env_dataset.append(segment_dataset)

            # Plot the trajectories for error tracking
            # TODO: Plot entire envs not segment by segment
            if params.save_plots:
                # In segment-level mode nothing is plotted here, but save_plot is still called
                if not params.isSegmentLevel():
                    self.presenter.plot_paths(
                        env_dataset,
                        segment_path=path[start_idx:end_idx + 1],
                        interactive=False,
                        bg=True,
                        world_size=4.7)
                self.presenter.save_plot(
                    params.getSavePlotPath(env_id, set_idx, seg_idx))

            # Calculate end of segment error
            if end_idx > len(path) - 1:
                end_idx = len(path) - 1

            # The reward is proportional to path length. Weigh it down, so that max reward is 1:
            # (seg_len and total_reward are currently only used by the commented-out call below)
            seg_len = end_idx - start_idx
            #self.error_tracker.add_sample(not failed, drone_pos_force_stop, state.get_pos(), path[end_idx],
            #                              path[end_idx - 1], total_reward, seg_len)

            if params.first_segment_only:
                print("Only running the first segment")
                break

            #sleep(sleepytime)

        return env_dataset

    def roll_out_policy(self, params):
        """
        Given the provided rollout parameters, spawn a simulator instance and execute the specified policy on all
        environments listed in params.envs for which instructions are available.

        Errors are reported and swallowed: a failure while rolling out one instruction set skips that set, a
        failure during setup ends the rollout early. Either way, whatever was collected so far is returned and
        the environment (if one was created) is asked to land.

        :param params: RollOutParams instance defining the parameters of the rollout
        :return: Aggregated dataset: a list with one entry per rolled-out instruction set, each entry being the
        return value of roll_out_on_env. If params.isSegmentLevel(), an entry is a list (over segments) of lists
        of samples, otherwise it is a flat list of samples.
        """

        def report_error(error):
            # Shared by both handlers below; must be called from inside an except block so that
            # traceback.format_exc() sees the active exception
            import traceback
            from utils.colors import print_error
            print_error("Error encountered during policy rollout!")
            print_error(error)
            print_error(traceback.format_exc())

        if params.isDebug():
            run_metadata.WRITE_DEBUG_DATA = True

        dataset = []
        try:
            # Load the neural network policy from file
            # We can't just pass a neural network into this function, because it can't be pickled
            params.loadPolicy()
            if not params.hasPolicy():
                # Explicit raise instead of assert, so the check also runs under python -O
                raise RuntimeError("No policy available after params.loadPolicy()")

            self.env = PomdpInterface(instance_id=self.instance_id,
                                      is_real=params.real_drone)

            _, all_instructions, _, _, self.word2token = self.load_all_envs()
            env_ids = params.envs
            seg_indices = params.seg_list
            custom_instructions = params.custom_instructions

            # Loop through environments. The index i runs over params.envs (not over the filtered list), so
            # that seg_indices[i] / custom_instructions[i] stay aligned with env_ids[i] when some environment
            # is skipped. This assumes both lists are given per entry of params.envs.
            for i, env_id in enumerate(env_ids):
                # Skip the envs that we don't have instructions for
                if env_id not in all_instructions:
                    continue

                # Loop through all non-empty sets of instructions for each pomdp
                instruction_sets = [
                    s for s in all_instructions[env_id] if len(s) > 0
                ]

                if len(instruction_sets) == 0:
                    print("No instruction sets for env: " + str(env_id))

                for j, instructions_set in enumerate(instruction_sets):
                    try:
                        seg_id = seg_indices[
                            i] if seg_indices is not None else None
                        custom_instr = custom_instructions[
                            i] if custom_instructions is not None else None
                        import rollout.run_metadata as md
                        md.CUSTOM_INSTR_NO = i
                        # TODO: Check if this works!
                        dataset.append(
                            self.roll_out_on_env(params, instructions_set, j,
                                                 seg_id, custom_instr))
                        DebugWriter().commit()

                        # On the real drone only the first instruction set of each environment is executed
                        if params.isRealDrone():
                            break

                    except Exception as e:
                        report_error(e)
                        continue

        except Exception as e:
            report_error(e)

        # If setup failed before the environment was created there is nothing to land; calling land() on None
        # would raise AttributeError and lose the dataset
        if self.env is not None:
            self.env.land()

        return dataset
Example #2
0
class DataEvalNL(EvaluateBase):
    def __init__(self,
                 run_name="",
                 save_images=True,
                 entire_trajectory=True,
                 custom_instr=None,
                 aug_len=None):
        """
        :param run_name: name of the run; used for the results directory / file and in plot captions
        :param save_images: if True, evaluate_rollout plots and saves every evaluated trajectory
        :param entire_trajectory: if True, rollouts are compared against the whole ground-truth path instead of
        the path of their own segment
        :param custom_instr: if not None, saved plot file names also include a part of the instruction text
        :param aug_len: if truthy, only segments whose "merge_len" equals this value are evaluated
        """
        # NOTE(review): super(EvaluateBase, self) starts the lookup AFTER EvaluateBase, so
        # EvaluateBase.__init__ itself is not run here - confirm whether that is intended
        super(EvaluateBase, self).__init__()
        # get_all_instructions returns (train, dev, test, corpus), the order PolicyRoller.load_all_envs unpacks.
        # dev and test used to be unpacked the other way around here, which mislabelled self.dev_i / self.test_i.
        self.train_i, self.dev_i, self.test_i, _ = get_all_instructions()
        # Same merge precedence as before the relabelling: train, then dev, then test
        self.all_i = {**self.train_i, **self.dev_i, **self.test_i}
        self.passing_distance = P.get_current_parameters(
        )["Units"]["passing_distance"]
        self.results = ResultsLandmarkSide()
        self.presenter = Presenter()
        self.run_name = run_name
        self.save_images = save_images
        self.entire_trajectory = entire_trajectory
        self.custom_instr = custom_instr
        self.aug_len = aug_len

        # Filled by evaluate_rollout: "<env_id>_<seg_idx>" -> 1 if the goal was visible from the start, else 0
        self.visible_map = {}

        self.hfov = P.get_current_parameters(
        )["ModelPVN"]["Stage1"]["cam_h_fov"]

    def _has_multiple_segments(self, rollout):
        """
        Tell whether the (non-empty) rollout contains samples from more than one segment.
        Side effect: every sample visited before the answer is known gets a "metadata" entry pointing to the
        sample itself, if it does not have one yet.
        """
        head = rollout[0]
        if "metadata" in head:
            reference_idx = head["metadata"]["seg_idx"]
        else:
            reference_idx = head["seg_idx"]

        for sample in rollout:
            # Flat samples serve as their own metadata
            sample.setdefault("metadata", sample)
            if sample["metadata"]["seg_idx"] != reference_idx:
                return True
        return False

    def _split_rollout_in_segments(self, rollout):
        """
        Split a rollout into runs of consecutive samples that share the same seg_idx.
        Side effect: every sample without a "metadata" entry gets one that points to the sample itself. This
        includes the first sample, which used to be skipped and caused a KeyError for flat samples unless
        another method had already added the entry.
        :param rollout: list of sample dicts; may be empty
        :return: list of segments, each a non-empty list of samples; [] for an empty rollout
        """
        segments = []
        current_segment = []
        for sample in rollout:
            if "metadata" not in sample:
                sample["metadata"] = sample
            # A change of seg_idx closes the current run
            if current_segment and sample["metadata"][
                    "seg_idx"] != current_segment[0]["metadata"]["seg_idx"]:
                segments.append(current_segment)
                current_segment = []
            current_segment.append(sample)
        if current_segment:
            segments.append(current_segment)
        return segments

    def _segment_matches_auglen(self, segment):
        """
        Check the segment against the self.aug_len filter.
        :param segment: non-empty list of samples; only the first one is inspected
        :return: True if no aug_len filter is set, otherwise whether the "merge_len" of the instruction segment
        equals self.aug_len
        """
        if self.aug_len:
            head = segment[0]
            env_id = head["env_id"]
            seg_idx = head["seg_idx"]
            set_idx = head["set_idx"]
            instr_seg = get_instruction_segment(
                env_id, set_idx, seg_idx, all_instr=self.all_i)
            return instr_seg["merge_len"] == self.aug_len
        return True

    def evaluate_dataset(self, list_of_rollouts):
        """
        Evaluate every rollout, accumulate the results in self.results and save them.
        Empty rollouts are skipped. A rollout covering several segments is split and evaluated segment by
        segment. Segments rejected by the aug_len filter, or evaluated to None, do not contribute.
        """
        for rollout in list_of_rollouts:
            if len(rollout) == 0:
                continue

            # A single-segment rollout is handled as a one-element list of segments
            if self._has_multiple_segments(rollout):
                candidates = self._split_rollout_in_segments(rollout)
            else:
                candidates = [rollout]

            for segment in candidates:
                if not self._segment_matches_auglen(segment):
                    continue
                seg_results = self.evaluate_rollout(segment)
                if seg_results is not None:
                    self.results += seg_results

        self.save_results()

    def rollout_success(self, env_id, set_idx, seg_idx, rollout):
        """
        Tell whether the rollout ended closer than self.passing_distance to the end of its segment's path.
        The segment end index is clamped to the last index of the path.
        """
        path = load_and_convert_path(env_id)
        instructions = self.all_i[env_id][set_idx]["instructions"]
        seg_ordinal = seg_idx_to_ordinal(instructions, seg_idx)
        path_end_idx = min(instructions[seg_ordinal]["end_idx"],
                           len(path) - 1)

        final_pos = np.asarray(rollout[-1]["state"].get_pos_2d())
        goal_pos = np.asarray(path[path_end_idx])
        return np.linalg.norm(final_pos - goal_pos) < self.passing_distance

    def is_goal_visible(self, instr_seg):
        """
        Tell whether the end position of the segment lies within the horizontal field of view at the start.
        The yaw towards the end position is compared with the start yaw; the goal counts as visible when the
        absolute difference is below half of self.hfov (converted with math.radians).
        :param instr_seg: dict with "start_pos", "end_pos" and "start_yaw"
        """
        start_to_end = np.asarray(instr_seg["end_pos"]) - np.asarray(
            instr_seg["start_pos"])
        yaw_offset = vec_to_yaw(start_to_end) - instr_seg["start_yaw"]
        abs_offset = math.fabs(clip_angle(yaw_offset))
        return 2 * abs_offset < math.radians(self.hfov)

    def _filter_path(self, posseq, dst=0.02):
        """
        Replace original points in the path with equally spaced points.

        The output starts with the first input point, followed by the points at arc lengths dst, 2*dst, ...
        along the input polyline, as long as they lie strictly before its end. The end point itself is not added.

        Fixes over the previous version: the first interpolated point used index -1 (the LAST path point) as the
        start of its interpolation interval and therefore landed on the straight line between path start and
        path end instead of on the path; empty input raised IndexError; a non-positive dst never terminated.

        :param posseq: sequence of positions, one per row
        :param dst: spacing between consecutive output points, measured along the path; must be positive
        :return: numpy array of resampled positions (the input, as an array, if it is empty)
        :raises ValueError: if dst is not positive
        """
        if dst <= 0:
            raise ValueError("dst must be positive")

        posseq = np.asarray(posseq)
        if len(posseq) == 0:
            return posseq

        # Arc length from the start of the path to every input point
        cumdist = 0
        cumdists = [cumdist]
        for prev_pos, pos in zip(posseq[:-1], posseq[1:]):
            cumdist += np.linalg.norm(pos - prev_pos)
            cumdists.append(cumdist)
        total_path_length = cumdists[-1]

        traj_out = [posseq[0]]
        p = dst
        # Index of the input point that ends the interval containing arc length p
        ptr = 1
        while p < total_path_length:
            # p < cumdists[-1] guarantees that this stops at a valid index
            while cumdists[ptr] < p:
                ptr += 1
            # Get how far along until the next point this is
            frac = (p - cumdists[ptr - 1]) / (cumdists[ptr] -
                                              cumdists[ptr - 1] + 1e-10)
            # Grab interpolated intermediate point
            traj_out.append(posseq[ptr - 1] +
                            (posseq[ptr] - posseq[ptr - 1]) * frac)
            p += dst

        return np.asarray(traj_out)

    def _calculate_emd(self, exec_path, gt_path):
        """
        Earth mover's distance between the executed and the ground-truth path, with a probability mass of 1
        spread evenly over the points of each path and the point-to-point distance as cost.
        :param exec_path: array of executed positions, one per row
        :param gt_path: array of ground-truth positions, one per row
        :return: the transport cost reported by ot.emd, or None if gt_path is empty
        """
        n_exec, n_gt = len(exec_path), len(gt_path)
        if n_gt == 0:
            return None

        # rows index over ground truth path, columns index over executed path
        pairwise = np.linalg.norm(
            exec_path[np.newaxis, :, :] - gt_path[:, np.newaxis, :], axis=2)

        # Distribute probability mass of 1 evenly over executed and ground-truth trajectories
        mass_exec = np.full(n_exec, 1 / float(n_exec + 1e-10))
        mass_gt = np.full(n_gt, 1 / float(n_gt + 1e-10))
        assert np.isclose(mass_exec.sum(), 1.0)
        assert np.isclose(mass_gt.sum(), 1.0)

        _, log = ot.emd(mass_gt,
                        mass_exec,
                        pairwise,
                        log=True,
                        numItermax=10000)
        emd = log["cost"]
        assert emd > 0, "There is no way that a drone will perfectly follow a trajectory! Something is wrong. EMD error?"
        return emd

    def evaluate_rollout(self, rollout):
        """
        Evaluate one rollout (a non-empty list of samples that belong to one segment).

        Computes the distance between the final position and the last point of the ground-truth slice, success
        (that distance below self.passing_distance), the earth mover's distance (EMD) between the resampled
        executed and ground-truth paths, the EMD normalized by the EMD of staying at the start position, and the
        success-weighted normalized EMD. Records goal visibility in self.visible_map and, if self.save_images is
        set, plots and saves the trajectory.

        The ground-truth slice is path[start:end], so the point at index `end` itself is not part of it.

        :return: ResultsLandmarkSide holding the metrics of this rollout
        """
        final_sample = rollout[-1]
        # Flat samples serve as their own metadata
        final_sample.setdefault("metadata", final_sample)
        meta = final_sample["metadata"]
        env_id = meta["env_id"]
        # TEMPORARY FOR APPENDIX TABLE! REMOVE IT!
        # if env_id >= 6000:
        #    return None
        seg_idx = meta["seg_idx"]
        set_idx = meta["set_idx"]

        path = load_and_convert_path(env_id)

        instructions = self.all_i[env_id][set_idx]["instructions"]
        seg_ordinal = seg_idx_to_ordinal(instructions, seg_idx)
        instr_seg = instructions[seg_ordinal]

        last_path_idx = len(path) - 1
        if self.entire_trajectory:
            path_start_idx, path_end_idx = 0, last_path_idx
        else:
            # Segment bounds, clamped to the path; the start never lies behind the end
            path_end_idx = min(instr_seg["end_idx"] + 1, last_path_idx)
            path_start_idx = min(instr_seg["start_idx"], path_end_idx)

        seg_path = path[path_start_idx:path_end_idx]
        goal_visible = self.is_goal_visible(instr_seg)
        self.visible_map[f"{env_id}_{seg_idx}"] = (1 if goal_visible else 0)
        raw_exec_path = np.asarray(
            [sample["state"].get_pos_2d() for sample in rollout])

        final_pos = np.asarray(raw_exec_path[-1])
        goal_pos = np.asarray(seg_path[-1])
        end_dist = np.linalg.norm(final_pos - goal_pos)
        success = end_dist < self.passing_distance

        # EMD between trajectories, and EMD between start position and trajectory.
        exec_path = self._filter_path(raw_exec_path)
        gt_path = self._filter_path(seg_path)
        emd = self._calculate_emd(exec_path, gt_path)
        stop_emd = self._calculate_emd(exec_path[0:1], gt_path)

        # Success weighted by earth-mover's distance
        nemd = emd / stop_emd
        semd = max((1 if success else 0) * (1 - nemd), 0)

        # Index 3 of an action is compared against 0.5 to detect a stop
        if meta["pol_action"][3] > 0.5:
            who_stopped = "Policy Stopped"
        elif meta["ref_action"][3] > 0.5:
            who_stopped = "Oracle Stopped"
        else:
            who_stopped = "Veered Off"

        result = "Success" if success else "Fail"
        print(env_id, set_idx, seg_idx, result)

        texts = [who_stopped, result, "run:" + self.run_name]

        if self.save_images and emd:
            results_dir = get_results_dir(self.run_name, makedir=True)
            print("Results dir: ", results_dir)
            # TODO: Refactor this to not pull path from rollout, but provide it explicitly
            world_size = P.get_current_parameters()["Setup"]["world_size_m"]
            real_drone = P.get_current_parameters()["Setup"]["real_drone"]
            self.presenter.plot_paths(
                rollout,
                segment_path=gt_path,
                interactive=False,
                texts=texts,
                entire_trajectory=self.entire_trajectory,
                world_size=world_size,
                real_drone=real_drone)
            filename = os.path.join(
                results_dir,
                str(env_id) + "_" + str(set_idx) + "_" + str(seg_idx))
            if self.custom_instr is not None:
                # Append the head and the tail of the instruction text
                instruction = meta["instruction"]
                filename += "_" + instruction[:24] + "_" + instruction[-16:]
            self.presenter.save_plot(filename)

        return ResultsLandmarkSide(success=success,
                                   end_dist=end_dist,
                                   goal_visible=goal_visible,
                                   emd=emd,
                                   semd=semd,
                                   nemd=nemd)

    def write_summaries(self, run_name, name, iteration):
        """
        Log the average distance to goal and the success rate as scalars under the prefix `name`.
        Nothing is logged if the results do not contain an average distance yet.
        :param run_name: selects the log directory "<logging dir>/runs/<run_name>"
        :param name: prefix of the scalar tags
        :param iteration: step value passed along with both scalars
        """
        results_dict = self.get_results()
        log_dir = f"{get_logging_dir()}/runs/{run_name}"
        writer = LoggingSummaryWriter(log_dir=log_dir, restore=True)
        if K_AVG_DIST not in results_dict:
            print("nothing to write")
            return

        avg_dist = results_dict[K_AVG_DIST]
        writer.add_scalar(name + "/avg_dist_to_goal", avg_dist, iteration)
        success_rate = results_dict[K_RATE]
        writer.add_scalar(name + "/success_rate", success_rate, iteration)
        #writer.save_spied_values()

    def get_results(self):
        """Return what self.results.get_dict() reports for the results accumulated so far."""
        results_dict = self.results.get_dict()
        return results_dict

    def save_results(self):
        """Write the current results as JSON to the results path of this run, replacing any existing file."""
        results_path = get_results_path(self.run_name, makedir=True)
        with open(results_path, "w") as out_file:
            json.dump(self.get_results(), out_file)
Example #3
0
class DataEvalLandmarkSide(EvaluateBase):
    def __init__(self, run_name="", save_images=True):
        """
        :param run_name: name of the run; used for the results directory / file
        :param save_images: if True, evaluate_rollout plots and saves every evaluated trajectory
        """
        # NOTE(review): super(EvaluateBase, self) starts the lookup AFTER EvaluateBase, so
        # EvaluateBase.__init__ itself is not run here - confirm whether that is intended
        super(EvaluateBase, self).__init__()
        # get_all_instructions returns (train, dev, test, corpus), the order PolicyRoller.load_all_envs unpacks.
        # dev and test used to be unpacked the other way around here, which mislabelled self.dev_i / self.test_i.
        self.train_i, self.dev_i, self.test_i, _ = get_all_instructions()
        self.passing_distance = LANDMARK_REGION_RADIUS
        self.results = ResultsLandmarkSide()
        self.presenter = Presenter()
        self.run_name = run_name
        self.save_images = save_images

    def evaluate_dataset(self, list_of_rollouts):
        """Evaluate every rollout in the list and accumulate the outcomes in self.results."""
        for rollout in list_of_rollouts:
            rollout_result = self.evaluate_rollout(rollout)
            self.results += rollout_result

    def get_landmark_pos(self, env_id):
        """
        Look up the position of the landmark that the template of this environment refers to as "landmark1".
        :return: numpy array [x, y], taken from the "xPos" and "zPos" lists of the environment config
        """
        template = load_template(env_id)
        config = load_env_config(env_id)
        idx = config["landmarkName"].index(template["landmark1"])
        return np.asarray([config["xPos"][idx], config["zPos"][idx]])

    def correct_side(self, rollout, env_id):
        """
        Check whether the rollout ended on the side of the landmark that the template of this environment asks for.

        With 4 sides, the vector from the landmark to the final position is expressed relative to the direction
        from the start position to the landmark, and its angle has to fall into the 90 degree sector of the
        requested side. With 2 sides, the sign of the cross product between the direction to the landmark and
        the direction to the final position decides.

        Fixes over the previous version: samples are read with ["state"] like everywhere else in this class
        (attribute access is kept as a fallback for non-dict samples); an unknown side yields False instead of
        None; an angle of exactly +/-pi counts as "left" instead of belonging to no side; the 2D cross product
        is computed directly instead of through np.cross, which is deprecated for 2-element vectors.

        :param rollout: non-empty list of samples
        :param env_id: environment id used to load the template and the landmark position
        :return: truth value; False if the template names an unknown side (4-side case)
        """

        def sample_pos(sample):
            state = sample["state"] if isinstance(sample, dict) else sample.state
            return np.asarray(state.get_pos())

        template = load_template(env_id)
        landmark_pos = self.get_landmark_pos(env_id)
        side = template["side"]

        last_pos = sample_pos(rollout[-1])
        first_pos = sample_pos(rollout[0])
        dir_landmark = landmark_pos - first_pos

        if len(N_SIDES) == 4:
            dir_lm_to_last = last_pos - landmark_pos
            # The epsilon avoids a division by zero when the rollout starts on the landmark
            dir_landmark_norm = dir_landmark / (np.linalg.norm(dir_landmark) +
                                                1e-18)
            dir_ortho_norm = np.asarray(
                [dir_landmark_norm[1], -dir_landmark_norm[0]])

            proj = np.dot(dir_lm_to_last, dir_landmark_norm)
            opp_proj = np.dot(dir_lm_to_last, dir_ortho_norm)

            angle = math.atan2(proj, opp_proj)

            deg45 = math.pi / 4
            if side == "right":
                return -deg45 < angle < deg45
            if side == "back":
                return deg45 < angle < 3 * deg45
            if side == "left":
                # atan2 returns values in [-pi, pi]; the sector wraps around +/-pi
                return abs(angle) > 3 * deg45
            if side == "front":
                return -3 * deg45 < angle < -deg45

            print("Unknown side: ", side)
            print("Angle: ", angle)
            return False

        # len(N_SIDES) = 2
        dir_end = last_pos - first_pos
        z = dir_landmark[0] * dir_end[1] - dir_landmark[1] * dir_end[0]
        return z > 0 if side == "left" else z < 0

    def evaluate_rollout(self, rollout):
        """
        Evaluate one rollout: it is a success if it ends within LANDMARK_REGION_RADIUS of the landmark and on
        the requested side of it. If self.save_images is set, the trajectory is plotted and saved and the
        results accumulated so far are written to disk.
        :param rollout: non-empty list of samples, each with "state" and "metadata" entries
        :return: ResultsLandmarkSide built from (success, distance between final position and path end,
        whether the rollout ended in the landmark region)
        """
        final_sample = rollout[-1]
        meta = final_sample["metadata"]
        env_id = meta["env_id"]
        seg_idx = meta["seg_idx"]
        set_idx = meta["set_idx"]
        # TODO: Allow multiple templates / instructions per env
        path = load_path(env_id)

        final_pos = np.asarray(final_sample["state"].get_pos())
        landmark_pos = self.get_landmark_pos(env_id)
        goal_pos = np.asarray(path[-1])

        end_goal_dist = np.linalg.norm(final_pos - goal_pos)
        end_lm_dist = np.linalg.norm(final_pos - landmark_pos)
        correct_landmark_region = end_lm_dist < LANDMARK_REGION_RADIUS
        correct_quadrant = self.correct_side(rollout, env_id)

        # Index 3 of an action is compared against 0.5 to detect a stop
        if meta["pol_action"][3] > 0.5:
            who_stopped = "Policy Stopped"
        elif meta["ref_action"][3] > 0.5:
            who_stopped = "Oracle Stopped"
        else:
            who_stopped = "Veered Off"

        success = correct_landmark_region and correct_quadrant

        # Captions are assembled, but the plot below is drawn without them
        if correct_landmark_region:
            side_txt = "Correct landmark"
        else:
            side_txt = "Wrong landmark"
        result = "Success" if success else "Fail"
        texts = [who_stopped, result, side_txt, "run:" + self.run_name]

        if self.save_images:
            results_dir = get_results_dir(self.run_name, makedir=True)
            self.presenter.plot_paths(rollout, interactive=False,
                                      texts=[])  #texts)
            filename = os.path.join(
                results_dir,
                str(env_id) + "_" + str(set_idx) + "_" + str(seg_idx))
            self.presenter.save_plot(filename)
            self.save_results()

        return ResultsLandmarkSide(success, end_goal_dist,
                                   correct_landmark_region)

    def write_summaries(self, run_name, name, iteration):
        """
        Log the average distance to goal and the success rate as scalars under the prefix `name`, then save the
        writer's spied values. Nothing is logged if the results do not contain an average distance yet.
        :param run_name: selects the log directory "runs/<run_name>"
        :param name: prefix of the scalar tags
        :param iteration: step value passed along with both scalars
        """
        results_dict = self.get_results()
        log_dir = "runs/" + run_name
        writer = LoggingSummaryWriter(log_dir=log_dir, restore=True)
        if K_AVG_DIST not in results_dict:
            print("nothing to write")
            return

        avg_dist = results_dict[K_AVG_DIST]
        writer.add_scalar(name + "/avg_dist_to_goal", avg_dist, iteration)
        success_rate = results_dict[K_RATE]
        writer.add_scalar(name + "/success_rate", success_rate, iteration)
        writer.save_spied_values()

    def get_results(self):
        """Return what self.results.get_dict() reports for the results accumulated so far."""
        results_dict = self.results.get_dict()
        return results_dict

    def save_results(self):
        """Write the current results as JSON to the results path of this run, replacing any existing file."""
        results_path = get_results_path(self.run_name, makedir=True)
        with open(results_path, "w") as out_file:
            json.dump(self.get_results(), out_file)
Example #4
0
class DataEvalNL(EvaluateBase):
    """
    Evaluate rollouts by the distance between the final agent position and the
    end of the ground-truth path (either the whole path or one instruction
    segment of it).

    Results are accumulated in ``self.results`` and can optionally be plotted
    and saved to the results directory of the run.
    """

    def __init__(self, run_name="", save_images=True, entire_trajectory=True, custom_instr=None):
        """
        :param run_name: run identifier used for result paths and plot captions
        :param save_images: if True, every evaluated rollout is plotted and
            saved, and the results file is written
        :param entire_trajectory: if True, compare against the end of the full
            path; otherwise against the end index of the rollout's segment
        :param custom_instr: when not None, a snippet of the instruction text
            is appended to saved plot filenames
        """
        # Zero-argument super() dispatches to EvaluateBase.__init__. The former
        # super(EvaluateBase, self) started the MRO lookup *after* EvaluateBase
        # and therefore skipped the base-class initializer.
        super().__init__()
        # get_all_instructions() is unpacked as (train, dev, test, corpus)
        # elsewhere in this file; dev/test used to be swapped here.
        self.train_i, self.dev_i, self.test_i, _ = get_all_instructions()
        # Merge order (train, dev, test) matches the effective order of the
        # previous code, so key collisions resolve the same way as before.
        self.all_i = {**self.train_i, **self.dev_i, **self.test_i}
        self.passing_distance = DEFAULT_PASSING_DISTANCE
        self.results = ResultsLandmarkSide()
        self.presenter = Presenter()
        self.run_name = run_name
        self.save_images = save_images
        self.entire_trajectory = entire_trajectory
        self.custom_instr = custom_instr

    def evaluate_dataset(self, list_of_rollouts):
        """
        Evaluate every non-empty rollout and add its result to ``self.results``.

        :param list_of_rollouts: iterable of rollouts; empty ones are skipped
        """
        evaluated_any = False
        for rollout in list_of_rollouts:
            if len(rollout) == 0:
                continue
            self.results += self.evaluate_rollout(rollout)
            evaluated_any = True

        # evaluate_rollout() writes the results file *before* its own result is
        # accumulated above, so the file on disk would otherwise always lack
        # the last rollout. Write it once more with the complete results.
        if evaluated_any and self.save_images:
            self.save_results()

    def evaluate_rollout(self, rollout):
        """
        Score a single rollout by its final distance to the target end position.

        :param rollout: non-empty sequence of samples; the last sample must
            provide "metadata" (env_id, seg_idx, set_idx, pol_action,
            ref_action, and "instruction" when custom_instr is set) and a
            "state" object exposing ``get_pos()``
        :return: ResultsLandmarkSide built from (success, end distance)
        """
        last_sample = rollout[-1]
        metadata = last_sample["metadata"]
        env_id = metadata["env_id"]
        seg_idx = metadata["seg_idx"]
        set_idx = metadata["set_idx"]

        # TODO: Allow multiple instruction sets / paths per env
        path = load_path(env_id)
        last_path_idx = len(path) - 1

        if self.entire_trajectory:
            path_end_idx = last_path_idx
        else:
            # Segment end index, clamped to the last valid index of the path
            segment = self.all_i[env_id][set_idx]["instructions"][seg_idx]
            path_end_idx = min(segment["end_idx"], last_path_idx)

        end_pos = np.asarray(last_sample["state"].get_pos())
        target_end_pos = np.asarray(path[path_end_idx])
        end_dist = np.linalg.norm(end_pos - target_end_pos)
        # Use the instance threshold (initialised to DEFAULT_PASSING_DISTANCE)
        # rather than the module constant, so the attribute actually has effect.
        success = end_dist < self.passing_distance

        # The 4th action component above 0.5 is treated as a stop signal.
        if metadata["pol_action"][3] > 0.5:
            who_stopped = "Policy Stopped"
        elif metadata["ref_action"][3] > 0.5:
            who_stopped = "Oracle Stopped"
        else:
            who_stopped = "Veered Off"

        result = "Success" if success else "Fail"
        texts = [who_stopped, result, "run:" + self.run_name]

        print(seg_idx, result)

        if self.save_images:
            results_dir = get_results_dir(self.run_name, makedir=True)
            print("Results dir: ", results_dir)
            self.presenter.plot_paths(rollout, interactive=False, texts=texts,
                                      entire_trajectory=self.entire_trajectory)
            filename = os.path.join(
                results_dir, str(env_id) + "_" + str(set_idx) + "_" + str(seg_idx))
            if self.custom_instr is not None:
                # NOTE(review): the raw instruction text goes into the filename
                # unsanitised; confirm it can never contain path separators.
                instruction = metadata["instruction"]
                filename += "_" + instruction[:24] + "_" + instruction[-16:]
            self.presenter.save_plot(filename)
            self.save_results()

        return ResultsLandmarkSide(success, end_dist)

    def write_summaries(self, run_name, name, iteration):
        """
        Log average goal distance and success rate as scalars.

        :param run_name: run identifier; the writer logs under "runs/" + run_name
        :param name: prefix prepended to each scalar tag
        :param iteration: step value passed along with each scalar
        """
        results_dict = self.get_results()
        writer = LoggingSummaryWriter(log_dir="runs/" + run_name, restore=True)
        if K_AVG_DIST not in results_dict:
            print("nothing to write")
            return
        writer.add_scalar(name + "/avg_dist_to_goal", results_dict[K_AVG_DIST], iteration)
        writer.add_scalar(name + "/success_rate", results_dict[K_RATE], iteration)
        writer.save_spied_values()

    def get_results(self):
        """Return the accumulated results as a dictionary."""
        return self.results.get_dict()

    def save_results(self):
        """Write the results dictionary as JSON to the results path of this run."""
        path = get_results_path(self.run_name, makedir=True)
        with open(path, "w") as fp:
            json.dump(self.get_results(), fp)