Exemple #1
0
    def freespace(seed=4, trials=200, trial_length=50, force_gui=False):
        """Collect random free-space peg trials and merge the saved data.

        Runs `trials` rollouts of `trial_length` frames each with a fully
        random controller from randomized hole/peg configurations, saving
        each trial, then merges all saved files in the save directory.
        """
        env = PegGetter.env(p.GUI if force_gui else p.DIRECT, 0)
        u_min, u_max = env.get_control_bounds()
        # use mode p.GUI to see what the trials look like
        save_dir = '{}{}'.format(PegGetter.env_dir, 0)
        sim = peg_in_hole.PegInHole(env,
                                    controller.FullRandomController(env.nu, u_min, u_max),
                                    num_frames=trial_length,
                                    plot=False,
                                    save=True,
                                    stop_when_done=False,
                                    save_dir=save_dir)
        rand.seed(seed)
        # spread the data over many randomized configurations
        for _ in range(trials):
            seed = rand.seed()
            # draw a new hole/peg configuration for this trial
            hole, init_peg = OfflineDataCollection.random_config(env)
            env.set_task_config(hole=hole, init_peg=init_peg)
            sim.ctrl = controller.FullRandomController(env.nu, u_min, u_max)
            sim.run(seed)

        if sim.save:
            load_data.merge_data_in_dir(cfg, save_dir, save_dir)
        plt.ioff()
        plt.show()
Exemple #2
0
    def freespace(seed_offset=0, trials=200, trial_length=50, force_gui=False):
        """Collect random free-space data on the real peg environment.

        Each trial asks the operator where to move (dx dy), resets there,
        then executes a fully random control sequence while logging video;
        saved trials are merged at the end.
        """
        env = PegRealGetter.env(level=0, stub=False)
        u_min, u_max = env.get_control_bounds()
        # use mode p.GUI to see what the trials look like
        save_dir = '{}{}'.format(PegRealGetter.env_dir, 0)
        sim = peg_in_hole_real.ExperimentRunner(env,
                                                controller.FullRandomController(env.nu, u_min, u_max),
                                                num_frames=trial_length,
                                                plot=False,
                                                save=True,
                                                stop_when_done=False,
                                                save_dir=save_dir)
        # spread the data across trials with deterministic per-trial seeds
        for trial in range(trials):
            seed = rand.seed(seed_offset + trial)
            move = input('specify dx and dy to move to')
            dx, dy = (float(v) for v in move.split())
            env.reset([dx, dy])
            obs = env.state

            run_name = "{}_{}_{}_{}".format(seed, obs[0].round(3),
                                            obs[1].round(3), obs[2].round(3))
            # fresh random controller for this trial
            sim.ctrl = controller.FullRandomController(env.nu, u_min, u_max)
            with peg_in_hole_real.VideoLogger():
                sim.run(seed, run_name=run_name)

        env.close()
        if sim.save:
            load_data.merge_data_in_dir(cfg, save_dir, save_dir)
        plt.ioff()
        plt.show()
Exemple #3
0
    def test_set():
        """Record a scripted trajectory in and around the bug trap as a test set."""
        # get data in and around the bug trap we want to avoid in the future
        env = PegGetter.env(p.GUI, task_map['Peg-T'])
        env.set_task_config(init_peg=[0.1, 0.12])

        def noise(scale):
            # gaussian jitter applied to the scripted controls
            return np.random.randn() * scale

        seed = rand.seed(2)
        # build the pre-determined control sequence segment by segment
        u = []
        u.extend([0.4, 0.7 + noise(0.5)] for _ in range(5))
        u.extend([-0.0 + (i - 7) * 0.1, 0.8 + noise(0.5)] for i in range(15))
        u.extend([-0.8 + noise(0.2), -0. + (i - 7) * 0.1] for i in range(15))
        u.extend([-0.1 + noise(0.1), -1.] for _ in range(5))
        u.append([-0.6, -0.])
        u.extend([-0. + noise(0.5), 0.9] for _ in range(10))

        ctrl = controller.PreDeterminedController(np.array(u),
                                                  *env.get_control_bounds())
        sim = peg_in_hole.PegInHole(env,
                                    ctrl,
                                    num_frames=len(u),
                                    plot=False,
                                    save=True,
                                    stop_when_done=False)
        sim.run(seed, 'peg_contact_test_set')
Exemple #4
0
    def freespace(seed_offset=0, trials=200, trial_length=50, force_gui=False):
        """Collect random free-space arm data from random start positions."""
        env = ArmGetter.env(level=0, mode=p.GUI if force_gui else p.DIRECT)
        u_min, u_max = env.get_control_bounds()
        # use mode p.GUI to see what the trials look like
        save_dir = '{}{}'.format(ArmGetter.env_dir, 0)
        sim = arm.ExperimentRunner(env,
                                   controller.FullRandomController(env.nu, u_min, u_max),
                                   num_frames=trial_length,
                                   plot=False,
                                   save=True,
                                   stop_when_done=False,
                                   save_dir=save_dir)
        # spread the data across trials with deterministic per-trial seeds
        for trial in range(trials):
            seed = rand.seed(seed_offset + trial)
            # uniformly random starting position
            init = [(np.random.random() - 0.5) * 1.7,
                    (np.random.random() - 0.5) * 1.7,
                    np.random.random() * 0.5]
            env.set_task_config(init=init)
            sim.ctrl = controller.FullRandomController(env.nu, u_min, u_max)
            sim.run(seed)

        if sim.save:
            load_data.merge_data_in_dir(cfg, save_dir, save_dir)
        plt.ioff()
        plt.show()
Exemple #5
0
    def free_space_env_init(cls, seed=1, **kwargs):
        """Create the environment and free-space dataset for this task.

        Returns a tuple of (device, env, dataset config, dataset).
        Extra kwargs are forwarded to the env constructor; 'mode' defaults to 0.
        """
        device = get_device()
        mode = kwargs.pop('mode', 0)
        env = cls.env(mode, **kwargs)
        ds = cls.ds(env, cls.data_dir(0), validation_ratio=0.1)

        logger.info("initial random seed %d", rand.seed(seed))
        return device, env, ds.current_config(), ds
Exemple #6
0
def test_env_control():
    """Interactively drive the block-push environment with a scripted push sequence.

    Visual smoke test: opens the simulator GUI, applies N pushes, and plots
    the run. Not an automated assertion-style test.
    """
    init_block_pos = [-0.5, 0.1]
    init_block_yaw = -math.pi / 2
    # NOTE(review): face/along_face are only referenced by the commented-out
    # env construction below; the active code path does not use them
    face = block_push.BlockFace.LEFT
    along_face = 0
    # env = block_push.PushWithForceDirectlyReactionInStateEnv(dynamics_class=p.GUI, init_pusher=along_face, log_video=True,
    #                                                          init_block=init_block_pos, init_yaw=init_block_yaw,
    #                                                          environment_level=1)
    env = block_push.PushPhysicallyAnyAlongEnv(mode=p.GUI,
                                               log_video=True,
                                               init_block=init_block_pos,
                                               init_yaw=init_block_yaw,
                                               environment_level=1)
    seed = rand.seed(0)
    # env.sim_step_wait = 0.01
    # control sequence to execute; earlier hand-tried candidates kept below
    u = []

    # u.append((0, 1, 0))
    # u.append((1, 1, 0))
    # u.append((-1, 1, 0))

    # for _ in range(80):
    #     u.append((0., 1, 0.))
    N = 40
    # NOTE(review): u_dir/u_mag feed only the commented-out candidate inside
    # the loop; with the active append they are computed but unused
    u_dir = np.linspace(0, -1, N)
    u_mag = np.linspace(1, 0, N)
    for i in range(N):
        # u.append((0, 1, np.random.randn()))
        # u.append((0.1, u_mag[i], u_dir[i]))
        u.append((-0.5, 0.5 + np.random.rand(), 1.0))

    ctrl = controller.PreDeterminedController(u)
    sim = block_push.InteractivePush(env,
                                     ctrl,
                                     num_frames=len(u),
                                     plot=True,
                                     save=True)
    sim.run(seed)
    plt.ioff()
    plt.show()
Exemple #7
0
    def freespace(seed_offset=0, trials=200, trial_length=50, force_gui=False):
        """Collect random free-space gridworld data while recording RViz video."""
        env = GridGetter.env(level=0, check_boundaries=False)
        u_min, u_max = env.get_control_bounds()
        # use mode p.GUI to see what the trials look like
        save_dir = '{}{}'.format(GridGetter.env_dir, 0)
        sim = gridworld.ExperimentRunner(env,
                                         controller.FullRandomController(env.nu, u_min, u_max),
                                         num_frames=trial_length,
                                         plot=False,
                                         save=True,
                                         pause_s_between_steps=0.01,
                                         stop_when_done=False,
                                         save_dir=save_dir)
        rospy.sleep(0.5)
        sim.clear_markers()
        # spread the data across trials with deterministic per-trial seeds
        for trial in range(trials):
            seed = rand.seed(seed_offset + trial)
            # uniformly random integer start cell within the grid bounds
            init = [int(np.random.random() * bound) for bound in env.size]
            env.set_task_config(init=init)
            sim.ctrl = controller.FullRandomController(env.nu, u_min, u_max)
            with recorder.WindowRecorder(
                    window_names=("RViz*", "RViz", "gridworld.rviz - RViz",
                                  "gridworld.rviz* - RViz"),
                    name_suffix="rviz",
                    frame_rate=30.0,
                    save_dir=cfg.VIDEO_DIR):
                sim.run(seed)

        if sim.save:
            load_data.merge_data_in_dir(cfg, save_dir, save_dir)
        plt.ioff()
        plt.show()
Exemple #8
0
def run_controller(
        default_run_prefix,
        pre_run_setup,
        seed=1,
        level=1,
        gating=None,
        use_tsf=UseTsf.COORD,
        nominal_adapt=OnlineAdapt.NONE,
        autonomous_recovery=online_controller.AutonomousRecovery.RETURN_STATE,
        use_demo=False,
        use_trap_cost=True,
        reuse_escape_as_demonstration=False,
        num_frames=200,
        run_prefix=None,
        run_name=None,
        assume_all_nonnominal_dynamics_are_traps=False,
        rep_name=None,
        visualize_rollout=False,
        override_tampc_params=None,
        override_mpc_params=None,
        never_estimate_error=False,
        apfvo_baseline=False,
        apfsp_baseline=False,
        **kwargs):
    """Set up and run one control episode on the real peg-in-hole environment.

    Builds the hybrid dynamics model from the free-space prior, selects the
    controller (TAMPC/OnlineMPPI by default, or an APF-VO / APF-SP baseline),
    auto-generates a descriptive run name when none is given, and executes the
    episode under a video logger. `pre_run_setup(env, ctrl, ds)` is called
    right before the run for caller-specific setup. Extra kwargs are forwarded
    as local model options to the hybrid dynamics model.

    NOTE(review): `visualize_rollout` is accepted but never used in this
    variant — confirm whether it should be forwarded to the runner.
    """
    env = PegRealGetter.env(level=level, stub=False)
    logger.info("initial random seed %d", rand.seed(seed))

    ds, pm = PegRealGetter.prior(env, use_tsf, rep_name=rep_name)

    # base dataset plus any local demonstration datasets (none by default)
    dss = [ds]
    demo_trajs = []
    for demo in demo_trajs:
        ds_local = PegRealGetter.ds(env, demo, validation_ratio=0.)
        ds_local.update_preprocessor(ds.preprocessor)
        dss.append(ds_local)

    hybrid_dynamics = hybrid_model.HybridDynamicsModel(
        dss,
        pm,
        env.state_difference, [use_tsf.name],
        device=get_device(),
        preprocessor=no_tsf_preprocessor(),
        nominal_model_kwargs={'online_adapt': nominal_adapt},
        local_model_kwargs=kwargs)

    # we're always going to be in the nominal mode in this case; might as well speed up testing
    if not use_demo and not reuse_escape_as_demonstration:
        gating = AlwaysSelectNominal()
    else:
        gating = hybrid_dynamics.get_gating() if gating is None else gating

    tampc_opts, mpc_opts = PegRealGetter.controller_options(env)
    if override_tampc_params is not None:
        tampc_opts.update(override_tampc_params)
    if override_mpc_params is not None:
        mpc_opts.update(override_mpc_params)

    logger.debug(
        "running with parameters\nhigh level controller: %s\nlow level MPC: %s",
        pprint.pformat(tampc_opts), pprint.pformat(mpc_opts))

    if apfvo_baseline or apfsp_baseline:
        # the APF baselines do not understand the trap-cost related options
        tampc_opts.pop('trap_cost_annealing_rate')
        tampc_opts.pop('abs_unrecognized_threshold')
        tampc_opts.pop('dynamics_minimum_window')
        tampc_opts.pop('max_trap_weight')
        if apfvo_baseline:
            ctrl = online_controller.APFVO(ds,
                                           hybrid_dynamics,
                                           ds.original_config(),
                                           gating=gating,
                                           local_min_threshold=0.005,
                                           trap_max_dist_influence=0.02,
                                           repulsion_gain=0.01,
                                           **tampc_opts)
        if apfsp_baseline:
            ctrl = online_controller.APFSP(ds,
                                           hybrid_dynamics,
                                           ds.original_config(),
                                           gating=gating,
                                           trap_max_dist_influence=0.045,
                                           **tampc_opts)
    else:
        ctrl = online_controller.OnlineMPPI(
            ds,
            hybrid_dynamics,
            ds.original_config(),
            gating=gating,
            autonomous_recovery=autonomous_recovery,
            assume_all_nonnominal_dynamics_are_traps=
            assume_all_nonnominal_dynamics_are_traps,
            reuse_escape_as_demonstration=reuse_escape_as_demonstration,
            never_estimate_error_dynamics=never_estimate_error,
            use_trap_cost=use_trap_cost,
            **tampc_opts,
            mpc_opts=mpc_opts)

    # goal is above the hole at a fixed height with zero remaining state dims
    z = 0.98
    goal = np.r_[env.hole, z, 0, 0]
    ctrl.set_goal(goal)

    sim = peg_in_hole_real.ExperimentRunner(env,
                                            ctrl,
                                            num_frames=num_frames,
                                            plot=False,
                                            save=True,
                                            stop_when_done=True)
    seed = rand.seed(seed)

    if run_name is None:

        def affix_run_name(*args):
            # append each token to the run name, double-underscore separated
            nonlocal run_name
            for token in args:
                run_name += "__{}".format(token)

        def get_rep_model_name(ds):
            # extract the representation transform's name (up to its seed tag)
            import re
            tsf_name = ""
            try:
                for tsf in ds.preprocessor.tsf.transforms:
                    if isinstance(tsf, invariant.InvariantTransformer):
                        tsf_name = tsf.tsf.name
                        tsf_name = re.match(r".*?s\d+", tsf_name)[0]
            except AttributeError:
                pass
            # TODO also include model name
            return tsf_name

        # encode all settings that distinguish this run into its name
        run_name = default_run_prefix
        if apfvo_baseline:
            run_prefix = 'APFVO'
        elif apfsp_baseline:
            run_prefix = 'APFSP'
        if run_prefix is not None:
            affix_run_name(run_prefix)
        affix_run_name(nominal_adapt.name)
        if not apfvo_baseline and not apfsp_baseline:
            affix_run_name(autonomous_recovery.name +
                           ("_WITHDEMO" if use_demo else ""))
        if never_estimate_error:
            affix_run_name('NO_E')
        affix_run_name(level)
        affix_run_name(use_tsf.name)
        affix_run_name("ALLTRAP" if assume_all_nonnominal_dynamics_are_traps
                       else "SOMETRAP")
        affix_run_name("REUSE" if reuse_escape_as_demonstration else "NOREUSE")
        affix_run_name(gating.name)
        affix_run_name("TRAPCOST" if use_trap_cost else "NOTRAPCOST")
        affix_run_name(get_rep_model_name(ds))
        affix_run_name(seed)
        affix_run_name(num_frames)

    # annotate the visualization with the run's key settings
    time.sleep(1)
    sim.clear_markers()
    time.sleep(1)
    sim.dd.draw_text("seed", "s{}".format(seed), 1, left_offset=-1.4)
    sim.dd.draw_text("recovery_method",
                     "recovery {}".format(autonomous_recovery.name),
                     2,
                     left_offset=-1.4)
    if reuse_escape_as_demonstration:
        sim.dd.draw_text("resuse", "reuse escape", 3, left_offset=-1.4)
    sim.dd.draw_text("run_name", run_name, 18, left_offset=-0.8, scale=3)
    with peg_in_hole_real.VideoLogger():
        pre_run_setup(env, ctrl, ds)

        sim.run(seed, run_name)
        logger.info("last run cost %f", np.sum(sim.last_run_cost))
        time.sleep(2)
    plt.ioff()
    plt.show()
Exemple #9
0
    # experiment configuration for MPPI on the gym pendulum
    ENV_NAME = "Pendulum-v0"
    TIMESTEPS = 15  # T
    N_SAMPLES = 100  # K
    ACTION_LOW = -2.0
    ACTION_HIGH = 2.0
    USE_PREVIOUS_TRIAL_DATA = False
    SAVE_TRIAL_DATA = False
    num_frames = 500

    d = torch.device("cuda") if torch.cuda.is_available() else torch.device(
        "cpu")
    # NOTE(review): the line below overrides the CUDA selection above, forcing
    # CPU; the cuda check is currently dead code — confirm this is intentional
    d = torch.device("cpu")
    dtype = torch.double

    seed = 6
    logger.info("random seed %d", rand.seed(seed))
    # trial data is saved per-seed as a .mat file under the env's data dir
    save_dir = os.path.join(cfg.DATA_DIR, ENV_NAME)
    save_to = os.path.join(save_dir, "{}.mat".format(seed))

    # new hyperparameters for approximate dynamics
    TRAIN_EPOCH = 150  # need more epochs if we're freezing prior (~800)
    BOOT_STRAP_ITER = 100

    # pendulum state and control dimensions
    nx = 2
    nu = 1

    # quadratic state cost weights and scalar control cost
    Q = torch.tensor([[1, 0], [0, 0.1]], dtype=dtype, device=d)
    R = 0.001

    config = load_data.DataConfig(predict_difference=True,
                                  predict_all_dims=True)
Exemple #10
0
        # Kalman update: fold the observed reward into the state estimate.
        # NOTE(review): assumes C maps the hidden state to the observation
        # space consistently with pred_mean/pred_cov — confirm at the caller.
        innovation = reward - C @ pred_mean  # tilde y_k (measurement residual)
        innovation_cov = C @ pred_cov @ C.t() + obs_cov  # S_k
        kalman_gain = pred_cov @ C.t() @ innovation_cov.inverse()  # K_k

        # a posteriori estimate
        self._mean = pred_mean + kalman_gain @ innovation
        self._cov = pred_cov - kalman_gain @ C @ pred_cov
        # symmetrize to counteract numerical drift in the covariance
        self._cov = (self._cov + self._cov.t()) * 0.5


if __name__ == "__main__":
    from arm_pytorch_utilities import rand

    # deterministic demo
    rand.seed(0)

    # multi-armed bandit demo setup: each arm mixes num_costs cost terms
    num_arms = 7
    obs_noise = torch.ones(1) * 1
    process_noise_scaling = 0.1
    num_costs = 3
    cost_weights = torch.rand((num_arms, num_costs))
    # each arm is a row of the cost weight; normalize so it sums to 1
    cost_weights /= cost_weights.sum(dim=1).view(num_arms, 1)
    # give special meaning to the first few arms (they are 1-hot)
    cost_weights[:num_costs, :num_costs] = torch.eye(num_costs)

    print("cost weights")
    print(cost_weights)

    def _calculate_mab_process_noise():
Exemple #11
0
def run_controller(
        default_run_prefix,
        pre_run_setup,
        seed=1,
        level=1,
        gating=None,
        use_tsf=UseTsf.COORD,
        nominal_adapt=OnlineAdapt.NONE,
        autonomous_recovery=online_controller.AutonomousRecovery.RETURN_STATE,
        use_demo=False,
        use_trap_cost=True,
        reuse_escape_as_demonstration=False,
        num_frames=200,
        run_prefix=None,
        run_name=None,
        assume_all_nonnominal_dynamics_are_traps=False,
        rep_name=None,
        visualize_rollout=False,
        override_tampc_params=None,
        override_mpc_params=None,
        **kwargs):
    """Set up and run one TAMPC (OnlineMPPI) episode on the gridworld environment.

    Builds the hybrid dynamics model from the free-space prior, constructs the
    online controller, auto-generates a descriptive run name when none is
    given, and executes the episode while recording the RViz window.
    `pre_run_setup(env, ctrl, ds, sim)` is called right before the run for
    caller-specific setup. Extra kwargs become local model options for the
    hybrid dynamics model.

    NOTE(review): `visualize_rollout` is accepted but never used in this
    variant — confirm whether it should be forwarded to the runner.
    """
    env = GridGetter.env(level=level)
    logger.info("initial random seed %d", rand.seed(seed))

    ds, pm = GridGetter.prior(env, use_tsf, rep_name=rep_name)

    # base dataset plus any local demonstration datasets (none by default)
    dss = [ds]
    demo_trajs = []
    for demo in demo_trajs:
        ds_local = GridGetter.ds(env, demo, validation_ratio=0.)
        ds_local.update_preprocessor(ds.preprocessor)
        dss.append(ds_local)

    hybrid_dynamics = hybrid_model.HybridDynamicsModel(
        dss,
        pm,
        env.state_difference, [use_tsf.name],
        device=get_device(),
        preprocessor=no_tsf_preprocessor(),
        nominal_model_kwargs={'online_adapt': nominal_adapt},
        local_model_kwargs=kwargs)

    # we're always going to be in the nominal mode in this case; might as well speed up testing
    if not use_demo and not reuse_escape_as_demonstration:
        gating = AlwaysSelectNominal()
    else:
        gating = hybrid_dynamics.get_gating() if gating is None else gating

    tampc_opts, mpc_opts = GridGetter.controller_options(env)
    if override_tampc_params is not None:
        tampc_opts.update(override_tampc_params)
    if override_mpc_params is not None:
        mpc_opts.update(override_mpc_params)

    logger.debug(
        "running with parameters\nhigh level controller: %s\nlow level MPC: %s",
        pprint.pformat(tampc_opts), pprint.pformat(mpc_opts))

    ctrl = online_controller.OnlineMPPI(
        ds,
        hybrid_dynamics,
        ds.original_config(),
        gating=gating,
        autonomous_recovery=autonomous_recovery,
        assume_all_nonnominal_dynamics_are_traps=
        assume_all_nonnominal_dynamics_are_traps,
        reuse_escape_as_demonstration=reuse_escape_as_demonstration,
        use_trap_cost=use_trap_cost,
        **tampc_opts,
        mpc_opts=mpc_opts)

    ctrl.set_goal(env.goal)

    sim = gridworld.ExperimentRunner(env,
                                     ctrl,
                                     num_frames=num_frames,
                                     plot=False,
                                     save=True,
                                     stop_when_done=True)
    seed = rand.seed(seed)

    if run_name is None:

        def affix_run_name(*args):
            # append each token to the run name, double-underscore separated
            nonlocal run_name
            for token in args:
                run_name += "__{}".format(token)

        def get_rep_model_name(ds):
            # extract the representation transform's name (up to its seed tag)
            import re
            tsf_name = ""
            try:
                for tsf in ds.preprocessor.tsf.transforms:
                    if isinstance(tsf, invariant.InvariantTransformer):
                        tsf_name = tsf.tsf.name
                        tsf_name = re.match(r".*?s\d+", tsf_name)[0]
            except AttributeError:
                pass
            # TODO also include model name
            return tsf_name

        # encode all settings that distinguish this run into its name
        run_name = default_run_prefix
        if run_prefix is not None:
            affix_run_name(run_prefix)
        affix_run_name(nominal_adapt.name)
        affix_run_name(autonomous_recovery.name +
                       ("_WITHDEMO" if use_demo else ""))
        affix_run_name(level)
        affix_run_name(use_tsf.name)
        affix_run_name("ALLTRAP" if assume_all_nonnominal_dynamics_are_traps
                       else "SOMETRAP")
        affix_run_name("REUSE" if reuse_escape_as_demonstration else "NOREUSE")
        affix_run_name(gating.name)
        affix_run_name("TRAPCOST" if use_trap_cost else "NOTRAPCOST")
        affix_run_name(get_rep_model_name(ds))
        affix_run_name(seed)
        affix_run_name(num_frames)

    # annotate the visualization with the run's key settings
    time.sleep(1)
    sim.clear_markers()
    time.sleep(1)
    sim.dd.draw_text("seed", "s{}".format(seed), 1, left_offset=-1.4)
    sim.dd.draw_text("recovery_method",
                     "recovery {}".format(autonomous_recovery.name),
                     2,
                     left_offset=-1.4)
    if reuse_escape_as_demonstration:
        sim.dd.draw_text("resuse", "reuse escape", 3, left_offset=-1.4)
    sim.dd.draw_text("run_name", run_name, 18, left_offset=-0.8)
    with recorder.WindowRecorder(window_names=("RViz*", "RViz",
                                               "gridworld.rviz - RViz",
                                               "gridworld.rviz* - RViz"),
                                 name_suffix="rviz",
                                 frame_rate=30.0,
                                 save_dir=cfg.VIDEO_DIR):
        pre_run_setup(env, ctrl, ds, sim)

        sim.run(seed, run_name)
        logger.info("last run cost %f", np.sum(sim.last_run_cost))
        time.sleep(2)
    plt.ioff()
    plt.show()
Exemple #12
0
def run_controller(
        default_run_prefix,
        pre_run_setup,
        seed=1,
        level=1,
        gating=None,
        use_tsf=UseTsf.COORD,
        nominal_adapt=OnlineAdapt.NONE,
        autonomous_recovery=online_controller.AutonomousRecovery.RETURN_STATE,
        use_demo=False,
        use_trap_cost=True,
        reuse_escape_as_demonstration=False,
        num_frames=200,
        run_prefix=None,
        run_name=None,
        assume_all_nonnominal_dynamics_are_traps=False,
        rep_name=None,
        visualize_rollout=False,
        override_tampc_params=None,
        override_mpc_params=None,
        never_estimate_error=False,
        apfvo_baseline=False,
        apfsp_baseline=False,
        **kwargs):
    """Set up and run one control episode on the simulated peg-in-hole environment.

    Builds the hybrid dynamics model from the free-space prior, selects the
    controller (TAMPC/OnlineMPPI by default, or an APF-VO / APF-SP baseline
    with task-level-tuned influence radii), auto-generates a descriptive run
    name when none is given, and executes the episode in a GUI env with video
    logging. `pre_run_setup(env, ctrl, ds)` is called right before the run.
    Extra kwargs become local model options for the hybrid dynamics model.
    """
    env = PegGetter.env(p.GUI, level=level, log_video=True)
    logger.info("initial random seed %d", rand.seed(seed))

    ds, pm = PegGetter.prior(env, use_tsf, rep_name=rep_name)

    # base dataset plus any local demonstration datasets (none by default)
    dss = [ds]
    demo_trajs = []
    for demo in demo_trajs:
        ds_local = PegGetter.ds(env, demo, validation_ratio=0.)
        ds_local.update_preprocessor(ds.preprocessor)
        dss.append(ds_local)

    hybrid_dynamics = hybrid_model.HybridDynamicsModel(
        dss,
        pm,
        env.state_difference, [use_tsf.name],
        device=get_device(),
        preprocessor=no_tsf_preprocessor(),
        nominal_model_kwargs={'online_adapt': nominal_adapt},
        local_model_kwargs=kwargs)

    # we're always going to be in the nominal mode in this case; might as well speed up testing
    if not use_demo and not reuse_escape_as_demonstration:
        gating = AlwaysSelectNominal()
    else:
        gating = hybrid_dynamics.get_gating() if gating is None else gating

    tampc_opts, mpc_opts = PegGetter.controller_options(env)
    if override_tampc_params is not None:
        tampc_opts.update(override_tampc_params)
    if override_mpc_params is not None:
        mpc_opts.update(override_mpc_params)

    logger.debug(
        "running with parameters\nhigh level controller: %s\nlow level MPC: %s",
        pprint.pformat(tampc_opts), pprint.pformat(mpc_opts))

    if apfvo_baseline or apfsp_baseline:
        # the APF baselines do not understand the trap-cost related options
        tampc_opts.pop('trap_cost_annealing_rate')
        tampc_opts.pop('abs_unrecognized_threshold')
        if apfvo_baseline:
            # rho is the virtual obstacle's max distance of influence, tuned per task
            rho = 0.05
            if level == task_map['Peg-I']:
                rho = 0.04  # use lower value to prevent obstacle detected below to prevent us from entering the goal
            elif level == task_map['Peg-U']:
                rho = 0.025  # use lower value to place more dense virtual obstacles to increase chance of entering
            ctrl = online_controller.APFVO(ds,
                                           hybrid_dynamics,
                                           ds.original_config(),
                                           gating=gating,
                                           local_min_threshold=0.005,
                                           trap_max_dist_influence=rho,
                                           repulsion_gain=0.01,
                                           **tampc_opts)
            env.draw_user_text("APF-VO baseline", 13, left_offset=-1.5)
        if apfsp_baseline:
            # anything lower leads to oscillation between backing up and entering the trap's field of influence
            rho = 0.07
            if level == task_map['Peg-U']:
                rho = 0.055
            ctrl = online_controller.APFSP(ds,
                                           hybrid_dynamics,
                                           ds.original_config(),
                                           gating=gating,
                                           trap_max_dist_influence=rho,
                                           backup_scale=0.7,
                                           **tampc_opts)
            env.draw_user_text("APF-SP baseline", 13, left_offset=-1.5)
    else:
        ctrl = online_controller.OnlineMPPI(
            ds,
            hybrid_dynamics,
            ds.original_config(),
            gating=gating,
            autonomous_recovery=autonomous_recovery,
            assume_all_nonnominal_dynamics_are_traps=
            assume_all_nonnominal_dynamics_are_traps,
            reuse_escape_as_demonstration=reuse_escape_as_demonstration,
            use_trap_cost=use_trap_cost,
            never_estimate_error_dynamics=never_estimate_error,
            **tampc_opts,
            mpc_opts=mpc_opts)
        env.draw_user_text(gating.name, 13, left_offset=-1.5)
        env.draw_user_text("recovery {}".format(autonomous_recovery.name),
                           11,
                           left_offset=-1.6)
        if reuse_escape_as_demonstration:
            env.draw_user_text("reuse escape", 10, left_offset=-1.6)
        if use_trap_cost:
            # NOTE(review): .format() argument has no placeholder in the string
            # and is ignored — likely copy-paste leftover; confirm intent
            env.draw_user_text("trap set cost".format(
                autonomous_recovery.name),
                               9,
                               left_offset=-1.6)
    env.draw_user_text("run seed {}".format(seed), 12, left_offset=-1.5)

    # goal sits above the hole at the gripper's initial height
    z = env.initGripperPos[2]
    goal = np.r_[env.hole, z, 0, 0]
    ctrl.set_goal(goal)
    # env._dd.draw_point('hole', env.hole, color=(0, 0.5, 0.8))

    sim = peg_in_hole.PegInHole(env,
                                ctrl,
                                num_frames=num_frames,
                                plot=False,
                                save=True,
                                stop_when_done=True,
                                visualize_rollouts=visualize_rollout)
    seed = rand.seed(seed)

    if run_name is None:

        def affix_run_name(*args):
            # append each token to the run name, double-underscore separated
            nonlocal run_name
            for token in args:
                run_name += "__{}".format(token)

        def get_rep_model_name(ds):
            # extract the representation transform's name (up to its seed tag)
            import re
            tsf_name = ""
            try:
                for tsf in ds.preprocessor.tsf.transforms:
                    if isinstance(tsf, invariant.InvariantTransformer):
                        tsf_name = tsf.tsf.name
                        tsf_name = re.match(r".*?s\d+", tsf_name)[0]
            except AttributeError:
                pass
            return tsf_name

        # encode all settings that distinguish this run into its name
        run_name = default_run_prefix
        if apfvo_baseline:
            run_prefix = 'APFVO'
        elif apfsp_baseline:
            run_prefix = 'APFSP'
        if run_prefix is not None:
            affix_run_name(run_prefix)
        affix_run_name(nominal_adapt.name)
        if not apfvo_baseline and not apfsp_baseline:
            affix_run_name(autonomous_recovery.name +
                           ("_WITHDEMO" if use_demo else ""))
        if never_estimate_error:
            affix_run_name('NO_E')
        affix_run_name(level)
        affix_run_name(use_tsf.name)
        affix_run_name("ALLTRAP" if assume_all_nonnominal_dynamics_are_traps
                       else "SOMETRAP")
        affix_run_name("REUSE" if reuse_escape_as_demonstration else "NOREUSE")
        affix_run_name(gating.name)
        affix_run_name("TRAPCOST" if use_trap_cost else "NOTRAPCOST")
        affix_run_name(get_rep_model_name(ds))
        affix_run_name(seed)
        affix_run_name(num_frames)

    env.draw_user_text(run_name, 14, left_offset=-1.5)

    pre_run_setup(env, ctrl, ds)

    sim.run(seed, run_name)
    logger.info("last run cost %f", np.sum(sim.last_run_cost))
    plt.ioff()
    plt.show()

    env.close()