def freespace(seed=4, trials=200, trial_length=50, force_gui=False):
    """Collect random free-space rollouts in the simulated peg environment.

    Seeds the global RNG once, then runs `trials` rollouts of `trial_length`
    steps each under a fresh fully-random controller, merging all saved trial
    data at the end.
    """
    env = PegGetter.env(p.GUI if force_gui else p.DIRECT, 0)
    # use mode p.GUI to see what the trials look like
    lo, hi = env.get_control_bounds()
    save_dir = '{}{}'.format(PegGetter.env_dir, 0)
    sim = peg_in_hole.PegInHole(env, controller.FullRandomController(env.nu, lo, hi),
                                num_frames=trial_length, plot=False, save=True,
                                stop_when_done=False, save_dir=save_dir)
    rand.seed(seed)
    # randomly distribute data
    for _ in range(trials):
        # draw a fresh per-trial seed from the (already seeded) RNG
        trial_seed = rand.seed()
        # start at fixed location
        hole, init_peg = OfflineDataCollection.random_config(env)
        env.set_task_config(hole=hole, init_peg=init_peg)
        sim.ctrl = controller.FullRandomController(env.nu, lo, hi)
        sim.run(trial_seed)
    if sim.save:
        load_data.merge_data_in_dir(cfg, save_dir, save_dir)
    plt.ioff()
    plt.show()
def freespace(seed_offset=0, trials=200, trial_length=50, force_gui=False):
    """Collect random-action trials on the real peg environment.

    Before each trial the operator is prompted for a dx/dy offset to move the
    peg to; every trial is video-logged, and all saved trial data is merged
    once the environment is closed.
    """
    env = PegRealGetter.env(level=0, stub=False)
    # use mode p.GUI to see what the trials look like
    lo, hi = env.get_control_bounds()
    save_dir = '{}{}'.format(PegRealGetter.env_dir, 0)
    runner = peg_in_hole_real.ExperimentRunner(env, controller.FullRandomController(env.nu, lo, hi),
                                               num_frames=trial_length, plot=False, save=True,
                                               stop_when_done=False, save_dir=save_dir)
    # randomly distribute data
    for trial in range(trials):
        seed = rand.seed(seed_offset + trial)
        move = input('specify dx and dy to move to')
        dx, dy = (float(tok) for tok in move.split())
        env.reset([dx, dy])
        obs = env.state
        # encode seed and rounded initial state into the run name
        run_name = "{}_{}_{}_{}".format(seed, obs[0].round(3), obs[1].round(3), obs[2].round(3))
        # start at fixed location
        runner.ctrl = controller.FullRandomController(env.nu, lo, hi)
        with peg_in_hole_real.VideoLogger():
            runner.run(seed, run_name=run_name)
    env.close()
    if runner.save:
        load_data.merge_data_in_dir(cfg, save_dir, save_dir)
    plt.ioff()
    plt.show()
def test_set():
    """Record a predetermined contact-heavy trajectory for the Peg-T task."""
    # get data in and around the bug trap we want to avoid in the future
    env = PegGetter.env(p.GUI, task_map['Peg-T'])
    env.set_task_config(init_peg=[0.1, 0.12])

    def jitter(scale):
        # zero-mean gaussian noise of the given scale
        return np.random.randn() * scale

    seed = rand.seed(2)
    cmds = []
    # segment 1: constant first control, noisy second control
    cmds.extend([0.4, 0.7 + jitter(0.5)] for _ in range(5))
    # segment 2: sweep the first control while keeping a noisy second control
    cmds.extend([-0.0 + (i - 7) * 0.1, 0.8 + jitter(0.5)] for i in range(15))
    # segment 3: noisy first control, swept second control
    cmds.extend([-0.8 + jitter(0.2), -0. + (i - 7) * 0.1] for i in range(15))
    # segment 4: hold the second control at its minimum
    cmds.extend([-0.1 + jitter(0.1), -1.] for _ in range(5))
    cmds.append([-0.6, -0.])
    # segment 5: noisy first control with a large positive second control
    cmds.extend([-0. + jitter(0.5), 0.9] for _ in range(10))

    ctrl = controller.PreDeterminedController(np.array(cmds), *env.get_control_bounds())
    sim = peg_in_hole.PegInHole(env, ctrl, num_frames=len(cmds), plot=False, save=True,
                                stop_when_done=False)
    sim.run(seed, 'peg_contact_test_set')
def freespace(seed_offset=0, trials=200, trial_length=50, force_gui=False):
    """Collect random-control rollouts from random starting poses in the arm env."""
    env = ArmGetter.env(level=0, mode=p.GUI if force_gui else p.DIRECT)
    # use mode p.GUI to see what the trials look like
    lo, hi = env.get_control_bounds()
    save_dir = '{}{}'.format(ArmGetter.env_dir, 0)
    runner = arm.ExperimentRunner(env, controller.FullRandomController(env.nu, lo, hi),
                                  num_frames=trial_length, plot=False, save=True,
                                  stop_when_done=False, save_dir=save_dir)
    # randomly distribute data
    for trial in range(trials):
        seed = rand.seed(seed_offset + trial)
        # random starting position: first two coords in [-0.85, 0.85], third in [0, 0.5]
        start = [(np.random.random() - 0.5) * 1.7,
                 (np.random.random() - 0.5) * 1.7,
                 np.random.random() * 0.5]
        env.set_task_config(init=start)
        runner.ctrl = controller.FullRandomController(env.nu, lo, hi)
        runner.run(seed)
    if runner.save:
        load_data.merge_data_in_dir(cfg, save_dir, save_dir)
    plt.ioff()
    plt.show()
def free_space_env_init(cls, seed=1, **kwargs):
    """Build the free-space environment and dataset, and seed the global RNG.

    Returns (device, env, current dataset config, dataset).
    """
    device = get_device()
    # 'mode' is consumed here so the remaining kwargs go straight to the env factory
    mode = kwargs.pop('mode', 0)
    env = cls.env(mode, **kwargs)
    ds = cls.ds(env, cls.data_dir(0), validation_ratio=0.1)
    logger.info("initial random seed %d", rand.seed(seed))
    return device, env, ds.current_config(), ds
def test_env_control():
    """Manually exercise the physical block-pushing env with a scripted push."""
    init_block_pos = [-0.5, 0.1]
    init_block_yaw = -math.pi / 2
    # kept for the commented-out alternative env below, which uses them
    face = block_push.BlockFace.LEFT
    along_face = 0
    # env = block_push.PushWithForceDirectlyReactionInStateEnv(dynamics_class=p.GUI, init_pusher=along_face, log_video=True,
    #                                                          init_block=init_block_pos, init_yaw=init_block_yaw,
    #                                                          environment_level=1)
    env = block_push.PushPhysicallyAnyAlongEnv(mode=p.GUI, log_video=True, init_block=init_block_pos,
                                               init_yaw=init_block_yaw, environment_level=1)
    seed = rand.seed(0)
    # env.sim_step_wait = 0.01
    N = 40
    # unused here, but handy when switching to the commented-out action schedules
    u_dir = np.linspace(0, -1, N)
    u_mag = np.linspace(1, 0, N)
    # fixed direction/rotation with a randomly jittered push magnitude each step
    u = [(-0.5, 0.5 + np.random.rand(), 1.0) for _ in range(N)]
    ctrl = controller.PreDeterminedController(u)
    sim = block_push.InteractivePush(env, ctrl, num_frames=len(u), plot=True, save=True)
    sim.run(seed)
    plt.ioff()
    plt.show()
def freespace(seed_offset=0, trials=200, trial_length=50, force_gui=False):
    """Collect random-walk trials from random gridworld cells, recording RViz video."""
    env = GridGetter.env(level=0, check_boundaries=False)
    # use mode p.GUI to see what the trials look like
    lo, hi = env.get_control_bounds()
    save_dir = '{}{}'.format(GridGetter.env_dir, 0)
    runner = gridworld.ExperimentRunner(env, controller.FullRandomController(env.nu, lo, hi),
                                        num_frames=trial_length, plot=False, save=True,
                                        pause_s_between_steps=0.01, stop_when_done=False,
                                        save_dir=save_dir)
    # give ROS a moment before clearing any stale visualization markers
    rospy.sleep(0.5)
    runner.clear_markers()
    # randomly distribute data
    for trial in range(trials):
        seed = rand.seed(seed_offset + trial)
        # random starting cell within the grid extents
        start = [int(np.random.random() * extent) for extent in env.size]
        env.set_task_config(init=start)
        runner.ctrl = controller.FullRandomController(env.nu, lo, hi)
        with recorder.WindowRecorder(
                window_names=("RViz*", "RViz", "gridworld.rviz - RViz", "gridworld.rviz* - RViz"),
                name_suffix="rviz", frame_rate=30.0, save_dir=cfg.VIDEO_DIR):
            runner.run(seed)
    if runner.save:
        load_data.merge_data_in_dir(cfg, save_dir, save_dir)
    plt.ioff()
    plt.show()
def run_controller(
        default_run_prefix, pre_run_setup,
        seed=1, level=1, gating=None,
        use_tsf=UseTsf.COORD, nominal_adapt=OnlineAdapt.NONE,
        autonomous_recovery=online_controller.AutonomousRecovery.RETURN_STATE,
        use_demo=False, use_trap_cost=True,
        reuse_escape_as_demonstration=False, num_frames=200,
        run_prefix=None, run_name=None,
        assume_all_nonnominal_dynamics_are_traps=False,
        rep_name=None,
        visualize_rollout=False,  # NOTE(review): unused in this (real-robot) variant
        override_tampc_params=None, override_mpc_params=None,
        never_estimate_error=False,
        apfvo_baseline=False, apfsp_baseline=False,
        **kwargs):
    """Configure and run a TAMPC-style controller on the real peg-in-hole robot.

    Builds the hybrid dynamics model from the learned prior, selects either the
    full OnlineMPPI controller or one of the APF baselines, constructs a
    descriptive run name, and executes the experiment under video logging.
    `pre_run_setup(env, ctrl, ds)` is called just before the run starts;
    extra **kwargs are forwarded as local-model options.
    """
    env = PegRealGetter.env(level=level, stub=False)
    logger.info("initial random seed %d", rand.seed(seed))
    ds, pm = PegRealGetter.prior(env, use_tsf, rep_name=rep_name)

    dss = [ds]
    # no demonstration trajectories in this configuration (list left empty on purpose)
    demo_trajs = []
    for demo in demo_trajs:
        ds_local = PegRealGetter.ds(env, demo, validation_ratio=0.)
        ds_local.update_preprocessor(ds.preprocessor)
        dss.append(ds_local)

    hybrid_dynamics = hybrid_model.HybridDynamicsModel(dss, pm, env.state_difference, [use_tsf.name],
                                                       device=get_device(),
                                                       preprocessor=no_tsf_preprocessor(),
                                                       nominal_model_kwargs={'online_adapt': nominal_adapt},
                                                       local_model_kwargs=kwargs)

    # we're always going to be in the nominal mode in this case; might as well speed up testing
    if not use_demo and not reuse_escape_as_demonstration:
        gating = AlwaysSelectNominal()
    else:
        gating = hybrid_dynamics.get_gating() if gating is None else gating

    tampc_opts, mpc_opts = PegRealGetter.controller_options(env)
    if override_tampc_params is not None:
        tampc_opts.update(override_tampc_params)
    if override_mpc_params is not None:
        mpc_opts.update(override_mpc_params)

    logger.debug("running with parameters\nhigh level controller: %s\nlow level MPC: %s",
                 pprint.pformat(tampc_opts), pprint.pformat(mpc_opts))

    if apfvo_baseline or apfsp_baseline:
        # the APF baselines don't understand TAMPC's trap-cost options, so strip them
        tampc_opts.pop('trap_cost_annealing_rate')
        tampc_opts.pop('abs_unrecognized_threshold')
        tampc_opts.pop('dynamics_minimum_window')
        tampc_opts.pop('max_trap_weight')
        if apfvo_baseline:
            ctrl = online_controller.APFVO(ds, hybrid_dynamics, ds.original_config(), gating=gating,
                                           local_min_threshold=0.005, trap_max_dist_influence=0.02,
                                           repulsion_gain=0.01,
                                           **tampc_opts)
        if apfsp_baseline:
            ctrl = online_controller.APFSP(ds, hybrid_dynamics, ds.original_config(), gating=gating,
                                           trap_max_dist_influence=0.045,
                                           **tampc_opts)
    else:
        # full TAMPC controller
        ctrl = online_controller.OnlineMPPI(ds, hybrid_dynamics, ds.original_config(), gating=gating,
                                            autonomous_recovery=autonomous_recovery,
                                            assume_all_nonnominal_dynamics_are_traps=assume_all_nonnominal_dynamics_are_traps,
                                            reuse_escape_as_demonstration=reuse_escape_as_demonstration,
                                            never_estimate_error_dynamics=never_estimate_error,
                                            use_trap_cost=use_trap_cost,
                                            **tampc_opts, mpc_opts=mpc_opts)

    # goal is the hole's xy plus a fixed height and zero reaction forces
    # (state layout presumably [x, y, z, rx, ry] — TODO confirm against env)
    z = 0.98
    goal = np.r_[env.hole, z, 0, 0]
    ctrl.set_goal(goal)

    sim = peg_in_hole_real.ExperimentRunner(env, ctrl, num_frames=num_frames, plot=False, save=True,
                                            stop_when_done=True)
    # reseed so the run itself is reproducible independent of setup randomness
    seed = rand.seed(seed)

    if run_name is None:
        def affix_run_name(*args):
            # append each token to the run name, double-underscore separated
            nonlocal run_name
            for token in args:
                run_name += "__{}".format(token)

        def get_rep_model_name(ds):
            # extract the representation transform's name (up to its seed suffix), if any
            import re
            tsf_name = ""
            try:
                for tsf in ds.preprocessor.tsf.transforms:
                    if isinstance(tsf, invariant.InvariantTransformer):
                        tsf_name = tsf.tsf.name
                        tsf_name = re.match(r".*?s\d+", tsf_name)[0]
            except AttributeError:
                pass
            # TODO also include model name
            return tsf_name

        run_name = default_run_prefix
        if apfvo_baseline:
            run_prefix = 'APFVO'
        elif apfsp_baseline:
            run_prefix = 'APFSP'
        if run_prefix is not None:
            affix_run_name(run_prefix)
        affix_run_name(nominal_adapt.name)
        if not apfvo_baseline and not apfsp_baseline:
            affix_run_name(autonomous_recovery.name + ("_WITHDEMO" if use_demo else ""))
        if never_estimate_error:
            affix_run_name('NO_E')
        affix_run_name(level)
        affix_run_name(use_tsf.name)
        affix_run_name("ALLTRAP" if assume_all_nonnominal_dynamics_are_traps else "SOMETRAP")
        affix_run_name("REUSE" if reuse_escape_as_demonstration else "NOREUSE")
        affix_run_name(gating.name)
        affix_run_name("TRAPCOST" if use_trap_cost else "NOTRAPCOST")
        affix_run_name(get_rep_model_name(ds))
        affix_run_name(seed)
        affix_run_name(num_frames)

    # annotate the visualization with the run's key settings
    time.sleep(1)
    sim.clear_markers()
    time.sleep(1)
    sim.dd.draw_text("seed", "s{}".format(seed), 1, left_offset=-1.4)
    sim.dd.draw_text("recovery_method", "recovery {}".format(autonomous_recovery.name), 2, left_offset=-1.4)
    if reuse_escape_as_demonstration:
        sim.dd.draw_text("resuse", "reuse escape", 3, left_offset=-1.4)
    sim.dd.draw_text("run_name", run_name, 18, left_offset=-0.8, scale=3)

    with peg_in_hole_real.VideoLogger():
        pre_run_setup(env, ctrl, ds)
        sim.run(seed, run_name)
        logger.info("last run cost %f", np.sum(sim.last_run_cost))
        time.sleep(2)
    plt.ioff()
    plt.show()
# --- experiment configuration for the Pendulum MPPI script ---
ENV_NAME = "Pendulum-v0"
TIMESTEPS = 15  # T, planning horizon length
N_SAMPLES = 100  # K, number of sampled rollouts per planning step
ACTION_LOW = -2.0
ACTION_HIGH = 2.0

USE_PREVIOUS_TRIAL_DATA = False
SAVE_TRIAL_DATA = False
num_frames = 500  # steps per trial

d = torch.device("cuda") if torch.cuda.is_available() else torch.device(
    "cpu")
# NOTE(review): CPU is forced here, overriding the CUDA selection above —
# presumably a deliberate debugging/reproducibility override; confirm before
# expecting GPU execution
d = torch.device("cpu")
dtype = torch.double

seed = 6
logger.info("random seed %d", rand.seed(seed))
save_dir = os.path.join(cfg.DATA_DIR, ENV_NAME)
save_to = os.path.join(save_dir, "{}.mat".format(seed))

# new hyperparameters for approximate dynamics
TRAIN_EPOCH = 150  # need more epochs if we're freezing prior (~800)
BOOT_STRAP_ITER = 100

nx = 2  # state dimension (presumably angle and angular velocity — TODO confirm)
nu = 1  # control dimension
Q = torch.tensor([[1, 0], [0, 0.1]], dtype=dtype, device=d)  # state cost weights
R = 0.001  # control cost weight
config = load_data.DataConfig(predict_difference=True, predict_all_dims=True)
# Kalman update innovation = reward - C @ pred_mean # tilde y_k innovation_cov = C @ pred_cov @ C.t() + obs_cov # S_k kalman_gain = pred_cov @ C.t() @ innovation_cov.inverse() # K_k # a posteriori estimate self._mean = pred_mean + kalman_gain @ innovation self._cov = pred_cov - kalman_gain @ C @ pred_cov # fix to be symmetric self._cov = (self._cov + self._cov.t()) * 0.5 if __name__ == "__main__": from arm_pytorch_utilities import rand rand.seed(0) num_arms = 7 obs_noise = torch.ones(1) * 1 process_noise_scaling = 0.1 num_costs = 3 cost_weights = torch.rand((num_arms, num_costs)) # each arm is a row of the cost weight; normalize so it sums to 1 cost_weights /= cost_weights.sum(dim=1).view(num_arms, 1) # give special meaning to the first few arms (they are 1-hot) cost_weights[:num_costs, :num_costs] = torch.eye(num_costs) print("cost weights") print(cost_weights) def _calculate_mab_process_noise():
def run_controller(
        default_run_prefix, pre_run_setup,
        seed=1, level=1, gating=None,
        use_tsf=UseTsf.COORD, nominal_adapt=OnlineAdapt.NONE,
        autonomous_recovery=online_controller.AutonomousRecovery.RETURN_STATE,
        use_demo=False, use_trap_cost=True,
        reuse_escape_as_demonstration=False, num_frames=200,
        run_prefix=None, run_name=None,
        assume_all_nonnominal_dynamics_are_traps=False,
        rep_name=None,
        visualize_rollout=False,  # NOTE(review): unused in this (gridworld) variant
        override_tampc_params=None, override_mpc_params=None,
        **kwargs):
    """Configure and run the TAMPC (OnlineMPPI) controller on the gridworld env.

    Builds the hybrid dynamics model from the learned prior, constructs a
    descriptive run name from the settings, and executes the experiment while
    recording the RViz window. `pre_run_setup(env, ctrl, ds, sim)` is called
    just before the run starts; extra **kwargs are forwarded as local-model
    options.
    """
    env = GridGetter.env(level=level)
    logger.info("initial random seed %d", rand.seed(seed))
    ds, pm = GridGetter.prior(env, use_tsf, rep_name=rep_name)

    dss = [ds]
    # no demonstration trajectories in this configuration (list left empty on purpose)
    demo_trajs = []
    for demo in demo_trajs:
        ds_local = GridGetter.ds(env, demo, validation_ratio=0.)
        ds_local.update_preprocessor(ds.preprocessor)
        dss.append(ds_local)

    hybrid_dynamics = hybrid_model.HybridDynamicsModel(dss, pm, env.state_difference, [use_tsf.name],
                                                       device=get_device(),
                                                       preprocessor=no_tsf_preprocessor(),
                                                       nominal_model_kwargs={'online_adapt': nominal_adapt},
                                                       local_model_kwargs=kwargs)

    # we're always going to be in the nominal mode in this case; might as well speed up testing
    if not use_demo and not reuse_escape_as_demonstration:
        gating = AlwaysSelectNominal()
    else:
        gating = hybrid_dynamics.get_gating() if gating is None else gating

    tampc_opts, mpc_opts = GridGetter.controller_options(env)
    if override_tampc_params is not None:
        tampc_opts.update(override_tampc_params)
    if override_mpc_params is not None:
        mpc_opts.update(override_mpc_params)

    logger.debug("running with parameters\nhigh level controller: %s\nlow level MPC: %s",
                 pprint.pformat(tampc_opts), pprint.pformat(mpc_opts))

    ctrl = online_controller.OnlineMPPI(ds, hybrid_dynamics, ds.original_config(), gating=gating,
                                        autonomous_recovery=autonomous_recovery,
                                        assume_all_nonnominal_dynamics_are_traps=assume_all_nonnominal_dynamics_are_traps,
                                        reuse_escape_as_demonstration=reuse_escape_as_demonstration,
                                        use_trap_cost=use_trap_cost,
                                        **tampc_opts, mpc_opts=mpc_opts)
    ctrl.set_goal(env.goal)

    sim = gridworld.ExperimentRunner(env, ctrl, num_frames=num_frames, plot=False, save=True,
                                     stop_when_done=True)
    # reseed so the run itself is reproducible independent of setup randomness
    seed = rand.seed(seed)

    if run_name is None:
        def affix_run_name(*args):
            # append each token to the run name, double-underscore separated
            nonlocal run_name
            for token in args:
                run_name += "__{}".format(token)

        def get_rep_model_name(ds):
            # extract the representation transform's name (up to its seed suffix), if any
            import re
            tsf_name = ""
            try:
                for tsf in ds.preprocessor.tsf.transforms:
                    if isinstance(tsf, invariant.InvariantTransformer):
                        tsf_name = tsf.tsf.name
                        tsf_name = re.match(r".*?s\d+", tsf_name)[0]
            except AttributeError:
                pass
            # TODO also include model name
            return tsf_name

        run_name = default_run_prefix
        if run_prefix is not None:
            affix_run_name(run_prefix)
        affix_run_name(nominal_adapt.name)
        affix_run_name(autonomous_recovery.name + ("_WITHDEMO" if use_demo else ""))
        affix_run_name(level)
        affix_run_name(use_tsf.name)
        affix_run_name("ALLTRAP" if assume_all_nonnominal_dynamics_are_traps else "SOMETRAP")
        affix_run_name("REUSE" if reuse_escape_as_demonstration else "NOREUSE")
        affix_run_name(gating.name)
        affix_run_name("TRAPCOST" if use_trap_cost else "NOTRAPCOST")
        affix_run_name(get_rep_model_name(ds))
        affix_run_name(seed)
        affix_run_name(num_frames)

    # annotate the visualization with the run's key settings
    time.sleep(1)
    sim.clear_markers()
    time.sleep(1)
    sim.dd.draw_text("seed", "s{}".format(seed), 1, left_offset=-1.4)
    sim.dd.draw_text("recovery_method", "recovery {}".format(autonomous_recovery.name), 2, left_offset=-1.4)
    if reuse_escape_as_demonstration:
        sim.dd.draw_text("resuse", "reuse escape", 3, left_offset=-1.4)
    sim.dd.draw_text("run_name", run_name, 18, left_offset=-0.8)

    with recorder.WindowRecorder(window_names=("RViz*", "RViz", "gridworld.rviz - RViz", "gridworld.rviz* - RViz"),
                                 name_suffix="rviz", frame_rate=30.0, save_dir=cfg.VIDEO_DIR):
        pre_run_setup(env, ctrl, ds, sim)
        sim.run(seed, run_name)
        logger.info("last run cost %f", np.sum(sim.last_run_cost))
        time.sleep(2)
    plt.ioff()
    plt.show()
def run_controller(
        default_run_prefix, pre_run_setup,
        seed=1, level=1, gating=None,
        use_tsf=UseTsf.COORD, nominal_adapt=OnlineAdapt.NONE,
        autonomous_recovery=online_controller.AutonomousRecovery.RETURN_STATE,
        use_demo=False, use_trap_cost=True,
        reuse_escape_as_demonstration=False, num_frames=200,
        run_prefix=None, run_name=None,
        assume_all_nonnominal_dynamics_are_traps=False,
        rep_name=None,
        visualize_rollout=False,
        override_tampc_params=None, override_mpc_params=None,
        never_estimate_error=False,
        apfvo_baseline=False, apfsp_baseline=False,
        **kwargs):
    """Configure and run a TAMPC-style controller on the simulated peg-in-hole env.

    Builds the hybrid dynamics model from the learned prior, selects either the
    full OnlineMPPI controller or one of the APF baselines (with task-specific
    influence radii), constructs a descriptive run name, and executes the
    experiment with on-screen annotations. `pre_run_setup(env, ctrl, ds)` is
    called just before the run starts; extra **kwargs are forwarded as
    local-model options.
    """
    env = PegGetter.env(p.GUI, level=level, log_video=True)
    logger.info("initial random seed %d", rand.seed(seed))
    ds, pm = PegGetter.prior(env, use_tsf, rep_name=rep_name)

    dss = [ds]
    # no demonstration trajectories in this configuration (list left empty on purpose)
    demo_trajs = []
    for demo in demo_trajs:
        ds_local = PegGetter.ds(env, demo, validation_ratio=0.)
        ds_local.update_preprocessor(ds.preprocessor)
        dss.append(ds_local)

    hybrid_dynamics = hybrid_model.HybridDynamicsModel(dss, pm, env.state_difference, [use_tsf.name],
                                                       device=get_device(),
                                                       preprocessor=no_tsf_preprocessor(),
                                                       nominal_model_kwargs={'online_adapt': nominal_adapt},
                                                       local_model_kwargs=kwargs)

    # we're always going to be in the nominal mode in this case; might as well speed up testing
    if not use_demo and not reuse_escape_as_demonstration:
        gating = AlwaysSelectNominal()
    else:
        gating = hybrid_dynamics.get_gating() if gating is None else gating

    tampc_opts, mpc_opts = PegGetter.controller_options(env)
    if override_tampc_params is not None:
        tampc_opts.update(override_tampc_params)
    if override_mpc_params is not None:
        mpc_opts.update(override_mpc_params)

    logger.debug("running with parameters\nhigh level controller: %s\nlow level MPC: %s",
                 pprint.pformat(tampc_opts), pprint.pformat(mpc_opts))

    if apfvo_baseline or apfsp_baseline:
        # the APF baselines don't understand TAMPC's trap-cost options, so strip them
        tampc_opts.pop('trap_cost_annealing_rate')
        tampc_opts.pop('abs_unrecognized_threshold')
        if apfvo_baseline:
            # rho is the virtual obstacle's distance of influence, tuned per task
            rho = 0.05
            if level == task_map['Peg-I']:
                rho = 0.04  # use lower value to prevent obstacle detected below to prevent us from entering the goal
            elif level == task_map['Peg-U']:
                rho = 0.025  # use lower value to place more dense virtual obstacles to increase chance of entering
            ctrl = online_controller.APFVO(ds, hybrid_dynamics, ds.original_config(), gating=gating,
                                           local_min_threshold=0.005, trap_max_dist_influence=rho,
                                           repulsion_gain=0.01,
                                           **tampc_opts)
            env.draw_user_text("APF-VO baseline", 13, left_offset=-1.5)
        if apfsp_baseline:
            # anything lower leads to oscillation between backing up and entering the trap's field of influence
            rho = 0.07
            if level == task_map['Peg-U']:
                rho = 0.055
            ctrl = online_controller.APFSP(ds, hybrid_dynamics, ds.original_config(), gating=gating,
                                           trap_max_dist_influence=rho, backup_scale=0.7,
                                           **tampc_opts)
            env.draw_user_text("APF-SP baseline", 13, left_offset=-1.5)
    else:
        # full TAMPC controller
        ctrl = online_controller.OnlineMPPI(ds, hybrid_dynamics, ds.original_config(), gating=gating,
                                            autonomous_recovery=autonomous_recovery,
                                            assume_all_nonnominal_dynamics_are_traps=assume_all_nonnominal_dynamics_are_traps,
                                            reuse_escape_as_demonstration=reuse_escape_as_demonstration,
                                            use_trap_cost=use_trap_cost,
                                            never_estimate_error_dynamics=never_estimate_error,
                                            **tampc_opts, mpc_opts=mpc_opts)
        env.draw_user_text(gating.name, 13, left_offset=-1.5)

    # annotate the simulation view with the run's key settings
    env.draw_user_text("recovery {}".format(autonomous_recovery.name), 11, left_offset=-1.6)
    if reuse_escape_as_demonstration:
        env.draw_user_text("reuse escape", 10, left_offset=-1.6)
    if use_trap_cost:
        # NOTE(review): .format() here has no placeholder to fill — looks vestigial; confirm
        env.draw_user_text("trap set cost".format(autonomous_recovery.name), 9, left_offset=-1.6)
    env.draw_user_text("run seed {}".format(seed), 12, left_offset=-1.5)

    # goal is the hole's xy plus the gripper's initial height and zero reaction forces
    z = env.initGripperPos[2]
    goal = np.r_[env.hole, z, 0, 0]
    ctrl.set_goal(goal)
    # env._dd.draw_point('hole', env.hole, color=(0, 0.5, 0.8))

    sim = peg_in_hole.PegInHole(env, ctrl, num_frames=num_frames, plot=False, save=True,
                                stop_when_done=True, visualize_rollouts=visualize_rollout)
    # reseed so the run itself is reproducible independent of setup randomness
    seed = rand.seed(seed)

    if run_name is None:
        def affix_run_name(*args):
            # append each token to the run name, double-underscore separated
            nonlocal run_name
            for token in args:
                run_name += "__{}".format(token)

        def get_rep_model_name(ds):
            # extract the representation transform's name (up to its seed suffix), if any
            import re
            tsf_name = ""
            try:
                for tsf in ds.preprocessor.tsf.transforms:
                    if isinstance(tsf, invariant.InvariantTransformer):
                        tsf_name = tsf.tsf.name
                        tsf_name = re.match(r".*?s\d+", tsf_name)[0]
            except AttributeError:
                pass
            return tsf_name

        run_name = default_run_prefix
        if apfvo_baseline:
            run_prefix = 'APFVO'
        elif apfsp_baseline:
            run_prefix = 'APFSP'
        if run_prefix is not None:
            affix_run_name(run_prefix)
        affix_run_name(nominal_adapt.name)
        if not apfvo_baseline and not apfsp_baseline:
            affix_run_name(autonomous_recovery.name + ("_WITHDEMO" if use_demo else ""))
        if never_estimate_error:
            affix_run_name('NO_E')
        affix_run_name(level)
        affix_run_name(use_tsf.name)
        affix_run_name("ALLTRAP" if assume_all_nonnominal_dynamics_are_traps else "SOMETRAP")
        affix_run_name("REUSE" if reuse_escape_as_demonstration else "NOREUSE")
        affix_run_name(gating.name)
        affix_run_name("TRAPCOST" if use_trap_cost else "NOTRAPCOST")
        affix_run_name(get_rep_model_name(ds))
        affix_run_name(seed)
        affix_run_name(num_frames)

    env.draw_user_text(run_name, 14, left_offset=-1.5)

    pre_run_setup(env, ctrl, ds)

    sim.run(seed, run_name)
    logger.info("last run cost %f", np.sum(sim.last_run_cost))
    plt.ioff()
    plt.show()

    env.close()