def test_pusher_slider_dataset():
    """Smoke-test MultiEpisodeDataset construction and item access for experiment 01."""
    cfg = load_yaml(os.path.join(get_project_root(), "experiments/01/config.yaml"))

    # build the dataset straight from the config-driven factories
    dataset = MultiEpisodeDataset(
        cfg,
        action_function=ActionFunctionFactory.function_from_config(cfg),
        observation_function=ObservationFunctionFactory.function_from_config(cfg),
        episodes=load_episodes_from_config(cfg),
        phase="train",
    )

    sample = dataset[0]  # exercise __getitem__
    print("type(data)", type(sample))
    print("list(data)", list(sample))
    print(type(sample["observations"]))
    print("observations.shape", sample["observations"].shape)
    print("actions.shape", sample["actions"].shape)
    print("observations", sample["observations"])
    print("actions", sample["actions"])

    # exercise the statistics computation as well
    print("stats", dataset.compute_dataset_statistics())
def test_pusher_slider_keypoint_dataset():
    """Smoke-test the keypoint dataset for experiment 02.

    Loads the experiment config, forces a minimal sample window
    (n_history=1, n_roll=0), builds a MultiEpisodeDataset and exercises
    __getitem__.
    """
    project_root = get_project_root()
    config_file = os.path.join(project_root, "experiments/02/config.yaml")
    config = load_yaml(config_file)
    # keep the sample window minimal so a single observation suffices
    config["n_history"] = 1
    config["n_roll"] = 0

    # new dataset loading approach
    episodes = load_episodes_from_config(config)
    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    dataset = MultiEpisodeDataset(config,
                                  action_function=action_function,
                                  observation_function=observation_function,
                                  episodes=episodes,
                                  phase="train")

    # BUG FIX: a leftover debug block used to mutate the first episode's
    # slider angle and then call quit(), which terminated the interpreter
    # and made everything below unreachable. The debug scaffolding was
    # removed so the __getitem__ checks actually run.
    data = dataset[0]  # test the getitem
    print("type(data)", type(data))
    print("data.keys()", data.keys())
    print(type(data["observations"]))
    print("observations.shape", data["observations"].shape)
    print("actions.shape", data["actions"].shape)
    print("observations", data["observations"])
    print("actions", data["actions"])
def construct_dataset_from_config(
        config,         # dict: global config
        phase="train",  # str: either "train" or "valid"
        episodes=None,  # optional dict of episodes to use instead of loading data
):
    """Construct a MultiEpisodeDataset (plus its helper functions) from a global config.

    :param config: global experiment configuration dict
    :param phase: dataset split, must be "train" or "valid"
    :param episodes: optional pre-loaded episode dict; loaded from the config
        when None
    :return: dict with keys "dataset", "action_function",
        "observation_function", "episodes"
    :rtype: dict
    """
    assert phase in ["train", "valid"]

    # FIX: the previous version resolved config["dataset"]["data_path"] to an
    # absolute path here but never used the result — dead code removed.
    # Episode loading below reads whatever path it needs from the config.

    if episodes is None:
        # load the data if not passed in
        episodes = load_episodes_from_config(config)

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    dataset = MultiEpisodeDataset(config,
                                  action_function=action_function,
                                  observation_function=observation_function,
                                  episodes=episodes,
                                  phase=phase)

    return {
        "dataset": dataset,
        "action_function": action_function,
        "observation_function": observation_function,
        "episodes": episodes,
    }
def create_pusher_slider_keypoint_dataset(config=None):
    """Build a pusher-slider keypoint MultiEpisodeDataset from canned test data.

    Falls back to the experiment-02 config when none is supplied.
    Prints the sim times of the first two observations of a random
    episode as a quick sanity check.

    :return: (dataset, config)
    """
    root = get_project_root()

    if config is None:
        config = load_yaml(os.path.join(root, "experiments/02/config.yaml"))

    action_fn = ActionFunctionFactory.pusher_velocity
    obs_fn = ObservationFunctionFactory.pusher_pose_slider_keypoints(config)

    # previously generated pymunk episodes stored as a pickle
    data_file = os.path.join(
        root,
        "test_data/pusher_slider_10_episodes/2019-10-22-21-30-02-536750.p")
    episodes = PyMunkEpisodeReader.load_pymunk_episodes_from_raw_data(
        load_pickle(data_file))

    # create MultiEpisodeDataset
    dataset = MultiEpisodeDataset(config,
                                  action_function=action_fn,
                                  observation_function=obs_fn,
                                  episodes=episodes)

    # sanity-check: consecutive observations should have increasing sim_time
    episode = dataset.get_random_episode()
    print("time 0", episode.get_observation(0)["sim_time"])
    print("time 1", episode.get_observation(1)["sim_time"])

    return dataset, config
def load_model_and_data(
        K_matrix=None,        # optional camera intrinsics, paired with T_world_camera
        T_world_camera=None,  # optional camera extrinsics (world-from-camera transform)
):
    """Load the trained dynamics + dense-descriptor models, planner and camera info.

    Loads a hardcoded DD_3D model for the "push_box_hardware" dataset from
    under the data root, builds the visual observation function from its
    saved spatial descriptors, and constructs a planner selected by the
    module-level PLANNER_TYPE global.

    :param K_matrix: camera intrinsics; if both K_matrix and T_world_camera
        are given they override the spartan camera info lookup
    :param T_world_camera: camera pose in world frame
    :return: dict with keys "model_dy", 'config', 'spatial_descriptor_data',
        'action_function', 'observation_function',
        'visual_observation_function', 'planner', 'camera_info'
    :rtype: dict
    """
    dataset_name = "push_box_hardware"
    model_name = "DD_3D/2020-07-02-17-59-21-362337_DD_3D_n_his_2_T_aug"

    train_dir = os.path.join(
        get_data_root(),
        "dev/experiments/22/dataset_push_box_hardware/trained_models/dynamics")
    train_dir = os.path.join(train_dir, model_name)
    ckpt_file = os.path.join(train_dir, "net_best_dy_state_dict.pth")

    train_config = load_yaml(os.path.join(train_dir, 'config.yaml'))
    state_dict = torch.load(ckpt_file)

    # build dynamics model and restore the best-checkpoint weights
    model_dy = build_dynamics_model(train_config)
    model_dy.load_state_dict(state_dict)
    model_dy = model_dy.eval()
    model_dy = model_dy.cuda()

    # load the dataset paths
    dataset_paths = get_dataset_paths(dataset_name)
    dataset_root = dataset_paths['dataset_root']  # NOTE(review): unused below
    episodes_config = dataset_paths['episodes_config']  # NOTE(review): unused below

    spatial_descriptor_data = load_pickle(
        os.path.join(train_dir, 'spatial_descriptors.p'))
    metadata = load_pickle(os.path.join(train_dir, 'metadata.p'))
    ref_descriptors = spatial_descriptor_data['spatial_descriptors']
    ref_descriptors = torch_utils.cast_to_torch(ref_descriptors).cuda()

    # dense descriptor model; its path is recorded in the training metadata
    model_dd_file = metadata['model_file']
    model_dd = torch.load(model_dd_file)
    model_dd = model_dd.eval()
    model_dd = model_dd.cuda()

    camera_name = train_config['dataset']['visual_observation_function'][
        'camera_name']

    # camera info: caller-supplied K/T win only when BOTH are provided,
    # otherwise fall back to the spartan camera database
    camera_info = None
    if (T_world_camera is not None) and (K_matrix is not None):
        camera_info = {
            "K": K_matrix,
            'T_world_camera': T_world_camera,
        }
    else:
        camera_info = get_spartan_camera_info(camera_name)

    camera_info['camera_name'] = camera_name

    visual_observation_function = \
        VisualObservationFunctionFactory.descriptor_keypoints_3D(config=train_config,
                                                                 camera_name=camera_name,
                                                                 model_dd=model_dd,
                                                                 ref_descriptors=ref_descriptors,
                                                                 K_matrix=camera_info['K'],
                                                                 T_world_camera=camera_info['T_world_camera'],
                                                                 )

    action_function = ActionFunctionFactory.function_from_config(train_config)
    observation_function = ObservationFunctionFactory.function_from_config(
        train_config)

    #### PLANNER #######
    planner = None

    # planner config = copy of the model config with the mpc section swapped in
    planner_config = copy.copy(train_config)
    config_tmp = load_yaml(
        os.path.join(get_project_root(),
                     'experiments/exp_22_push_box_hardware/config_DD_3D.yaml'))
    planner_config['mpc'] = config_tmp['mpc']

    # PLANNER_TYPE is a module-level global — TODO confirm where it is set
    if PLANNER_TYPE == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif PLANNER_TYPE == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (PLANNER_TYPE))

    return {
        "model_dy": model_dy,
        'config': train_config,
        'spatial_descriptor_data': spatial_descriptor_data,
        'action_function': action_function,
        'observation_function': observation_function,
        'visual_observation_function': visual_observation_function,
        'planner': planner,
        'camera_info': camera_info,
    }
def evaluate_mpc_z_state(
        model_dir,
        config_planner_mpc=None,
        save_dir=None,
        planner_type=None,
        env_config=None,
        strict=True,
        generate_initial_condition_func=None,
        env_type="DrakePusherSliderEnv",
):
    """Run one MPC evaluation where the goal lives in the model's latent (z) space.

    Loads the dynamics and keypoint models from model_dir, builds a Drake
    environment of the requested type, assembles the planner and then
    delegates to mpc_eval_drake_pusher_slider.evaluate_mpc.

    :param model_dir: directory containing the trained models
    :param config_planner_mpc: optional config whose 'mpc'/'eval' sections
        override the model config
    :param save_dir: directory for evaluation output (required)
    :param planner_type: "random_shooting" or "mppi" (required)
    :param env_config: environment configuration dict (required)
    :param strict: forwarded to load_model
    :param generate_initial_condition_func: callable producing initial
        conditions (required)
    :param env_type: "DrakePusherSliderEnv" or "DrakeMugsEnv"
    :return: {'save_dir': save_dir}
    :rtype: dict
    """
    assert save_dir is not None
    assert planner_type is not None
    assert env_config is not None
    assert generate_initial_condition_func is not None

    model_dict = load_model(model_dir, strict=strict)
    model_dy = model_dict['model_dy']['model_dy']
    config = model_dict['model_dy']['config']
    model_config = config
    model_kp = model_dict['model_kp']['model']
    config_kp = model_kp.config
    camera_name = config_kp['perception']['camera_name']

    # create the environment (headless: no visualization during evaluation)
    env = None
    if env_type == "DrakePusherSliderEnv":
        env = DrakePusherSliderEnv(env_config, visualize=False)
    elif env_type == "DrakeMugsEnv":
        env = DrakeMugsEnv(env_config, visualize=False)
    else:
        raise ValueError("unknown env type: %s" % (env_type))
    env.reset()

    # camera parameters come from the simulated environment itself
    T_world_camera = env.camera_pose(camera_name)
    camera_K_matrix = env.camera_K_matrix(camera_name)
    mask_labels = env.get_labels_to_mask_list()

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.drake_pusher_position_3D(
        config)
    visual_observation_function = \
        VisualObservationFunctionFactory.function_from_config(config,
                                                              camera_name=camera_name,
                                                              model_kp=model_kp,
                                                              K_matrix=camera_K_matrix,
                                                              T_world_camera=T_world_camera,
                                                              mask_labels=mask_labels)

    episode = OnlineEpisodeReader()
    mpc_input_builder = DynamicsModelInputBuilder(
        observation_function=observation_function,
        visual_observation_function=visual_observation_function,
        action_function=action_function,
        episode=episode)

    def goal_func(obs_tmp):
        # map a raw observation to the flattened latent object state that
        # serves as the MPC goal
        state_tmp = mpc_input_builder.get_state_input_single_timestep(
            {'observation': obs_tmp})['state']
        return model_dy.compute_z_state(
            state_tmp.unsqueeze(0))['z_object_flat']

    index_dict = get_object_and_robot_state_indices(model_config)
    object_indices = index_dict['object_indices']

    # make a planner config, same as model config but with mpc and eval sections
    # replaced
    planner_config = copy.copy(model_config)
    if config_planner_mpc is not None:
        planner_config['mpc'] = config_planner_mpc['mpc']
        planner_config['eval'] = config_planner_mpc['eval']

    planner = None
    if planner_type == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif planner_type == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (planner_type))

    # run a single iteration
    mpc_eval_drake_pusher_slider.evaluate_mpc(
        model_dy=model_dy,
        env=env,
        episode=episode,
        mpc_input_builder=mpc_input_builder,
        planner=planner,
        eval_indices=object_indices,
        goal_func=goal_func,
        config=planner_config,
        wait_for_user_input=False,
        save_dir=save_dir,
        model_name="test",
        experiment_name="test",
        generate_initial_condition_func=generate_initial_condition_func)

    return {'save_dir': save_dir}
def evaluate_mpc(model_dir,
                 config_planner_mpc=None,
                 save_dir=None,
                 planner_type=None,
                 env_config=None,
                 strict=True,
                 generate_initial_condition_func=None,
                 ):
    """Run one MPC evaluation of a dynamics model in the Drake pusher-slider env.

    Loads the dynamics model from model_dir, builds the environment, planner
    and input builder, then delegates to
    mpc_eval_drake_pusher_slider.evaluate_mpc.

    :return: {'save_dir': save_dir}
    """
    assert save_dir is not None
    assert planner_type is not None
    assert env_config is not None
    assert generate_initial_condition_func is not None

    # load the dynamics model, switch to eval mode and move it to the GPU
    loaded = model_builder.load_dynamics_model_from_folder(model_dir,
                                                           strict=strict)
    model_dy = loaded['model_dy'].eval().cuda()
    model_config = model_dy.config

    # simulation environment (headless)
    env = DrakePusherSliderEnv(env_config, visualize=False)
    env.reset()

    observation_function = ObservationFunctionFactory.function_from_config(model_config)
    action_function = ActionFunctionFactory.function_from_config(model_config)

    episode = OnlineEpisodeReader()
    mpc_input_builder = DynamicsModelInputBuilder(observation_function=observation_function,
                                                  action_function=action_function,
                                                  visual_observation_function=None,
                                                  episode=episode)

    # ground-truth object keypoints determine which state entries get evaluated
    gt_points = np.array(model_config['dataset']['observation_function']['GT_3D_object_points'])
    num_keypoints = gt_points.shape[0]
    eval_indices = np.arange(3 * num_keypoints)  # xyz per keypoint

    def goal_func(obs_local):
        """Helper function for getting the goal state from an observation"""
        return observation_function(obs_local)[eval_indices]

    # planner config = model config with mpc/eval sections optionally replaced
    planner_config = copy.copy(model_config)
    if config_planner_mpc is not None:
        planner_config['mpc'] = config_planner_mpc['mpc']
        planner_config['eval'] = config_planner_mpc['eval']

    # dispatch on planner type
    planner_ctors = {
        "random_shooting": RandomShootingPlanner,
        "mppi": PlannerMPPI,
    }
    if planner_type not in planner_ctors:
        raise ValueError("unknown planner type: %s" % (planner_type))
    planner = planner_ctors[planner_type](planner_config)

    # run a single iteration
    mpc_eval_drake_pusher_slider.evaluate_mpc(model_dy=model_dy,
                                              env=env,
                                              episode=episode,
                                              mpc_input_builder=mpc_input_builder,
                                              planner=planner,
                                              eval_indices=eval_indices,
                                              goal_func=goal_func,
                                              config=planner_config,
                                              wait_for_user_input=False,
                                              save_dir=save_dir,
                                              model_name="test",
                                              experiment_name="test",
                                              generate_initial_condition_func=generate_initial_condition_func,
                                              )

    return {'save_dir': save_dir}
def eval_dynamics(
        config,
        eval_dir,  # str: directory to save output
        multi_episode_dict=None,
        n_rollout_list=None,
        model_dy=None,  # should already be in eval mode
        phase_list=None,  # typically it's ["valid"]
        num_epochs=10,
):
    """Evaluate a trained dynamics model's rollout error over several horizons.

    For each n_rollout in n_rollout_list and each phase in phase_list, builds
    a dataset/dataloader, rolls the model out open-loop with rollout_model,
    and logs per-batch l2 losses plus summary statistics to TensorBoard
    under eval_dir.

    NOTE(review): the accumulated `stats` dict is populated but never
    returned or saved — callers only get the TensorBoard output; the stat
    values stored are torch tensors (no .item()). Confirm whether a
    `return stats` was intended.
    """
    assert n_rollout_list is not None
    assert model_dy is not None
    assert multi_episode_dict is not None

    if phase_list is None:
        phase_list = ["valid"]

    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    tensorboard_dir = os.path.join(eval_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(eval_dir, "config.yaml"))

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    use_gpu = torch.cuda.is_available()

    # NOTE(review): best_valid_loss and epoch_counter_external are never
    # read below — likely copied from the training loop
    best_valid_loss = np.inf
    global_iteration = 0
    counters = {'train': 0, 'valid': 0}
    epoch_counter_external = 0
    stats = dict()

    for n_rollout in n_rollout_list:
        stats[n_rollout] = dict()
        # shallow copy: ['train'] sub-dict is shared with `config`, so this
        # mutation also touches the original config — presumably intentional,
        # TODO confirm
        config_tmp = copy.copy(config)
        config_tmp['train']['n_rollout'] = n_rollout
        for phase in phase_list:
            stats[n_rollout][phase] = dict()
            print("Loading data for %s" % phase)
            dataset = MultiEpisodeDataset(
                config_tmp,
                action_function=action_function,
                observation_function=observation_function,
                episodes=multi_episode_dict,
                phase=phase)

            dataloader = DataLoader(dataset,
                                    batch_size=config['train']['batch_size'],
                                    shuffle=True,
                                    num_workers=config['train']['num_workers'],
                                    drop_last=True)

            # per-batch loss tensors, concatenated at the end for statistics
            loss_tensor_container = {"l2_avg": [], "l2_final_step": []}

            step_duration_meter = AverageMeter()
            global_iteration = 0

            for epoch in range(num_epochs):
                for i, data in enumerate(dataloader):
                    loss_container = dict()  # store the losses for this step
                    # types of losses ["l2_avg", "l2_final_step"]

                    step_start_time = time.time()
                    global_iteration += 1
                    counters[phase] += 1

                    # evaluation only: no gradients needed
                    with torch.no_grad():
                        n_his = config['train']['n_history']
                        n_roll = n_rollout
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" % (global_iteration))
                            print("n_samples", n_samples)

                        # [B, n_samples, obs_dim]
                        observations = data['observations']

                        # [B, n_samples, action_dim]
                        actions = data['actions']
                        B = actions.shape[0]

                        if use_gpu:
                            observations = observations.cuda()
                            actions = actions.cuda()

                        # states, actions = data
                        assert actions.shape[1] == n_samples

                        loss_mse = 0.  # NOTE(review): unused

                        # we don't have any visual observations, so states are observations
                        states = observations

                        # state_cur: B x n_his x state_dim
                        # state_cur = states[:, :n_his]

                        # [B, n_his, state_dim]
                        state_init = states[:, :n_his]

                        # We want to rollout n_roll steps
                        # actions = [B, n_his + n_roll, -1]
                        # so we want action_seq.shape = [B, n_roll, -1]
                        action_start_idx = 0
                        action_end_idx = n_his + n_roll - 1
                        action_seq = actions[:, action_start_idx:
                                             action_end_idx, :]

                        if DEBUG:
                            print("states.shape", states.shape)
                            print("state_init.shape", state_init.shape)
                            print("actions.shape", actions.shape)
                            print("action_seq.shape", action_seq.shape)

                        # try using models_dy.rollout_model instead of doing this manually
                        rollout_data = rollout_model(state_init=state_init,
                                                     action_seq=action_seq,
                                                     dynamics_net=model_dy,
                                                     compute_debug_data=False)

                        # [B, n_roll, state_dim]
                        state_rollout_pred = rollout_data['state_pred']

                        # [B, n_roll, state_dim]
                        state_rollout_gt = states[:, n_his:]

                        if DEBUG:
                            print("state_rollout_gt.shape",
                                  state_rollout_gt.shape)
                            print("state_rollout_pred.shape",
                                  state_rollout_pred.shape)

                        # the loss function is between
                        # [B, n_roll, state_dim]
                        state_pred_err = state_rollout_pred - state_rollout_gt

                        # [B] — mean-over-rollout l2 error per batch element
                        l2_avg_tensor = torch.mean(torch.norm(state_pred_err,
                                                              dim=-1),
                                                   dim=1).detach().cpu()
                        l2_avg = l2_avg_tensor.mean()

                        # [B] — l2 error at the final rollout step
                        l2_final_step_tensor = torch.norm(
                            state_pred_err[:, -1], dim=-1).detach().cpu()
                        l2_final_step = l2_final_step_tensor.mean()

                        loss_tensor_container["l2_avg"].append(l2_avg_tensor)
                        loss_container["l2_avg"] = l2_avg

                        loss_tensor_container["l2_final_step"].append(
                            l2_final_step_tensor)
                        loss_container["l2_final_step"] = l2_final_step

                    step_duration_meter.update(time.time() - step_start_time)

                    if (i % config['train']['log_per_iter'] == 0) or (
                            global_iteration %
                            config['train']['log_per_iter'] == 0):
                        # print some logging information
                        # NOTE(review): log starts with ", step time" — the
                        # leading prefix from the training version was dropped
                        log = ""
                        log += ', step time %.6f' % (step_duration_meter.avg)

                        # log data to tensorboard
                        for loss_type, loss_obj in loss_container.items():
                            plot_name = "%s/n_roll_%s/%s" % (loss_type,
                                                             n_roll, phase)
                            writer.add_scalar(plot_name, loss_obj.item(),
                                              global_iteration)
                            log += " %s: %.6f," % (plot_name,
                                                   loss_obj.item())
                        print(log)
                        writer.flush()  # flush SummaryWriter events to disk

            # NOTE(review): re-initializes the dict created at the top of the
            # phase loop — harmless but redundant
            stats[n_rollout][phase] = dict()
            for loss_type in loss_tensor_container:
                t = torch.cat(loss_tensor_container[loss_type])
                mean = t.mean()
                median = t.median()
                std = t.std()

                stats[n_rollout][phase][loss_type] = {
                    'mean': mean,
                    'median': median,
                    'std': std
                }

                for stat_type, val in stats[n_rollout][phase][loss_type].items(
                ):
                    plot_name = "stats/%s/n_roll_%d/%s/%s" % (
                        loss_type, n_roll, phase, stat_type)
                    # write the same scalar at three x positions so it renders
                    # as a horizontal line in TensorBoard — presumably a
                    # display hack, TODO confirm
                    for idx_tmp in [0, 10, 100]:
                        writer.add_scalar(plot_name, val, idx_tmp)
def load_model_and_data():
    """Load the DD_3D dynamics model, dense-descriptor model, dataset and planner.

    All paths are hardcoded for the "push_box_hardware" dataset and a
    specific trained model under /home/manuelli/data. The planner type is
    selected by the module-level PLANNER_TYPE global.

    :return: dict with keys "model_dy", 'model_dd', 'dataset', 'config',
        "multi_episode_dict", 'spatial_descriptor_data', 'planner',
        'observation_function', 'action_function'
    :rtype: dict
    """
    dataset_name = "push_box_hardware"

    model_name = "DD_3D/2020-07-02-17-59-21-362337_DD_3D_n_his_2_T_aug"

    # hardcoded absolute path to the trained dynamics models
    train_dir = "/home/manuelli/data/key_dynam/dev/experiments/22/dataset_push_box_hardware/trained_models/dynamics"

    train_dir = os.path.join(train_dir, model_name)
    ckpt_file = os.path.join(train_dir, "net_best_dy_state_dict.pth")

    config = load_yaml(os.path.join(train_dir, 'config.yaml'))
    state_dict = torch.load(ckpt_file)

    # build dynamics model and restore the best-checkpoint weights
    model_dy = build_dynamics_model(config)
    model_dy.load_state_dict(state_dict)
    model_dy = model_dy.eval()
    model_dy = model_dy.cuda()

    spatial_descriptor_data = load_pickle(
        os.path.join(train_dir, 'spatial_descriptors.p'))
    metadata = load_pickle(os.path.join(train_dir, 'metadata.p'))

    # build dense-descriptor model; its path is recorded in the metadata
    model_dd_file = metadata['model_file']
    model_dd = torch.load(model_dd_file)
    model_dd = model_dd.eval()
    model_dd = model_dd.cuda()

    # load the dataset
    dataset_paths = get_dataset_paths(dataset_name)
    dataset_root = dataset_paths['dataset_root']  # NOTE(review): unused below
    episodes_config = dataset_paths['episodes_config']

    precomputed_vision_data_root = DD_utils.get_precomputed_data_root(
        dataset_name)['precomputed_data_root']

    descriptor_keypoints_root = os.path.join(precomputed_vision_data_root,
                                             'descriptor_keypoints')

    multi_episode_dict = DynamicSpartanEpisodeReader.load_dataset(
        config=config,
        episodes_config=episodes_config,
        episodes_root=dataset_paths['dataset_root'],
        load_image_episode=True,
        precomputed_data_root=descriptor_keypoints_root,
        max_num_episodes=None)

    # keypoints were precomputed, so only an index lookup is needed at runtime
    visual_observation_function = \
        PrecomputedVisualObservationFunctionFactory.function_from_config(
            config,
            keypoint_idx=spatial_descriptor_data['spatial_descriptors_idx'])

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    dataset = MultiEpisodeDataset(
        config,
        action_function=action_function,
        observation_function=observation_function,
        episodes=multi_episode_dict,
        visual_observation_function=visual_observation_function,
        phase="valid",  # this means no data augmentation
    )

    #### PLANNER #######
    planner = None

    # make a planner config: model config with the mpc section swapped in
    planner_config = copy.copy(model_dy.config)
    config_tmp = load_yaml(
        os.path.join(get_project_root(),
                     'experiments/exp_22_push_box_hardware/config_DD_3D.yaml'))
    planner_config['mpc'] = config_tmp['mpc']

    # PLANNER_TYPE is a module-level global — TODO confirm where it is set
    if PLANNER_TYPE == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif PLANNER_TYPE == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (PLANNER_TYPE))

    return {
        "model_dy": model_dy,
        'model_dd': model_dd,
        'dataset': dataset,
        'config': config,
        "multi_episode_dict": multi_episode_dict,
        'spatial_descriptor_data': spatial_descriptor_data,
        'planner': planner,
        'observation_function': observation_function,
        'action_function': action_function,
    }
def load_model_and_data():
    """Load the DD_3D dynamics model and its precomputed-keypoint dataset.

    Variant without the planner / dense-descriptor model in the return value.
    All paths are hardcoded for the "push_box_hardware" dataset and a
    specific trained model under /home/manuelli/data.

    :return: dict with keys "model_dy", 'dataset', 'config',
        "multi_episode_dict", 'spatial_descriptor_data'
    :rtype: dict
    """
    dataset_name = "push_box_hardware"

    model_name = "DD_3D/2020-07-02-17-59-21-362337_DD_3D_n_his_2_T_aug"

    # hardcoded absolute path to the trained dynamics models
    train_dir = "/home/manuelli/data/key_dynam/dev/experiments/22/dataset_push_box_hardware/trained_models/dynamics"

    train_dir = os.path.join(train_dir, model_name)
    ckpt_file = os.path.join(train_dir, "net_best_dy_state_dict.pth")

    config = load_yaml(os.path.join(train_dir, 'config.yaml'))
    state_dict = torch.load(ckpt_file)

    # build dynamics model and restore the best-checkpoint weights
    model_dy = build_dynamics_model(config)
    model_dy.load_state_dict(state_dict)
    model_dy = model_dy.eval()
    model_dy = model_dy.cuda()

    spatial_descriptor_data = load_pickle(
        os.path.join(train_dir, 'spatial_descriptors.p'))
    metadata = load_pickle(os.path.join(train_dir, 'metadata.p'))
    # NOTE(review): metadata is loaded but not used in this variant

    # load the dataset
    dataset_paths = get_dataset_paths(dataset_name)
    dataset_root = dataset_paths['dataset_root']  # NOTE(review): unused below
    episodes_config = dataset_paths['episodes_config']

    precomputed_vision_data_root = DD_utils.get_precomputed_data_root(
        dataset_name)['precomputed_data_root']

    descriptor_keypoints_root = os.path.join(precomputed_vision_data_root,
                                             'descriptor_keypoints')

    multi_episode_dict = DynamicSpartanEpisodeReader.load_dataset(
        config=config,
        episodes_config=episodes_config,
        episodes_root=dataset_paths['dataset_root'],
        load_image_episode=True,
        precomputed_data_root=descriptor_keypoints_root,
        max_num_episodes=None)

    # keypoints were precomputed, so only an index lookup is needed at runtime
    visual_observation_function = PrecomputedVisualObservationFunctionFactory.function_from_config(
        config,
        keypoint_idx=spatial_descriptor_data['spatial_descriptors_idx'])

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    dataset = MultiEpisodeDataset(
        config,
        action_function=action_function,
        observation_function=observation_function,
        episodes=multi_episode_dict,
        visual_observation_function=visual_observation_function,
        phase="valid",  # this means no data augmentation
    )

    return {
        "model_dy": model_dy,
        'dataset': dataset,
        'config': config,
        "multi_episode_dict": multi_episode_dict,
        'spatial_descriptor_data': spatial_descriptor_data,
    }
def train_dynamics(
        config,
        train_dir,  # str: directory to save output
        multi_episode_dict=None,
        spatial_descriptors_idx=None,
        metadata=None,
        spatial_descriptors_data=None,
):
    """Train a keypoint dynamics model with open-loop rollout losses.

    Builds train/valid datasets from multi_episode_dict, assembles per-batch
    states by concatenating precomputed descriptor keypoints with the
    low-dimensional observations, rolls the model out n_rollout steps, and
    optimizes a weighted object-state MSE plus a robot-state MSE. Progress
    goes to TensorBoard under train_dir; the best validation model and
    periodic checkpoints are saved there too. Ctrl-C saves a final
    checkpoint before exiting.

    :param config: global training configuration dict
    :param train_dir: directory for logs, configs and model checkpoints
    :param multi_episode_dict: dict of episodes to train on (required)
    :param spatial_descriptors_idx: indices selecting which descriptor
        keypoints to keep from each visual observation
    :param metadata: optional dict saved to metadata.p for reproducibility
    :param spatial_descriptors_data: optional dict saved to
        spatial_descriptors.p for reproducibility
    """
    assert multi_episode_dict is not None
    # assert spatial_descriptors_idx is not None

    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    st_epoch = config['train'][
        'resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    # Tee mirrors stdout to a log file for the whole training run
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch),
              'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, "config.yaml"))

    # persist metadata/descriptors alongside the checkpoints so the model
    # can be reloaded later (see the load_model_and_data helpers)
    if metadata is not None:
        save_pickle(metadata, os.path.join(train_dir, 'metadata.p'))

    if spatial_descriptors_data is not None:
        save_pickle(spatial_descriptors_data,
                    os.path.join(train_dir, 'spatial_descriptors.p'))

    training_stats = dict()
    training_stats_file = os.path.join(train_dir, 'training_stats.yaml')

    # load the data
    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=multi_episode_dict,
            phase=phase)

        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=True if phase == 'train' else False,
            num_workers=config['train']['num_workers'],
            drop_last=True)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    # compute normalization parameters if not starting from pre-trained network . . .
    '''
    Build model for dynamics prediction
    '''
    model_dy = build_dynamics_model(config)
    camera_name = config['vision_net']['camera_name']

    # criterion
    # NOTE(review): these three criteria are constructed but never used below
    criterionMSE = nn.MSELoss()
    l1Loss = nn.L1Loss()
    smoothL1 = nn.SmoothL1Loss()

    # optimizer
    params = model_dy.parameters()
    lr = float(config['train']['lr'])
    optimizer = optim.Adam(params,
                           lr=lr,
                           betas=(config['train']['adam_beta1'], 0.999))

    # setup scheduler
    sc = config['train']['lr_scheduler']
    scheduler = None

    if config['train']['lr_scheduler']['enabled']:
        if config['train']['lr_scheduler']['type'] == "ReduceLROnPlateau":
            scheduler = ReduceLROnPlateau(optimizer,
                                          mode='min',
                                          factor=sc['factor'],
                                          patience=sc['patience'],
                                          threshold_mode=sc['threshold_mode'],
                                          cooldown=sc['cooldown'],
                                          verbose=True)
        elif config['train']['lr_scheduler']['type'] == "StepLR":
            step_size = config['train']['lr_scheduler']['step_size']
            gamma = config['train']['lr_scheduler']['gamma']
            scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma)
        else:
            raise ValueError("unknown scheduler type: %s" %
                             (config['train']['lr_scheduler']['type']))

    if use_gpu:
        print("using gpu")
        model_dy = model_dy.cuda()

    best_valid_loss = np.inf
    valid_loss_type = config['train']['valid_loss_type']
    global_iteration = 0
    counters = {'train': 0, 'valid': 0}
    epoch_counter_external = 0
    loss = 0

    # indices that split the flattened state into object vs robot parts
    index_map = get_object_and_robot_state_indices(config)
    object_state_indices = torch.LongTensor(index_map['object_indices'])
    robot_state_indices = torch.LongTensor(index_map['robot_indices'])
    object_state_shape = config['dataset']['object_state_shape']

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch,
                              global_iteration)
            for phase in phases:
                # only validate at a certain frequency
                if (phase == "valid") and (
                        (epoch % config['train']['valid_frequency']) != 0):
                    continue

                model_dy.train(phase == 'train')

                average_meter_container = dict()

                step_duration_meter = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):

                    loss_container = dict()  # store the losses for this step

                    step_start_time = time.time()

                    global_iteration += 1
                    counters[phase] += 1

                    # gradients only during the train phase
                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" %
                                  (global_iteration))
                            print("n_samples", n_samples)

                        # [B, n_samples, obs_dim]
                        observations = data['observations']
                        visual_observations_list = data[
                            'visual_observations_list']

                        # [B, n_samples, action_dim]
                        actions = data['actions']

                        B = actions.shape[0]

                        if use_gpu:
                            observations = observations.cuda()
                            actions = actions.cuda()

                        # compile the visual observations
                        # compute the output of the visual model for all timesteps
                        visual_model_output_list = []
                        for visual_obs in visual_observations_list:
                            # visual_obs is a dict containing observation for a single
                            # time step (of course across a batch however)
                            # visual_obs[<camera_name>]['rgb_tensor'] has shape [B, 3, H, W]

                            # probably need to cast input to cuda

                            # [B, -1, 3]
                            keypoints = visual_obs[camera_name][
                                'descriptor_keypoints_3d_world_frame']

                            # [B, K, 3] where K = len(spatial_descriptors_idx)
                            keypoints = keypoints[:, spatial_descriptors_idx]

                            B, K, _ = keypoints.shape

                            # [B, K*3]
                            keypoints_reshape = keypoints.reshape([B, K * 3])

                            if DEBUG:
                                print("keypoints.shape", keypoints.shape)
                                print("keypoints_reshape.shape",
                                      keypoints_reshape.shape)

                            visual_model_output_list.append(
                                keypoints_reshape)

                        visual_model_output = None
                        if len(visual_model_output_list) > 0:
                            # concatenate this into a tensor
                            # [B, n_samples, vision_model_out_dim]
                            visual_model_output = torch.stack(
                                visual_model_output_list, dim=1)
                        else:
                            visual_model_output = torch.Tensor()  # empty tensor

                        # states, actions = data
                        assert actions.shape[1] == n_samples

                        # cast this to float so it can be concatenated below
                        visual_model_output = visual_model_output.type_as(
                            observations)

                        # states is gotten by concatenating visual_observations and observations
                        # [B, n_samples, vision_model_out_dim + obs_dim]
                        states = torch.cat(
                            (visual_model_output, observations), dim=-1)

                        # state_cur: B x n_his x state_dim
                        # state_cur = states[:, :n_his]

                        # [B, n_his, state_dim]
                        state_init = states[:, :n_his]

                        # We want to rollout n_roll steps
                        # actions = [B, n_his + n_roll, -1]
                        # so we want action_seq.shape = [B, n_roll, -1]
                        action_start_idx = 0
                        action_end_idx = n_his + n_roll - 1
                        action_seq = actions[:, action_start_idx:
                                             action_end_idx, :]

                        if DEBUG:
                            print("states.shape", states.shape)
                            print("state_init.shape", state_init.shape)
                            print("actions.shape", actions.shape)
                            print("action_seq.shape", action_seq.shape)

                        # try using models_dy.rollout_model instead of doing this manually
                        rollout_data = rollout_model(
                            state_init=state_init,
                            action_seq=action_seq,
                            dynamics_net=model_dy,
                            compute_debug_data=False)

                        # [B, n_roll, state_dim]
                        state_rollout_pred = rollout_data['state_pred']

                        # [B, n_roll, state_dim]
                        state_rollout_gt = states[:, n_his:]

                        if DEBUG:
                            print("state_rollout_gt.shape",
                                  state_rollout_gt.shape)
                            print("state_rollout_pred.shape",
                                  state_rollout_pred.shape)

                        # the loss function is between
                        # [B, n_roll, state_dim]
                        state_pred_err = state_rollout_pred - state_rollout_gt

                        # [B, n_roll, object_state_dim]
                        object_state_err = state_pred_err[:, :,
                                                          object_state_indices]
                        B, n_roll, object_state_dim = object_state_err.shape

                        # [B, n_roll, *object_state_shape]
                        object_state_err_reshape = object_state_err.reshape(
                            [B, n_roll, *object_state_shape])

                        # num weights
                        J = object_state_err_reshape.shape[2]
                        weights = model_dy.weight_matrix

                        assert len(
                            weights) == J, "len(weights) = %d, but J = %d" % (
                                len(weights), J)

                        # loss mse object, note the use of broadcasting semantics
                        # [B, n_roll]
                        object_state_loss_mse = weights * torch.pow(
                            object_state_err_reshape, 2).sum(dim=-1)
                        object_state_loss_mse = object_state_loss_mse.mean()

                        l2_object = (weights * torch.norm(
                            object_state_err_reshape, dim=-1)).mean()

                        l2_object_final_step = (weights * torch.norm(
                            object_state_err_reshape[:, -1], dim=-1)).mean()

                        # [B, n_roll, robot_state_dim]
                        robot_state_err = state_pred_err[:, :,
                                                         robot_state_indices]
                        robot_state_loss_mse = torch.pow(
                            robot_state_err, 2).sum(dim=-1).mean()

                        loss_container[
                            'object_state_loss_mse'] = object_state_loss_mse
                        loss_container[
                            'robot_state_loss_mse'] = robot_state_loss_mse
                        loss_container['l2_object'] = l2_object
                        loss_container[
                            'l2_object_final_step'] = l2_object_final_step

                        # total loss
                        loss = object_state_loss_mse + robot_state_loss_mse
                        loss_container['loss'] = loss

                        for key, val in loss_container.items():
                            if not key in average_meter_container:
                                average_meter_container[key] = AverageMeter()

                            average_meter_container[key].update(val.item(), B)

                    step_duration_meter.update(time.time() - step_start_time)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                    if (i % config['train']['log_per_iter'] == 0) or (
                            global_iteration %
                            config['train']['log_per_iter'] == 0):
                        # print some logging information
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))

                        # log += ', l2: %.6f' % (loss_container['l2'].item())
                        # log += ', l2_final_step: %.6f' %(loss_container['l2_final_step'].item())

                        log += ', step time %.6f' % (step_duration_meter.avg)
                        step_duration_meter.reset()

                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 100 iterations
                        if global_iteration > 100:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss_train/%s" % (phase),
                                              loss.item(), global_iteration)

                            for loss_type, loss_obj in loss_container.items():
                                plot_name = "Loss/%s/%s" % (loss_type, phase)
                                writer.add_scalar(plot_name, loss_obj.item(),
                                                  counters[phase])

                            # only plot the weights if we are in the train phase . . . .
                            if phase == "train":
                                # NOTE(review): this inner loop reuses `i` and
                                # clobbers the dataloader batch index — the
                                # checkpoint filename below and the next log
                                # line will see the weights index instead.
                                # Confirm and rename before relying on `i`.
                                for i in range(len(weights)):
                                    plot_name = "Weights/%d" % (i)
                                    writer.add_scalar(plot_name,
                                                      weights[i].item(),
                                                      counters[phase])

                    if phase == 'train' and global_iteration % config[
                            'train']['ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_epoch_%d_iter_%d' %
                            (train_dir, epoch, i))

                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'],
                    average_meter_container[valid_loss_type].avg,
                    best_valid_loss)
                print(log)

                # record all average_meter losses
                for key, meter in average_meter_container.items():
                    writer.add_scalar("AvgMeter/%s/%s" % (key, phase),
                                      meter.avg, epoch)

                if phase == "train":
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type'] ==
                            "StepLR"):
                        scheduler.step()

                if phase == 'valid':
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type'] ==
                            "ReduceLROnPlateau"):
                        scheduler.step(
                            average_meter_container[valid_loss_type].avg)

                    # keep the best-so-far validation model
                    if average_meter_container[
                            valid_loss_type].avg < best_valid_loss:
                        best_valid_loss = average_meter_container[
                            valid_loss_type].avg
                        training_stats['epoch'] = epoch
                        training_stats['global_iteration'] = counters['valid']
                        save_yaml(training_stats, training_stats_file)
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

            writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' %
            (train_dir, epoch_counter_external))
        writer.flush()  # flush SummaryWriter events to disk
def main():
    """End-to-end MPC demo for the Drake pusher-slider environment.

    Pipeline:
      1. Load a pretrained autoencoder + dynamics model bundle.
      2. Roll out a nominal action sequence in the simulator to obtain a
         ground-truth trajectory and a latent-space goal.
      3. Roll out the same actions through the learned dynamics model for
         comparison.
      4. Run a closed-loop MPC (random-shooting or MPPI) that tracks the
         ground-truth trajectory (or just its final state) in latent space,
         visualizing everything in meshcat.

    NOTE(review): relies on module-level flags defined elsewhere in this file
    (N, REPLAN, PLANNER_TYPE, MPC_HORIZON, USE_FIXED_MPC_HORIZON,
    USE_SHORT_HORIZON_MPC, SEED_WITH_NOMINAL_ACTIONS, TRAJECTORY_GOAL,
    TERMINAL_COST_ONLY) and on CUDA being available (.cuda() calls below).
    Blocks on input() each MPC step, so it is interactive by design.
    """
    # load dynamics model
    model_dict = load_autoencoder_model()
    model = model_dict['model_dy']
    model_ae = model_dict['model_ae']
    visual_observation_function = model_dict['visual_observation_function']

    config = model.config

    env_config = load_yaml(
        os.path.join(get_project_root(),
                     'experiments/exp_18_box_on_side/config.yaml'))
    # request int16 depth images from the simulator's RGBD sensors
    env_config['env']['observation']['depth_int16'] = True
    n_history = config['train']['n_history']

    # create the environment
    env = DrakePusherSliderEnv(env_config)
    env.reset()

    # create another environment for doing rollouts
    # (env2 is used to preview the MPC plan under true dynamics without
    # disturbing the state of the "real" env)
    env2 = DrakePusherSliderEnv(env_config, visualize=False)
    env2.reset()

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.drake_pusher_position_3D(
        config)

    episode = OnlineEpisodeReader()
    mpc_input_builder = DynamicsModelInputBuilder(
        observation_function=observation_function,
        visual_observation_function=visual_observation_function,
        action_function=action_function,
        episode=episode)

    vis = meshcat_utils.make_default_visualizer_object()
    vis.delete()

    initial_cond = get_initial_state()
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    obs_init = env.get_observation()

    # visualize starting position of the object
    print("obs_init.keys()", obs_init.keys())
    print("obs_init['slider']['position']", obs_init['slider']['position'])
    T = DrakePusherSliderEnv.object_position_from_observation(obs_init)
    vis['start_pose'].set_object(triad(scale=0.1))
    vis['state_pose'].set_transform(T)

    #### ROLLOUT USING LEARNED MODEL + GROUND TRUTH ACTIONS ############
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    # add just some large number of these
    # (pad the episode with n_history zero-action frames so the model input
    # builder has a full history window from step 0)
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    #### ROLLOUT THE ACTION SEQUENCE USING THE SIMULATOR ##########
    # rollout single action sequence using the simulator
    gt_rollout_data = env_utils.rollout_action_sequence(
        env, initial_cond['action_sequence'].cpu().numpy())
    env_obs_rollout_gt = gt_rollout_data['observations']
    gt_rollout_episode = gt_rollout_data['episode_reader']

    # draw a triad at the object pose for every ground-truth timestep
    for i, env_obs in enumerate(gt_rollout_data['observations']):
        T = DrakePusherSliderEnv.object_position_from_observation(env_obs)
        vis_name = "GT_trajectory/%d" % (i)
        vis[vis_name].set_object(triad(scale=0.1))
        vis[vis_name].set_transform(T)

    # N is a module-level horizon constant — TODO confirm it matches the
    # length of initial_cond['action_sequence']
    action_state_gt = mpc_input_builder.get_action_state_tensors(
        start_idx=0, num_timesteps=N, episode=gt_rollout_episode)

    state_rollout_gt = action_state_gt['states']
    action_rollout_gt = action_state_gt['actions']
    # latent-space (z) encoding of the ground-truth states; this is the
    # reference trajectory the MPC cost will track
    z_object_rollout_gt = model.compute_z_state(
        state_rollout_gt)['z_object_flat']
    print('state_rollout_gt.shape', state_rollout_gt.shape)
    print("z_object_rollout_gt.shape", z_object_rollout_gt.shape)

    def goal_func(obs_tmp):
        # encode a single raw observation into the flattened latent object
        # state used as the MPC goal
        state_tmp = mpc_input_builder.get_state_input_single_timestep(
            {'observation': obs_tmp})['state']
        return model.compute_z_state(
            state_tmp.unsqueeze(0))['z_object'].flatten()

    # using the vision model to get "goal" keypoints
    z_object_goal = goal_func(env_obs_rollout_gt[-1])
    z_object_goal_np = torch_utils.cast_to_numpy(z_object_goal)

    # input("press Enter to continue")

    #### ROLLOUT USING LEARNED MODEL + GROUND TRUTH ACTIONS ############
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    # add just some large number of these
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    # [n_history, state_dim]
    idx = episode.get_latest_idx()
    dyna_net_input = mpc_input_builder.get_dynamics_model_input(
        idx, n_history=n_history)
    state_init = dyna_net_input['states'].cuda()  # [n_history, state_dim]
    action_init = dyna_net_input['actions']  # [n_history, action_dim]

    print("state_init.shape", state_init.shape)
    print("action_init.shape", action_init.shape)
    print("n_history", n_history)

    action_seq_gt_torch = initial_cond['action_sequence']
    # prepend the last (n_history - 1) history actions so the action tensor
    # lines up with the model's n_history-frame state input
    action_input = torch.cat(
        (action_init[:(n_history - 1)], action_seq_gt_torch), dim=0).cuda()
    print("action_input.shape", action_input.shape)

    # rollout using the ground truth actions and learned model
    # need to add the batch dim to do that
    z_init = model.compute_z_state(state_init)['z']
    rollout_pred = rollout_model(state_init=z_init.unsqueeze(0),
                                 action_seq=action_input.unsqueeze(0),
                                 dynamics_net=model,
                                 compute_debug_data=True)

    state_pred_rollout = rollout_pred['state_pred'].squeeze(0)

    print("state_pred_rollout.shape", state_pred_rollout.shape)

    # input("press Enter to continue")

    # check L2 distance between predicted and actual
    # basically comparing state_pred_rollout and state_rollout_gt
    print("state_rollout_gt[-1]\n", state_rollout_gt[-1])
    print("state_pred_rollout[-1]\n", state_pred_rollout[-1])

    index_dict = get_object_and_robot_state_indices(config)
    # the MPC cost is evaluated on the object sub-state only
    object_indices = index_dict['object_indices']

    # reset the environment and use the MPC controller to stabilize this
    # now setup the MPC to try to stabilize this . . . .
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    episode.clear()

    # add just some large number of these
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    # input("press Enter to continue")

    # make a planner config
    planner_config = copy.copy(config)
    config_tmp = load_yaml(
        os.path.join(get_project_root(),
                     'experiments/drake_pusher_slider/eval_config.yaml'))
    planner_config['mpc'] = config_tmp['mpc']
    planner_config['mpc']['mppi']['terminal_cost_only'] = TERMINAL_COST_ONLY
    planner_config['mpc']['random_shooting'][
        'terminal_cost_only'] = TERMINAL_COST_ONLY

    planner = None
    if PLANNER_TYPE == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif PLANNER_TYPE == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (PLANNER_TYPE))

    mpc_out = None
    action_seq_mpc = None
    state_pred_mpc = None
    counter = -1
    # closed-loop MPC: plan (or reuse the previous plan), preview it under
    # true dynamics in env2, execute the first action in env, repeat
    while True:
        counter += 1
        print("\n\n-----Running MPC Optimization: Counter (%d)-------" %
              (counter))

        obs_cur = env.get_observation()
        episode.add_observation_only(obs_cur)

        if counter == 0 or REPLAN:
            print("replanning")
            ####### Run the MPC ##########

            # [1, state_dim]

            # receding horizon: by default shrink as we approach step N
            n_look_ahead = N - counter
            if USE_FIXED_MPC_HORIZON:
                n_look_ahead = MPC_HORIZON
            elif USE_SHORT_HORIZON_MPC:
                n_look_ahead = min(MPC_HORIZON, N - counter)
            if n_look_ahead == 0:
                break

            start_idx = counter
            end_idx = counter + n_look_ahead

            print("start_idx:", start_idx)
            print("end_idx:", end_idx)
            print("n_look_ahead", n_look_ahead)

            # start_time = time.time()
            # idx of current observation
            idx = episode.get_latest_idx()

            mpc_start_time = time.time()
            mpc_input_data = mpc_input_builder.get_dynamics_model_input(
                idx, n_history=n_history)
            state_cur = mpc_input_data['states']
            action_his = mpc_input_data['actions']

            # warm-start the planner: either from the nominal (ground-truth)
            # actions, or by shifting the previous MPC solution one step
            if SEED_WITH_NOMINAL_ACTIONS:
                action_seq_rollout_init = action_seq_gt_torch[
                    start_idx:end_idx]
            else:
                if mpc_out is not None:
                    # drop the action we just executed, keep the tail
                    action_seq_rollout_init = mpc_out['action_seq'][1:]
                    print("action_seq_rollout_init.shape",
                          action_seq_rollout_init.shape)

                    # zero-pad to the full horizon if the shifted plan is short
                    if action_seq_rollout_init.shape[0] < n_look_ahead:
                        num_steps = n_look_ahead - action_seq_rollout_init.shape[
                            0]
                        action_seq_zero = torch.zeros([num_steps, 2])
                        action_seq_rollout_init = torch.cat(
                            (action_seq_rollout_init, action_seq_zero), dim=0)
                        print("action_seq_rollout_init.shape",
                              action_seq_rollout_init.shape)
                else:
                    action_seq_rollout_init = None

            # run MPPI
            z_cur = None
            with torch.no_grad():
                z_cur = model.compute_z_state(
                    state_cur.unsqueeze(0).cuda())['z'].squeeze(0)

            if action_seq_rollout_init is not None:
                n_look_ahead = action_seq_rollout_init.shape[0]

            obs_goal = None
            print("z_object_rollout_gt.shape", z_object_rollout_gt.shape)
            if TRAJECTORY_GOAL:
                # track the time-indexed slice of the reference trajectory
                obs_goal = z_object_rollout_gt[start_idx:end_idx]

                print("n_look_ahead", n_look_ahead)
                print("obs_goal.shape", obs_goal.shape)

                # add the final goal state on as needed
                # (repeat the last reference state to fill out the horizon)
                if obs_goal.shape[0] < n_look_ahead:
                    obs_goal_final = z_object_rollout_gt[-1].unsqueeze(0)
                    num_repeat = n_look_ahead - obs_goal.shape[0]
                    obs_goal_final_expand = obs_goal_final.expand(
                        [num_repeat, -1])
                    obs_goal = torch.cat((obs_goal, obs_goal_final_expand),
                                         dim=0)
            else:
                # terminal-state goal only
                obs_goal = z_object_rollout_gt[-1]

            obs_goal = torch_utils.cast_to_numpy(obs_goal)
            print("obs_goal.shape", obs_goal.shape)

            # fixed seed so each planning call is reproducible
            seed = 1
            set_seed(seed)
            mpc_out = planner.trajectory_optimization(
                state_cur=z_cur,
                action_his=action_his,
                obs_goal=obs_goal,
                model_dy=model,
                action_seq_rollout_init=action_seq_rollout_init,
                n_look_ahead=n_look_ahead,
                eval_indices=object_indices,
                rollout_best_action_sequence=True,
                verbose=True,
                add_grid_action_samples=True,
            )

            print("MPC step took %.4f seconds" %
                  (time.time() - mpc_start_time))
            action_seq_mpc = torch_utils.cast_to_numpy(mpc_out['action_seq'])
            state_pred_mpc = torch_utils.cast_to_numpy(mpc_out['state_pred'])

        # Rollout with ground truth simulator dynamics
        # (preview the planned action sequence in env2, starting from the
        # current real state, without touching env)
        env2.set_simulator_state_from_observation_dict(
            env2.get_mutable_context(), obs_cur)
        obs_mpc_gt = env_utils.rollout_action_sequence(
            env2, action_seq_mpc)['observations']

        vis['mpc_3D'].delete()
        vis['mpc_GT_3D'].delete()

        L = len(obs_mpc_gt)
        print("L", L)
        if L == 0:
            break
        for i in range(L):
            # ground truth rollout of the MPC action_seq
            name = "mpc_GT_3D/%d" % (i)
            T_W_obj = DrakePusherSliderEnv.object_position_from_observation(
                obs_mpc_gt[i])
            vis[name].set_object(triad(scale=0.1))
            vis[name].set_transform(T_W_obj)

        # execute only the first planned action (receding horizon)
        action_cur = action_seq_mpc[0]

        print("action_cur", action_cur)
        print("action_GT", initial_cond['action'])
        input("press Enter to continue")

        # add observation actions to the episode
        obs_cur = env.get_observation()
        episode.replace_observation_action(obs_cur, action_cur)

        # step the simulator
        env.step(action_cur)

        # update the trajectories, in case we aren't replanning
        action_seq_mpc = action_seq_mpc[1:]
        state_pred_mpc = state_pred_mpc[1:]

        pose_error = compute_pose_error(env_obs_rollout_gt[-1], obs_cur)

        print("position_error: %.3f" % (pose_error['position_error']))
        print("angle error degrees: %.3f" %
              (pose_error['angle_error_degrees']))

    obs_final = env.get_observation()

    # final tracking error vs. the ground-truth terminal pose
    pose_error = compute_pose_error(env_obs_rollout_gt[-1], obs_final)

    print("position_error: %.3f" % (pose_error['position_error']))
    print("angle error degrees: %.3f" % (pose_error['angle_error_degrees']))
def mpc_w_learned_dynamics(config, train_dir, mpc_dir, state_dict_path=None,
                           keypoint_observation=False):
    """Run MPC episodes using a trained MLP dynamics model.

    Loads a DynaNetMLP checkpoint from ``train_dir`` (or the explicit
    ``state_dict_path``) and runs ``config['mpc']['num_episodes']`` MPC
    episodes, one per row of the hard-coded ``obs_goals`` table below.
    Only ``keypoint_observation=True`` is supported; anything else raises.

    Args:
        config: experiment config dict (train/dynamics/mpc sections used).
        train_dir: directory containing the trained checkpoints.
        mpc_dir: output directory; 'mpc.log' is tee'd here.
        state_dict_path: optional explicit checkpoint path; if None it is
            derived from config['mpc']['mpc_dy_epoch'] / 'mpc_dy_iter'
            (-1 means use 'net_best_dy.pth').
        keypoint_observation: must be True (keypoint-based observations).

    Raises:
        AssertionError: on unknown model type or non-keypoint observations.
    """
    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    # mirror stdout to a log file for the whole run
    tee = Tee(os.path.join(mpc_dir, 'mpc.log'), 'w')

    print(config)

    use_gpu = torch.cuda.is_available()

    '''
    model
    '''
    if config['dynamics']['model_type'] == 'mlp':
        model_dy = DynaNetMLP(config)
    else:
        raise AssertionError("Unknown model type %s" %
                             config['dynamics']['model_type'])

    # print model #params
    print("model #params: %d" % count_trainable_parameters(model_dy))

    if state_dict_path is None:
        if config['mpc']['mpc_dy_epoch'] == -1:
            state_dict_path = os.path.join(train_dir, 'net_best_dy.pth')
        else:
            state_dict_path = os.path.join(
                train_dir, 'net_dy_epoch_%d_iter_%d.pth' % \
                (config['mpc']['mpc_dy_epoch'], config['mpc']['mpc_dy_iter']))
        print("Loading saved ckp from %s" % state_dict_path)

    model_dy.load_state_dict(torch.load(state_dict_path))
    model_dy.eval()

    if use_gpu:
        model_dy.cuda()

    criterionMSE = nn.MSELoss()

    # generate action/observation functions
    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    # planner
    planner = planner_from_config(config)

    '''
    env
    '''
    # set up goal
    # one row per MPC episode; presumably [x, y] pixel coordinates of 5
    # keypoints flattened to length 10 — TODO confirm against the keypoint
    # observation function
    obs_goals = np.array([[
        262.9843, 267.3102, 318.9369, 351.1229, 360.2048, 323.5128, 305.6385,
        240.4460, 515.4230, 347.8708
    ],
                          [
                              381.8694, 273.6327, 299.6685, 331.0925,
                              328.7724, 372.0096, 411.0972, 314.7053,
                              517.7299, 268.4953
                          ],
                          [
                              284.8728, 275.7985, 374.0677, 320.4990,
                              395.4019, 275.4633, 306.2896, 231.4310,
                              507.0849, 312.4057
                          ],
                          [
                              313.1638, 271.4258, 405.0255, 312.2325,
                              424.7874, 266.3525, 333.6973, 225.7708,
                              510.1232, 305.3802
                          ],
                          [
                              308.6859, 270.9629, 394.2789, 323.2781,
                              419.7905, 280.1602, 333.8901, 228.1624,
                              519.1964, 321.5318
                          ],
                          [
                              386.8067, 284.8947, 294.2467, 323.2223,
                              313.3221, 368.9970, 405.9415, 330.9298,
                              495.9970, 268.9920
                          ],
                          [
                              432.0219, 299.6021, 340.8581, 339.4676,
                              360.2354, 384.5515, 451.4394, 345.2190,
                              514.6357, 291.2043
                          ],
                          [
                              351.3389, 264.5325, 267.5279, 318.2321,
                              293.7460, 360.0423, 378.4428, 306.9586,
                              516.4390, 259.7810
                          ],
                          [
                              521.1902, 254.0693, 492.7884, 349.7861,
                              539.6320, 364.5190, 569.2258, 268.8824,
                              506.9431, 286.9752
                          ],
                          [
                              264.8554, 275.9547, 338.1317, 345.3435,
                              372.7012, 308.4648, 299.3454, 239.9245,
                              506.2117, 373.8413
                          ]])

    for mpc_idx in range(config['mpc']['num_episodes']):
        if keypoint_observation:
            mpc_episode_keypoint_observation(config,
                                             mpc_idx,
                                             model_dy,
                                             mpc_dir,
                                             planner,
                                             obs_goals[mpc_idx],
                                             action_function,
                                             observation_function,
                                             use_gpu=use_gpu)
        else:
            # not supported for now
            raise AssertionError(
                "currently only support keypoint observation")
def eval_dynamics(config, train_dir, eval_dir, state_dict_path=None,
                  keypoint_observation=False, debug=False,
                  render_human=False):
    """Evaluate a trained dynamics model by rolling it out on held-out episodes.

    Loads a DynaNetMLP checkpoint, builds the 'valid' phase dataset from the
    config, and runs per-episode evaluation (keypoint or raw-state variant)
    for the first ``num_episodes`` episodes (sorted by name).

    Args:
        config: experiment config dict (train/eval/dynamics sections used).
        train_dir: directory containing the trained checkpoints.
        eval_dir: output directory; 'eval.log' is tee'd here.
        state_dict_path: optional explicit checkpoint path; if None it is
            derived from config['eval']['eval_dy_epoch'] / 'eval_dy_iter'
            (-1 means use 'net_best_dy.pth').
        keypoint_observation: choose the keypoint-based eval routine.
        debug: if True, evaluate only the first episode.
        render_human: forwarded to the per-episode eval routines.
    """
    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    # mirror stdout to a log file for the whole run
    tee = Tee(os.path.join(eval_dir, 'eval.log'), 'w')

    print(config)

    use_gpu = torch.cuda.is_available()

    '''
    model
    '''
    model_dy = DynaNetMLP(config)

    # print model #params
    print("model #params: %d" % count_trainable_parameters(model_dy))

    if state_dict_path is None:
        if config['eval']['eval_dy_epoch'] == -1:
            state_dict_path = os.path.join(train_dir, 'net_best_dy.pth')
        else:
            state_dict_path = os.path.join(
                train_dir, 'net_dy_epoch_%d_iter_%d.pth' % \
                (config['eval']['eval_dy_epoch'], config['eval']['eval_dy_iter']))
        print("Loading saved ckp from %s" % state_dict_path)

    model_dy.load_state_dict(torch.load(state_dict_path))
    model_dy.eval()

    if use_gpu:
        model_dy.cuda()

    criterionMSE = nn.MSELoss()
    bar = ProgressBar()

    st_idx = config['eval']['eval_st_idx']
    ed_idx = config['eval']['eval_ed_idx']

    # load the data
    episodes = load_episodes_from_config(config)

    # generate action/observation functions
    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    dataset = MultiEpisodeDataset(config,
                                  action_function=action_function,
                                  observation_function=observation_function,
                                  episodes=episodes,
                                  phase="valid")

    episode_names = dataset.get_episode_names()
    # sort for a deterministic evaluation order across runs
    episode_names.sort()

    num_episodes = None
    # for backwards compatibility
    if "num_episodes" in config["eval"]:
        num_episodes = config["eval"]["num_episodes"]
    else:
        num_episodes = 10

    episode_list = []
    if debug:
        episode_list = [episode_names[0]]
    else:
        episode_list = episode_names[:num_episodes]

    for roll_idx, episode_name in enumerate(episode_list):
        print("episode_name", episode_name)
        if keypoint_observation:
            eval_episode_keypoint_observations(config,
                                               dataset,
                                               episode_name,
                                               roll_idx,
                                               model_dy,
                                               eval_dir,
                                               start_idx=9,
                                               n_prediction=30,
                                               render_human=render_human)
        else:
            eval_episode(config,
                         dataset,
                         episode_name,
                         roll_idx,
                         model_dy,
                         eval_dir,
                         start_idx=9,
                         n_prediction=30,
                         render_human=render_human)
def train_dynamics(
    config,
    train_dir,  # str: directory to save output
    multi_episode_dict=None,
    visual_observation_function=None,
    metadata=None,
    spatial_descriptors_data=None,
):
    """Train a state-space dynamics model with multi-step rollout losses.

    Builds train/valid datasets from ``multi_episode_dict``, constructs the
    dynamics net from ``config``, and optimizes a weighted sum of the losses
    enabled in ``config['loss_function']`` (mse / l1 / l2 / smooth_l1 and
    their final-step variants) over ``n_rollout`` predicted steps. Logs to
    TensorBoard, checkpoints periodically, and saves 'net_best_dy' whenever
    the validation metric named by config['train']['valid_loss_type']
    improves. A KeyboardInterrupt saves an emergency checkpoint.

    Args:
        config: experiment config dict (train / dynamics_net / loss_function
            sections used).
        train_dir: directory for logs, checkpoints, config.yaml, tensorboard.
        multi_episode_dict: required mapping of episode name -> episode data.
        visual_observation_function: optional; forwarded to the dataset.
        metadata: optional dict, pickled to train_dir/metadata.p.
        spatial_descriptors_data: optional, pickled to
            train_dir/spatial_descriptors.p.
    """
    assert multi_episode_dict is not None
    # assert spatial_descriptors_idx is not None

    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    st_epoch = config['train'][
        'resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    # mirror stdout to a log file named after the starting epoch
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch),
              'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, "config.yaml"))

    if metadata is not None:
        save_pickle(metadata, os.path.join(train_dir, 'metadata.p'))

    if spatial_descriptors_data is not None:
        save_pickle(spatial_descriptors_data,
                    os.path.join(train_dir, 'spatial_descriptors.p'))

    training_stats = dict()
    training_stats_file = os.path.join(train_dir, 'training_stats.yaml')

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=multi_episode_dict,
            phase=phase,
            visual_observation_function=visual_observation_function)

        print("len(datasets[phase])", len(datasets[phase]))
        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=True if phase == 'train' else False,
            num_workers=config['train']['num_workers'],
            drop_last=True)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    # compute normalization parameters if not starting from pre-trained network . . .

    # dead debug branch (kept intentionally disabled): dumps one sample and
    # exits, useful when inspecting dataset output shapes
    if False:
        dataset = datasets["train"]
        data = dataset[0]
        print("data['observations_combined'].shape",
              data['observations_combined'].shape)
        print("data.keys()", data.keys())
        print("data['observations_combined']",
              data['observations_combined'][0])
        print("data['observations_combined'].shape",
              data['observations_combined'].shape)
        print("data['actions'].shape", data['actions'].shape)
        print("data['actions']\n", data['actions'])
        quit()

    '''
    Build model for dynamics prediction
    '''
    model_dy = build_dynamics_model(config)
    if config['dynamics_net'] == "mlp_weight_matrix":
        raise ValueError("can't use weight matrix with standard setup")

    # criterion
    criterionMSE = nn.MSELoss()
    l1Loss = nn.L1Loss()
    smoothL1 = nn.SmoothL1Loss()

    # optimizer
    params = model_dy.parameters()
    lr = float(config['train']['lr'])
    optimizer = optim.Adam(params,
                           lr=lr,
                           betas=(config['train']['adam_beta1'], 0.999))

    # setup scheduler
    sc = config['train']['lr_scheduler']
    scheduler = None

    if config['train']['lr_scheduler']['enabled']:
        if config['train']['lr_scheduler']['type'] == "ReduceLROnPlateau":
            scheduler = ReduceLROnPlateau(optimizer,
                                          mode='min',
                                          factor=sc['factor'],
                                          patience=sc['patience'],
                                          threshold_mode=sc['threshold_mode'],
                                          cooldown=sc['cooldown'],
                                          verbose=True)
        elif config['train']['lr_scheduler']['type'] == "StepLR":
            step_size = config['train']['lr_scheduler']['step_size']
            gamma = config['train']['lr_scheduler']['gamma']
            scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma)
        else:
            raise ValueError("unknown scheduler type: %s" %
                             (config['train']['lr_scheduler']['type']))

    if use_gpu:
        print("using gpu")
        model_dy = model_dy.cuda()

    # print("model_dy.vision_net._ref_descriptors.device", model_dy.vision_net._ref_descriptors.device)
    # print("model_dy.vision_net #params: %d" %(count_trainable_parameters(model_dy.vision_net)))

    best_valid_loss = np.inf
    valid_loss_type = config['train']['valid_loss_type']
    global_iteration = 0
    # per-phase step counters, used as the tensorboard x-axis for the
    # per-loss plots and recorded into training_stats on best-valid saves
    counters = {'train': 0, 'valid': 0}
    epoch_counter_external = 0
    loss = 0

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch,
                              global_iteration)
            for phase in phases:
                # only validate at a certain frequency
                if (phase == "valid") and (
                    (epoch % config['train']['valid_frequency']) != 0):
                    continue

                model_dy.train(phase == 'train')

                average_meter_container = dict()

                step_duration_meter = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):

                    loss_container = dict()  # store the losses for this step

                    step_start_time = time.time()

                    global_iteration += 1
                    counters[phase] += 1

                    # gradients enabled only during the train phase
                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" %
                                  (global_iteration))
                            print("n_samples", n_samples)

                        # [B, n_samples, obs_dim]
                        states = data['observations_combined']

                        # [B, n_samples, action_dim]
                        actions = data['actions']
                        B = actions.shape[0]

                        if use_gpu:
                            states = states.cuda()
                            actions = actions.cuda()

                        # state_cur: B x n_his x state_dim
                        # state_cur = states[:, :n_his]

                        # [B, n_his, state_dim]
                        state_init = states[:, :n_his]

                        # We want to rollout n_roll steps
                        # actions = [B, n_his + n_roll, -1]
                        # so we want action_seq.shape = [B, n_roll, -1]
                        # NOTE(review): this slice has length n_his + n_roll - 1,
                        # not n_roll as the comment above says — presumably
                        # rollout_model consumes a sliding n_his-window of
                        # actions per predicted step; confirm against
                        # rollout_model before changing
                        action_start_idx = 0
                        action_end_idx = n_his + n_roll - 1
                        action_seq = actions[:, action_start_idx:
                                             action_end_idx, :]

                        if DEBUG:
                            print("states.shape", states.shape)
                            print("state_init.shape", state_init.shape)
                            print("actions.shape", actions.shape)
                            print("action_seq.shape", action_seq.shape)

                        # try using models_dy.rollout_model instead of doing this manually
                        rollout_data = rollout_model(state_init=state_init,
                                                     action_seq=action_seq,
                                                     dynamics_net=model_dy,
                                                     compute_debug_data=False)

                        # [B, n_roll, state_dim]
                        state_rollout_pred = rollout_data['state_pred']

                        # [B, n_roll, state_dim]
                        state_rollout_gt = states[:, n_his:]

                        if DEBUG:
                            print("state_rollout_gt.shape",
                                  state_rollout_gt.shape)
                            print("state_rollout_pred.shape",
                                  state_rollout_pred.shape)

                        # the loss function is between
                        # [B, n_roll, state_dim]
                        state_pred_err = state_rollout_pred - state_rollout_gt

                        # everything is in 3D space now so no need to do any scaling
                        # all the losses would be in meters . . . .
                        loss_mse = criterionMSE(state_rollout_pred,
                                                state_rollout_gt)
                        loss_l1 = l1Loss(state_rollout_pred, state_rollout_gt)
                        loss_l2 = torch.norm(state_pred_err, dim=-1).mean()
                        loss_smoothl1 = smoothL1(state_rollout_pred,
                                                 state_rollout_gt)
                        loss_smoothl1_final_step = smoothL1(
                            state_rollout_pred[:, -1], state_rollout_gt[:, -1])

                        # compute losses at final step of the rollout
                        mse_final_step = criterionMSE(
                            state_rollout_pred[:, -1], state_rollout_gt[:,
                                                                        -1])
                        l2_final_step = torch.norm(state_pred_err[:, -1],
                                                   dim=-1).mean()
                        l1_final_step = l1Loss(state_rollout_pred[:, -1],
                                               state_rollout_gt[:, -1])

                        loss_container['mse'] = loss_mse
                        loss_container['l1'] = loss_l1
                        loss_container['mse_final_step'] = mse_final_step
                        loss_container['l1_final_step'] = l1_final_step
                        loss_container['l2_final_step'] = l2_final_step
                        loss_container['l2'] = loss_l2
                        loss_container['smooth_l1'] = loss_smoothl1
                        loss_container[
                            'smooth_l1_final_step'] = loss_smoothl1_final_step

                        # compute the loss
                        # (weighted sum of whichever losses the config enables)
                        loss = 0
                        for key, val in config['loss_function'].items():
                            if val['enabled']:
                                loss += loss_container[key] * val['weight']

                        loss_container['loss'] = loss

                        for key, val in loss_container.items():
                            if not key in average_meter_container:
                                average_meter_container[key] = AverageMeter()

                            average_meter_container[key].update(val.item(), B)

                    step_duration_meter.update(time.time() - step_start_time)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                    if (i % config['train']['log_per_iter']
                            == 0) or (global_iteration %
                                      config['train']['log_per_iter'] == 0):
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))

                        log += ', l2: %.6f' % (loss_container['l2'].item())
                        log += ', l2_final_step: %.6f' % (
                            loss_container['l2_final_step'].item())

                        log += ', step time %.6f' % (step_duration_meter.avg)
                        step_duration_meter.reset()

                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 100 iterations
                        if global_iteration > 100:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss_train/%s" % (phase),
                                              loss.item(), global_iteration)

                            for loss_type, loss_obj in loss_container.items():
                                plot_name = "Loss/%s/%s" % (loss_type, phase)
                                writer.add_scalar(plot_name, loss_obj.item(),
                                                  counters[phase])

                    if phase == 'train' and global_iteration % config[
                            'train']['ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_epoch_%d_iter_%d' %
                            (train_dir, epoch, i))

                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'],
                    average_meter_container[valid_loss_type].avg,
                    best_valid_loss)
                print(log)

                # record all average_meter losses
                for key, meter in average_meter_container.items():
                    writer.add_scalar("AvgMeter/%s/%s" % (key, phase),
                                      meter.avg, epoch)

                # StepLR steps once per train epoch; ReduceLROnPlateau steps
                # on the validation metric
                if phase == "train":
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type']
                            == "StepLR"):
                        scheduler.step()

                if phase == 'valid':
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type']
                            == "ReduceLROnPlateau"):
                        scheduler.step(
                            average_meter_container[valid_loss_type].avg)

                    if average_meter_container[
                            valid_loss_type].avg < best_valid_loss:
                        best_valid_loss = average_meter_container[
                            valid_loss_type].avg
                        training_stats['epoch'] = epoch
                        training_stats['global_iteration'] = counters['valid']
                        save_yaml(training_stats, training_stats_file)
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' %
            (train_dir, epoch_counter_external))
        writer.flush()  # flush SummaryWriter events to disk
def train_dynamics(config,
                   train_dir,  # str: directory to save output
                   multi_episode_dict,  # multi_episode_dict
                   ):
    """Train a visual dynamics model (vision net + dynamics net) end to end.

    NOTE(review): this file defines another ``train_dynamics`` above; if both
    live in the same module this definition shadows the earlier one — confirm
    which one callers intend to import.

    States are formed by concatenating the vision net's output (descriptor
    keypoints, either precomputed on disk or sampled here from a reference
    image) with the low-dimensional observations. The loop manually unrolls
    ``n_rollout`` prediction steps, feeding each prediction back into the
    history window, and minimizes MSE normalized by the image diagonal so the
    loss is in the [0, 1] range. Logs RMSE/L1 to TensorBoard, checkpoints
    periodically, saves 'net_best_dy' on validation improvement, and saves an
    emergency checkpoint on KeyboardInterrupt.

    Args:
        config: experiment config dict (train / vision_net / dataset / env
            sections used).
        train_dir: directory for logs, checkpoints, config.yaml, tensorboard.
        multi_episode_dict: mapping of episode name -> episode data.
    """
    # whether descriptor keypoints were precomputed offline (skips the
    # vision-net forward pass during training)
    use_precomputed_keypoints = config['dataset']['visual_observation'][
        'enabled'] and config['dataset']['visual_observation'][
            'descriptor_keypoints']

    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    st_epoch = config['train'][
        'resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    # mirror stdout to a log file named after the starting epoch
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch),
              'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, "config.yaml"))

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=multi_episode_dict,
            phase=phase)

        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=True if phase == 'train' else False,
            num_workers=config['train']['num_workers'],
            drop_last=True)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    # compute normalization parameters if not starting from pre-trained network . . .

    '''
    define model for dynamics prediction
    '''
    model_dy = build_visual_dynamics_model(config)
    K = config['vision_net']['num_ref_descriptors']

    print("model_dy.vision_net._reference_descriptors.shape",
          model_dy.vision_net._ref_descriptors.shape)
    print("model_dy.vision_net.descriptor_dim",
          model_dy.vision_net.descriptor_dim)
    print("model_dy #params: %d" % count_trainable_parameters(model_dy))

    camera_name = config['vision_net']['camera_name']
    W = config['env']['rgbd_sensors']['sensor_list'][camera_name]['width']
    H = config['env']['rgbd_sensors']['sensor_list'][camera_name]['height']
    diag = np.sqrt(W**2 + H**2)  # use this to scale the loss

    # sample reference descriptors unless using precomputed keypoints
    if not use_precomputed_keypoints:
        # sample reference descriptors
        # (descriptors are drawn from frame 0 of the first (sorted) training
        # episode's masked descriptor image)
        episode_names = list(datasets["train"].episode_dict.keys())
        episode_names.sort()
        episode_name = episode_names[0]
        episode = datasets["train"].episode_dict[episode_name]
        episode_idx = 0
        camera_name = config["vision_net"]["camera_name"]
        image_data = episode.get_image_data(camera_name, episode_idx)
        des_img = torch.Tensor(image_data['descriptor'])
        mask_img = torch.Tensor(image_data['mask'])
        ref_descriptor_dict = sample_descriptors(
            des_img, mask_img, config['vision_net']['num_ref_descriptors'])

        model_dy.vision_net._ref_descriptors.data = ref_descriptor_dict[
            'descriptors']
        model_dy.vision_net.reference_image = image_data['rgb']
        model_dy.vision_net.reference_indices = ref_descriptor_dict['indices']
    else:
        # load reference descriptors matching the precomputed keypoints
        metadata_file = os.path.join(
            get_data_root(), config['dataset']['descriptor_keypoints_dir'],
            'metadata.p')
        descriptor_metadata = load_pickle(metadata_file)

        # [32, 2]
        ref_descriptors = torch.Tensor(
            descriptor_metadata['ref_descriptors'])

        # [K, 2]
        ref_descriptors = ref_descriptors[:K]
        model_dy.vision_net._ref_descriptors.data = ref_descriptors
        model_dy.vision_net._ref_descriptors_metadata = descriptor_metadata

        # this is just a sanity check
        assert model_dy.vision_net.num_ref_descriptors == K

    print("reference_descriptors", model_dy.vision_net._ref_descriptors)

    # criterion
    criterionMSE = nn.MSELoss()
    l1Loss = nn.L1Loss()

    # optimizer
    params = model_dy.parameters()
    lr = float(config['train']['lr'])
    optimizer = optim.Adam(params,
                           lr=lr,
                           betas=(config['train']['adam_beta1'], 0.999))

    # setup scheduler
    # NOTE(review): unlike the other train_dynamics, the scheduler here is
    # always ReduceLROnPlateau and is constructed unconditionally; the
    # 'enabled' flag only gates stepping it below
    sc = config['train']['lr_scheduler']
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=sc['factor'],
                                  patience=sc['patience'],
                                  threshold_mode=sc['threshold_mode'],
                                  cooldown=sc['cooldown'],
                                  verbose=True)

    if use_gpu:
        print("using gpu")
        model_dy = model_dy.cuda()

    print("model_dy.vision_net._ref_descriptors.device",
          model_dy.vision_net._ref_descriptors.device)
    print("model_dy.vision_net #params: %d" %
          (count_trainable_parameters(model_dy.vision_net)))

    best_valid_loss = np.inf
    global_iteration = 0
    epoch_counter_external = 0

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch,
                              global_iteration)
            for phase in phases:
                model_dy.train(phase == 'train')

                meter_loss_rmse = AverageMeter()
                step_duration_meter = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):

                    step_start_time = time.time()

                    global_iteration += 1

                    # gradients enabled only during the train phase
                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" %
                                  (global_iteration))

                        # visual_observations = data['visual_observations']
                        visual_observations_list = data[
                            'visual_observations_list']
                        observations = data['observations']
                        actions = data['actions']

                        if use_gpu:
                            observations = observations.cuda()
                            actions = actions.cuda()

                        # states, actions = data
                        assert actions.size(1) == n_samples

                        B = actions.size(0)
                        loss_mse = 0.

                        # compute the output of the visual model for all timesteps
                        visual_model_output_list = []
                        for visual_obs in visual_observations_list:
                            # visual_obs is a dict containing observation for a single
                            # time step (of course across a batch however)
                            # visual_obs[<camera_name>]['rgb_tensor'] has shape [B, 3, H, W]

                            # probably need to cast input to cuda
                            dynamics_net_input = None
                            if use_precomputed_keypoints:
                                # note precomputed descriptors stored on disk are of size
                                # K = 32. We need to trim it down to the appropriate size
                                # [B, K_disk, 2] where K_disk is num keypoints on disk
                                keypoints = visual_obs[camera_name][
                                    'descriptor_keypoints']

                                # [B, 32, 2] where K is num keypoints
                                keypoints = keypoints[:, :K]

                                if DEBUG:
                                    print("keypoints.shape", keypoints.shape)

                                dynamics_net_input = keypoints.flatten(
                                    start_dim=1)
                            else:
                                out_dict = model_dy.vision_net.forward(
                                    visual_obs)

                                # [B, vision_model_out_dim]
                                dynamics_net_input = out_dict[
                                    'dynamics_net_input']

                            visual_model_output_list.append(
                                dynamics_net_input)

                        # concatenate this into a tensor
                        # [B, n_samples, vision_model_out_dim]
                        visual_model_output = torch.stack(
                            visual_model_output_list, dim=1)

                        # cast this to float so it can be concatenated below
                        visual_model_output = visual_model_output.type_as(
                            observations)

                        if DEBUG:
                            print('visual_model_output.shape',
                                  visual_model_output.shape)
                            print("observations.shape", observations.shape)
                            print("actions.shape", actions.shape)

                        # states is gotten by concatenating visual_observations and observations
                        # [B, n_samples, vision_model_out_dim + obs_dim]
                        states = torch.cat(
                            (visual_model_output, observations), dim=-1)

                        # state_cur: B x n_his x state_dim
                        state_cur = states[:, :n_his]

                        if DEBUG:
                            print("states.shape", states.shape)

                        # manual n_roll-step unrolling: predict one step,
                        # then slide the prediction into the history window
                        for j in range(n_roll):
                            if DEBUG:
                                print("n_roll j: %d" % (j))

                            state_des = states[:, n_his + j]

                            # action_cur: B x n_his x action_dim
                            action_cur = actions[:, j:j +
                                                 n_his] if actions is not None else None

                            # state_pred: B x state_dim
                            input = {
                                'observation': state_cur,
                                'action': action_cur,
                            }

                            if DEBUG:
                                print("state_cur.shape", state_cur.shape)
                                print("action_cur.shape", action_cur.shape)

                            state_pred = model_dy.dynamics_net(input)

                            # normalize by diag to ensure the loss is in [0,1] range
                            loss_mse_cur = criterionMSE(
                                state_pred / diag, state_des / diag)
                            loss_mse += loss_mse_cur / n_roll

                            # l1Loss
                            # NOTE(review): loss_l1 is recomputed each j and
                            # only the final step's value is logged below;
                            # it does not contribute to the training loss
                            loss_l1 = l1Loss(state_pred, state_des)

                            # update state_cur
                            # state_pred.unsqueeze(1): B x 1 x state_dim
                            # state_cur: B x n_his x state_dim
                            state_cur = torch.cat(
                                [state_cur[:, 1:],
                                 state_pred.unsqueeze(1)], 1)

                        meter_loss_rmse.update(np.sqrt(loss_mse.item()), B)

                    step_duration_meter.update(time.time() - step_start_time)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss_mse.backward()
                        optimizer.step()

                    if (i % config['train']['log_per_iter']
                            == 0) or (global_iteration %
                                      config['train']['log_per_iter'] == 0):
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))
                        log += ', rmse: %.6f (%.6f)' % (np.sqrt(
                            loss_mse.item()), meter_loss_rmse.avg)

                        log += ', step time %.6f' % (step_duration_meter.avg)
                        step_duration_meter.reset()

                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 100 iterations
                        if global_iteration > 100:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss_MSE/%s" % (phase),
                                              loss_mse.item(),
                                              global_iteration)
                            writer.add_scalar("L1/%s" % (phase),
                                              loss_l1.item(),
                                              global_iteration)
                            writer.add_scalar("L1_fraction/%s" % (phase),
                                              loss_l1.item() / diag,
                                              global_iteration)
                            writer.add_scalar("RMSE average loss/%s" %
                                              (phase), meter_loss_rmse.avg,
                                              global_iteration)

                    if phase == 'train' and i % config['train'][
                            'ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_epoch_%d_iter_%d' %
                            (train_dir, epoch, i))

                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'],
                    meter_loss_rmse.avg, best_valid_loss)
                print(log)

                if phase == 'valid':
                    if config['train']['lr_scheduler']['enabled']:
                        scheduler.step(meter_loss_rmse.avg)

                    # print("\nPhase == valid")
                    # print("meter_loss_rmse.avg", meter_loss_rmse.avg)
                    # print("best_valid_loss", best_valid_loss)
                    if meter_loss_rmse.avg < best_valid_loss:
                        best_valid_loss = meter_loss_rmse.avg
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' %
            (train_dir, epoch_counter_external))
        writer.flush()  # flush SummaryWriter events to disk
def evaluate_mpc(
    model_dir,
    config_planner_mpc=None,
    save_dir=None,
    planner_type=None,
    env_config=None,
    strict=True,
    generate_initial_condition_func=None,
):
    """Run a single MPC evaluation episode on the Drake pusher-slider environment.

    Loads a trained dynamics model (and its dense-descriptor model) from
    ``model_dir``, builds the matching observation/action/visual-observation
    pipeline, constructs the requested planner, and delegates the actual
    closed-loop rollout to ``mpc_eval_drake_pusher_slider.evaluate_mpc``.

    :param model_dir: directory containing the saved model checkpoint.
    :param config_planner_mpc: optional config whose 'mpc' and 'eval' sections
        override the ones stored with the model.
    :param save_dir: directory to save evaluation output (required).
    :param planner_type: "random_shooting" or "mppi" (required).
    :param env_config: config for DrakePusherSliderEnv (required).
    :param strict: passed through to load_model (state-dict strictness).
    :param generate_initial_condition_func: callable producing initial
        conditions for the evaluation episodes (required).
    :return: dict with key 'save_dir'.
    :raises ValueError: if planner_type is not recognized.
    """
    # keyword-only-in-spirit arguments: all of these must be supplied
    assert save_dir is not None
    assert planner_type is not None
    assert env_config is not None
    assert generate_initial_condition_func is not None

    model_data = load_model(model_dir, strict=strict)
    model_dy = model_data['model_dy']
    model_config = model_dy.config
    config = model_config
    model_dd = model_data['model_dd']  # dense-descriptor (vision) model

    # create the environment
    env = DrakePusherSliderEnv(env_config, visualize=False)
    env.reset()

    camera_name = model_data['metadata']['camera_name']

    # sanity check: evaluation must use the same camera the model was trained with
    camera_name_in_training = model_config['dataset'][
        'visual_observation_function']['camera_name']
    assert camera_name == camera_name_in_training, "camera_names don't match: camera_name = %s, camera_name_in_trainig = %s" % (
        camera_name, camera_name_in_training)

    T_world_camera = env.camera_pose(camera_name)
    camera_K_matrix = env.camera_K_matrix(camera_name)

    spatial_descriptor_data = model_data['spatial_descriptor_data']
    # NOTE(review): .cuda() assumes a GPU is available — no CPU fallback here
    ref_descriptors = torch.Tensor(
        spatial_descriptor_data['spatial_descriptors']).cuda()
    K = ref_descriptors.shape[0]  # number of reference descriptors/keypoints

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.drake_pusher_position_3D(
        config)
    visual_observation_function = \
        VisualObservationFunctionFactory.function_from_config(config,
                                                              camera_name=camera_name,
                                                              model_dd=model_dd,
                                                              ref_descriptors=ref_descriptors,
                                                              K_matrix=camera_K_matrix,
                                                              T_world_camera=T_world_camera,
                                                              )

    episode = OnlineEpisodeReader()
    mpc_input_builder = DynamicsModelInputBuilder(
        observation_function=observation_function,
        visual_observation_function=visual_observation_function,
        action_function=action_function,
        episode=episode)

    def goal_func(obs_local):
        # goal is the keypoint tensor produced by the visual observation pipeline
        keypoints_dict = visual_observation_function(obs_local)
        return keypoints_dict['tensor']

    # evaluate the cost on all 3D keypoint coordinates (3 per keypoint)
    eval_indices = np.arange(3 * K)  # extract the keypoints

    # make a planner config, same as model config but with mpc and eval sections
    # replaced
    planner_config = copy.copy(model_config)
    if config_planner_mpc is not None:
        planner_config['mpc'] = config_planner_mpc['mpc']
        planner_config['eval'] = config_planner_mpc['eval']

    planner = None
    if planner_type == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif planner_type == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (planner_type))

    # run a single iteration
    mpc_eval_drake_pusher_slider.evaluate_mpc(
        model_dy=model_dy,
        env=env,
        episode=episode,
        mpc_input_builder=mpc_input_builder,
        planner=planner,
        eval_indices=eval_indices,
        goal_func=goal_func,
        config=planner_config,
        wait_for_user_input=False,
        save_dir=save_dir,
        model_name="test",
        experiment_name="test",
        generate_initial_condition_func=generate_initial_condition_func)

    return {'save_dir': save_dir}
def train_dynamics(
    config,
    train_dir,  # str: directory to save output
):
    """Train the MLP dynamics model (PusherSlider variant) with n-step rollouts.

    Builds train/valid datasets from ``config``, either resumes a checkpoint or
    creates a fresh ``DynaNetMLP`` (computing data normalization statistics for
    the fresh case), then runs the standard train/valid epoch loop. Checkpoints
    and tensorboard logs are written under ``train_dir``; the best model on the
    validation RMSE is saved as ``net_best_dy``.

    :param config: nested config dict (expects 'train' and 'env' sections).
    :param train_dir: directory to save logs, config copy, and checkpoints.
    """
    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    # NOTE(review): resume check below uses >= 0 while st_epoch uses > 0;
    # preserved as-is — confirm whether epoch 0 resume is intended.
    st_epoch = config['train'][
        'resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config so the run is reproducible from train_dir alone
    save_yaml(config, os.path.join(train_dir, "config.yaml"))
    print(config)

    # load the data
    episodes = load_episodes_from_config(config)
    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=episodes,
            phase=phase)

        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=(phase == 'train'),
            num_workers=config['train']['num_workers'])

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    '''
    define model for dynamics prediction
    '''
    if config['train']['resume_epoch'] >= 0:
        # resume from a pretrained checkpoint
        # BUGFIX: the model must be constructed before load_state_dict can be
        # called on it (previously model_dy was None here, which raised
        # AttributeError on any resume).  The loaded state dict also restores
        # the normalizer parameters saved with the checkpoint.
        model_dy = DynaNetMLP(config)
        state_dict_path = os.path.join(
            train_dir, 'net_dy_epoch_%d_iter_%d_state_dict.pth' %
            (config['train']['resume_epoch'], config['train']['resume_iter']))
        print("Loading saved ckp from %s" % state_dict_path)
        model_dy.load_state_dict(torch.load(state_dict_path))
        # NOTE(review): optimizer state is NOT restored on resume — the Adam
        # moments restart from zero.  Confirm whether that is acceptable.
    else:
        # not starting from pre-trained: create the network and compute the
        # normalization parameters from the training set
        model_dy = DynaNetMLP(config)

        # compute normalization params
        stats = datasets["train"].compute_dataset_statistics()

        obs_mean = stats['observations']['mean']
        obs_std = stats['observations']['std']
        observations_normalizer = DataNormalizer(obs_mean, obs_std)

        action_mean = stats['actions']['mean']
        action_std = stats['actions']['std']
        actions_normalizer = DataNormalizer(action_mean, action_std)

        # normalizers live on the model so they are saved with checkpoints
        model_dy.action_normalizer = actions_normalizer
        model_dy.state_normalizer = observations_normalizer

    print("model_dy #params: %d" % count_trainable_parameters(model_dy))

    # criterion
    criterionMSE = nn.MSELoss()

    # optimizer
    params = model_dy.parameters()
    optimizer = optim.Adam(params,
                           lr=config['train']['lr'],
                           betas=(config['train']['adam_beta1'], 0.999))
    scheduler = ReduceLROnPlateau(optimizer,
                                  'min',
                                  factor=0.9,
                                  patience=10,
                                  verbose=True)

    if use_gpu:
        model_dy = model_dy.cuda()

    best_valid_loss = np.inf
    global_iteration = 0
    epoch_counter_external = 0  # for checkpoint naming on KeyboardInterrupt

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)

            for phase in phases:
                # toggles dropout/batchnorm behavior between train and eval
                model_dy.train(phase == 'train')

                meter_loss_rmse = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):
                    global_iteration += 1

                    # gradients only needed during the train phase
                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        if config['env']['type'] in ['PusherSlider']:
                            states = data['observations']
                            actions = data['actions']

                            if use_gpu:
                                states = states.cuda()
                                actions = actions.cuda()

                            # states, actions = data
                            assert states.size(1) == n_samples

                            # normalize states and actions once for entire rollout
                            states = model_dy.state_normalizer.normalize(
                                states)
                            actions = model_dy.action_normalizer.normalize(
                                actions)

                            B = states.size(0)
                            loss_mse = 0.

                            # state_cur: B x n_his x state_dim
                            state_cur = states[:, :n_his]

                            # roll the model forward n_roll steps, feeding its
                            # own predictions back in as history
                            for j in range(n_roll):
                                state_des = states[:, n_his + j]

                                # action_cur: B x n_his x action_dim
                                action_cur = actions[:, j:j + n_his] if actions is not None else None

                                # state_pred: B x state_dim
                                state_pred = model_dy(state_cur, action_cur)

                                loss_mse_cur = criterionMSE(
                                    state_pred, state_des)
                                # average the per-step losses over the rollout
                                loss_mse += loss_mse_cur / n_roll

                                # update state_cur with the prediction
                                # state_pred.unsqueeze(1): B x 1 x state_dim
                                state_cur = torch.cat([
                                    state_cur[:, 1:],
                                    state_pred.unsqueeze(1)
                                ], 1)

                            meter_loss_rmse.update(np.sqrt(loss_mse.item()), B)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss_mse.backward()
                        optimizer.step()

                    if i % config['train']['log_per_iter'] == 0:
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))
                        log += ', rmse: %.6f (%.6f)' % (np.sqrt(
                            loss_mse.item()), meter_loss_rmse.avg)
                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 500 iterations
                        if global_iteration > 500:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss/train", loss_mse.item(),
                                              global_iteration)
                            writer.add_scalar("RMSE average loss/train",
                                              meter_loss_rmse.avg,
                                              global_iteration)

                    if phase == 'train' and i % config['train'][
                            'ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_epoch_%d_iter_%d' %
                            (train_dir, epoch, i))

                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'],
                    meter_loss_rmse.avg, best_valid_loss)
                print(log)

                if phase == 'valid':
                    # scheduler steps on validation RMSE
                    scheduler.step(meter_loss_rmse.avg)
                    writer.add_scalar("RMSE average loss/valid",
                                      meter_loss_rmse.avg, global_iteration)
                    if meter_loss_rmse.avg < best_valid_loss:
                        best_valid_loss = meter_loss_rmse.avg
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

            writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' %
            (train_dir, epoch_counter_external))
        writer.flush()  # flush SummaryWriter events to disk
def train_dynamics(config,
                   train_dir,
                   data_dir,
                   model_dy,
                   global_iteration,
                   writer):
    """Train (or continue training) a DynaNetMLP on Drake simulation episodes.

    Unlike the other train_dynamics variant, this one takes an existing model,
    iteration counter and SummaryWriter so it can be called repeatedly (e.g.
    for on-policy data collection loops), and uses ``rollout_model`` to do the
    multi-step rollout instead of unrolling manually.

    :param config: nested config dict (expects a 'train' section).
    :param train_dir: directory where checkpoints are saved.
    :param data_dir: directory of episodes readable by DrakeSimEpisodeReader.
    :param model_dy: existing DynaNetMLP to continue training, or None to
        create a fresh one.
    :param global_iteration: iteration counter to resume from (also used for
        tensorboard x-axis and checkpoint naming).
    :param writer: torch.utils.tensorboard SummaryWriter to log to.
    :return: tuple (model_dy, global_iteration) — the trained model and the
        updated iteration counter.
    :raises ValueError: for an unknown lr_scheduler type.
    """
    # load the data (states/actions only; no images needed for dynamics training)
    multi_episode_dict = DrakeSimEpisodeReader.load_dataset(
        data_dir, load_image_data=False)

    '''
    for episode_name in list(multi_episode_dict.keys()):
        print("episode name", episode_name)
        episode = multi_episode_dict[episode_name]
        obs = episode.get_observation(34)
        print(obs)
    '''

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)

        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=multi_episode_dict,
            phase=phase)

        # print(config['train'])
        # drop_last=True keeps batch size constant, which keeps the loss
        # statistics comparable across iterations
        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=True if phase == 'train' else False,
            num_workers=config['train']['num_workers'],
            drop_last=True)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    '''
    define model for dynamics prediction
    '''
    if model_dy is None:
        model_dy = DynaNetMLP(config)

    # criterion
    MSELoss = nn.MSELoss()
    L1Loss = nn.L1Loss()

    # optimizer
    params = model_dy.parameters()
    lr = float(config['train']['lr'])
    optimizer = optim.Adam(params,
                           lr=lr,
                           betas=(config['train']['adam_beta1'], 0.999))

    # setup scheduler
    sc = config['train']['lr_scheduler']
    scheduler = None

    if config['train']['lr_scheduler']['enabled']:
        if config['train']['lr_scheduler']['type'] == "ReduceLROnPlateau":
            scheduler = ReduceLROnPlateau(optimizer,
                                          mode='min',
                                          factor=sc['factor'],
                                          patience=sc['patience'],
                                          threshold_mode=sc['threshold_mode'],
                                          cooldown=sc['cooldown'],
                                          verbose=True)
        elif config['train']['lr_scheduler']['type'] == "StepLR":
            step_size = config['train']['lr_scheduler']['step_size']
            gamma = config['train']['lr_scheduler']['gamma']
            scheduler = StepLR(optimizer,
                               step_size=step_size,
                               gamma=gamma)
        else:
            raise ValueError("unknown scheduler type: %s" %
                             (config['train']['lr_scheduler']['type']))

    if use_gpu:
        print("using gpu")
        model_dy = model_dy.cuda()

    best_valid_loss = np.inf
    counters = {'train': 0, 'valid': 0}

    try:
        for epoch in range(config['train']['n_epoch']):
            phases = ['train', 'valid']

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)

            for phase in phases:
                # toggles dropout/batchnorm between train and eval behavior
                model_dy.train(phase == 'train')

                meter_loss_rmse = AverageMeter()
                step_duration_meter = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):

                    loss_container = dict()  # store the losses for this step

                    step_start_time = time.time()
                    global_iteration += 1
                    counters[phase] += 1

                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" % global_iteration)
                            print("n_samples", n_samples)

                        # [B, n_samples, obs_dim]
                        observations = data['observations']

                        # [B, n_samples, action_dim]
                        actions = data['actions']
                        B = actions.shape[0]

                        if use_gpu:
                            observations = observations.cuda()
                            actions = actions.cuda()

                        # states, actions = data
                        assert actions.shape[1] == n_samples

                        loss_mse = 0.

                        # we don't have any visual observations, so states are observations
                        states = observations

                        # [B, n_his, state_dim]
                        state_init = states[:, :n_his]

                        # We want to rollout n_roll steps
                        # actions = [B, n_his + n_roll, -1]
                        # so we want action_seq.shape = [B, n_roll, -1]
                        # NOTE(review): the slice below has length
                        # n_his + n_roll - 1, not n_roll — presumably
                        # rollout_model consumes the first (n_his - 1) entries
                        # as action history; confirm against rollout_model.
                        action_start_idx = 0
                        action_end_idx = n_his + n_roll - 1
                        action_seq = actions[:, action_start_idx:
                                             action_end_idx, :]

                        if DEBUG:
                            print("states.shape", states.shape)
                            print("state_init.shape", state_init.shape)
                            print("actions.shape", actions.shape)
                            print("action_seq.shape", action_seq.shape)

                        # try using models_dy.rollout_model instead of doing this manually
                        rollout_data = rollout_model(state_init=state_init,
                                                     action_seq=action_seq,
                                                     dynamics_net=model_dy,
                                                     compute_debug_data=False)

                        # [B, n_roll, state_dim]
                        state_rollout_pred = rollout_data['state_pred']

                        # [B, n_roll, state_dim]
                        state_rollout_gt = states[:, n_his:]

                        if DEBUG:
                            print("state_rollout_gt.shape",
                                  state_rollout_gt.shape)
                            print("state_rollout_pred.shape",
                                  state_rollout_pred.shape)

                        # the loss function is between
                        # [B, n_roll, state_dim]
                        state_pred_err = state_rollout_pred - state_rollout_gt

                        # everything is in 3D space now so no need to do any scaling
                        # all the losses would be in meters . . . .
                        loss_mse = MSELoss(state_rollout_pred,
                                           state_rollout_gt)
                        loss_l1 = L1Loss(state_rollout_pred, state_rollout_gt)
                        meter_loss_rmse.update(np.sqrt(loss_mse.item()), B)

                        # compute losses at final step of the rollout
                        mse_final_step = MSELoss(state_rollout_pred[:, -1, :],
                                                 state_rollout_gt[:, -1, :])
                        l2_final_step = torch.norm(state_pred_err[:, -1],
                                                   dim=-1).mean()
                        l1_final_step = L1Loss(state_rollout_pred[:, -1, :],
                                               state_rollout_gt[:, -1, :])

                        loss_container['mse'] = loss_mse
                        loss_container['l1'] = loss_l1
                        loss_container['mse_final_step'] = mse_final_step
                        loss_container['l1_final_step'] = l1_final_step
                        loss_container['l2_final_step'] = l2_final_step

                    step_duration_meter.update(time.time() - step_start_time)

                    if phase == 'train':
                        # backprop only on the MSE rollout loss; L1 and
                        # final-step losses are logging-only
                        optimizer.zero_grad()
                        loss_mse.backward()
                        optimizer.step()

                    if i % config['train']['log_per_iter'] == 0:
                        log = '%s %d [%d/%d][%d/%d] LR: %.6f' % (
                            phase, global_iteration, epoch,
                            config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))
                        log += ', rmse: %.6f (%.6f)' % (np.sqrt(
                            loss_mse.item()), meter_loss_rmse.avg)
                        log += ', step time %.6f' % (step_duration_meter.avg)
                        step_duration_meter.reset()
                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 100 iterations
                        if global_iteration > 100:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss_MSE/%s" % (phase),
                                              loss_mse.item(),
                                              global_iteration)
                            writer.add_scalar("L1/%s" % (phase),
                                              loss_l1.item(),
                                              global_iteration)
                            writer.add_scalar("RMSE average loss/%s" % (phase),
                                              meter_loss_rmse.avg,
                                              global_iteration)

                            # number of trajectories in the dataset
                            writer.add_scalar("n_taj",
                                              len(multi_episode_dict),
                                              global_iteration)

                            for loss_type, loss_obj in loss_container.items():
                                plot_name = "Loss/%s/%s" % (loss_type, phase)
                                writer.add_scalar(plot_name, loss_obj.item(),
                                                  global_iteration)

                    if phase == 'train' and global_iteration % config['train'][
                            'ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_iter_%d' %
                            (train_dir, global_iteration))

                log = '%s %d [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, global_iteration, epoch, config['train']['n_epoch'],
                    meter_loss_rmse.avg, best_valid_loss)
                print(log)

                # StepLR steps once per training phase (per epoch);
                # ReduceLROnPlateau steps on the validation RMSE
                if phase == "train":
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type'] ==
                            "StepLR"):
                        scheduler.step()

                if phase == 'valid':
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type'] ==
                            "ReduceLROnPlateau"):
                        scheduler.step(meter_loss_rmse.avg)

                    if meter_loss_rmse.avg < best_valid_loss:
                        best_valid_loss = meter_loss_rmse.avg
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

            writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_iter_%d_keyboard_interrupt' %
            (train_dir, global_iteration))
        writer.flush()  # flush SummaryWriter events to disk

    return model_dy, global_iteration
def main():
    """Interactive MPC demo on the Drake mugs environment.

    Loads a trained dynamics + dense-descriptor model, rolls out a ground-truth
    action sequence in the simulator to produce a goal, visualizes predicted
    vs. ground-truth keypoints in meshcat, then runs a closed-loop MPC
    (random-shooting or MPPI, per the module-level PLANNER_TYPE) to reach the
    goal, pausing for user input each step. Requires a GPU (several .cuda()
    calls) and module-level constants PUSH_LENGTH, PLANNER_TYPE, REPLAN,
    USE_FIXED_MPC_HORIZON, MPC_HORIZON.
    """
    # load dynamics model
    model_dict = load_model_state_dict()
    model = model_dict['model_dy']
    model_dd = model_dict['model_dd']  # dense-descriptor (vision) model
    config = model.config

    env_config = load_yaml(os.path.join(get_project_root(),
                                        'experiments/exp_20_mugs/config.yaml'))
    env_config['env']['observation']['depth_int16'] = True
    n_history = config['train']['n_history']

    initial_cond = generate_initial_condition(env_config,
                                              push_length=PUSH_LENGTH)
    env_config = initial_cond['config']

    # enable the right observations

    camera_name = model_dict['metadata']['camera_name']
    spatial_descriptor_data = model_dict['spatial_descriptor_data']
    ref_descriptors = spatial_descriptor_data['spatial_descriptors']
    K = ref_descriptors.shape[0]  # number of reference descriptors/keypoints

    ref_descriptors = torch.Tensor(ref_descriptors).cuda()  # put them on the GPU

    print("ref_descriptors\n", ref_descriptors)
    print("ref_descriptors.shape", ref_descriptors.shape)

    # create the environment
    # create the environment
    env = DrakeMugsEnv(env_config)
    env.reset()

    T_world_camera = env.camera_pose(camera_name)
    camera_K_matrix = env.camera_K_matrix(camera_name)

    # create another environment for doing rollouts
    env2 = DrakeMugsEnv(env_config, visualize=False)
    env2.reset()

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.drake_pusher_position_3D(config)
    visual_observation_function = \
        VisualObservationFunctionFactory.descriptor_keypoints_3D(config=config,
                                                                 camera_name=camera_name,
                                                                 model_dd=model_dd,
                                                                 ref_descriptors=ref_descriptors,
                                                                 K_matrix=camera_K_matrix,
                                                                 T_world_camera=T_world_camera,
                                                                 )

    episode = OnlineEpisodeReader()
    mpc_input_builder = DynamicsModelInputBuilder(observation_function=observation_function,
                                                  visual_observation_function=visual_observation_function,
                                                  action_function=action_function,
                                                  episode=episode)

    vis = meshcat_utils.make_default_visualizer_object()
    vis.delete()

    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    obs_init = env.get_observation()

    #### ROLLOUT USING LEARNED MODEL + GROUND TRUTH ACTIONS ############
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    # add just some large number of these
    episode.clear()
    # seed the episode with n_history zero-action observations so the
    # dynamics-model input builder has a full history window
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    def goal_func(obs_tmp):
        # map a raw observation to the model's latent object state z_object
        state_tmp = mpc_input_builder.get_state_input_single_timestep({'observation': obs_tmp})['state']
        return model.compute_z_state(state_tmp.unsqueeze(0))['z_object'].flatten()

    idx = episode.get_latest_idx()
    obs_raw = episode.get_observation(idx)
    z_object_goal = goal_func(obs_raw)

    z_keypoints_init_W = keypoints_3D_from_dynamics_model_output(z_object_goal, K)
    z_keypoints_init_W = torch_utils.cast_to_numpy(z_keypoints_init_W)

    # express the keypoints in the object frame so they can be re-posed later
    z_keypoints_obj = keypoints_world_frame_to_object_frame(z_keypoints_init_W,
                                                            T_W_obj=slider_pose_from_observation(obs_init))

    # red: initial keypoints in world frame
    color = [1, 0, 0]
    meshcat_utils.visualize_points(vis=vis,
                                   name="keypoints_W",
                                   pts=z_keypoints_init_W,
                                   color=color,
                                   size=0.02,
                                   )

    # input("press Enter to continue")

    # rollout single action sequence using the simulator
    action_sequence_np = torch_utils.cast_to_numpy(initial_cond['action_sequence'])
    N = action_sequence_np.shape[0]
    obs_rollout_gt = env_utils.rollout_action_sequence(env, action_sequence_np)[
        'observations']

    # using the vision model to get "goal" keypoints
    z_object_goal = goal_func(obs_rollout_gt[-1])
    z_object_goal_np = torch_utils.cast_to_numpy(z_object_goal)
    z_keypoints_goal = keypoints_3D_from_dynamics_model_output(z_object_goal, K)
    z_keypoints_goal = torch_utils.cast_to_numpy(z_keypoints_goal)

    # visualize goal keypoints
    color = [0, 1, 0]
    meshcat_utils.visualize_points(vis=vis,
                                   name="goal_keypoints",
                                   pts=z_keypoints_goal,
                                   color=color,
                                   size=0.02,
                                   )

    # input("press Enter to continue")

    #### ROLLOUT USING LEARNED MODEL + GROUND TRUTH ACTIONS ############
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    # add just some large number of these
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    # [n_history, state_dim]
    idx = episode.get_latest_idx()

    dyna_net_input = mpc_input_builder.get_dynamics_model_input(idx, n_history=n_history)
    state_init = dyna_net_input['states'].cuda()  # [n_history, state_dim]
    action_init = dyna_net_input['actions']  # [n_history, action_dim]

    print("state_init.shape", state_init.shape)
    print("action_init.shape", action_init.shape)

    action_seq_gt_torch = torch_utils.cast_to_torch(initial_cond['action_sequence'])
    # prepend (n_history - 1) history actions to the ground-truth sequence,
    # matching the action-window convention used by rollout_model
    action_input = torch.cat((action_init[:(n_history-1)], action_seq_gt_torch), dim=0).cuda()
    print("action_input.shape", action_input.shape)

    # rollout using the ground truth actions and learned model
    # need to add the batch dim to do that
    z_init = model.compute_z_state(state_init)['z']
    rollout_pred = rollout_model(state_init=z_init.unsqueeze(0),
                                 action_seq=action_input.unsqueeze(0),
                                 dynamics_net=model,
                                 compute_debug_data=True)

    state_pred_rollout = rollout_pred['state_pred']

    print("state_pred_rollout.shape", state_pred_rollout.shape)

    for i in range(N):
        # vis GT for now
        name = "GT_3D/%d" % (i)
        T_W_obj = slider_pose_from_observation(obs_rollout_gt[i])
        # print("T_W_obj", T_W_obj)

        # green
        color = np.array([0, 1, 0]) * get_color_intensity(i, N)
        meshcat_utils.visualize_points(vis=vis,
                                       name=name,
                                       pts=z_keypoints_obj,
                                       color=color,
                                       size=0.01,
                                       T=T_W_obj)

        # red
        # NOTE(review): comment says "red" but [0, 0, 1] is blue in RGB
        color = np.array([0, 0, 1]) * get_color_intensity(i, N)
        state_pred = state_pred_rollout[:, i, :]
        pts_pred = keypoints_3D_from_dynamics_model_output(state_pred, K).squeeze()
        pts_pred = pts_pred.detach().cpu().numpy()
        name = "pred_3D/%d" % (i)
        meshcat_utils.visualize_points(vis=vis,
                                       name=name,
                                       pts=pts_pred,
                                       color=color,
                                       size=0.01,
                                       )

    # input("finished visualizing GT rollout\npress Enter to continue")

    index_dict = get_object_and_robot_state_indices(config)
    object_indices = index_dict['object_indices']

    # reset the environment and use the MPC controller to stabilize this
    # now setup the MPC to try to stabilize this . . . .
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    episode.clear()

    # add just some large number of these
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    # input("press Enter to continue")

    # make a planner config
    planner_config = copy.copy(config)
    config_tmp = load_yaml(os.path.join(get_project_root(),
                                        'experiments/drake_pusher_slider/eval_config.yaml'))
    planner_config['mpc'] = config_tmp['mpc']
    planner = None
    if PLANNER_TYPE == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif PLANNER_TYPE == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (PLANNER_TYPE))

    mpc_out = None
    action_seq_mpc = None
    state_pred_mpc = None
    counter = -1
    while True:
        counter += 1
        print("\n\n-----Running MPC Optimization: Counter (%d)-------" % (counter))

        obs_cur = env.get_observation()
        episode.add_observation_only(obs_cur)

        if counter == 0 or REPLAN:
            print("replanning")
            ####### Run the MPC ##########

            # [1, state_dim]

            # receding horizon unless a fixed horizon is requested
            n_look_ahead = N - counter
            if USE_FIXED_MPC_HORIZON:
                n_look_ahead = MPC_HORIZON
            if n_look_ahead == 0:
                break

            # start_time = time.time()
            # idx of current observation
            idx = episode.get_latest_idx()

            mpc_start_time = time.time()
            mpc_input_data = mpc_input_builder.get_dynamics_model_input(idx, n_history=n_history)
            state_cur = mpc_input_data['states']
            action_his = mpc_input_data['actions']

            # warm-start the planner with the previous plan, shifted by one step
            if mpc_out is not None:
                action_seq_rollout_init = mpc_out['action_seq'][1:]
            else:
                action_seq_rollout_init = None

            # run MPPI
            z_cur = None
            with torch.no_grad():
                z_cur = model.compute_z_state(state_cur.unsqueeze(0).cuda())['z'].squeeze(0)

            mpc_out = planner.trajectory_optimization(state_cur=z_cur,
                                                      action_his=action_his,
                                                      obs_goal=z_object_goal_np,
                                                      model_dy=model,
                                                      action_seq_rollout_init=action_seq_rollout_init,
                                                      n_look_ahead=n_look_ahead,
                                                      eval_indices=object_indices,
                                                      rollout_best_action_sequence=True,
                                                      verbose=True,
                                                      )

            print("MPC step took %.4f seconds" %(time.time() - mpc_start_time))
            action_seq_mpc = mpc_out['action_seq'].cpu().numpy()

        # Rollout with ground truth simulator dynamics
        action_seq_mpc = torch_utils.cast_to_numpy(mpc_out['action_seq'])
        env2.set_simulator_state_from_observation_dict(env2.get_mutable_context(), obs_cur)
        obs_mpc_gt = env_utils.rollout_action_sequence(env2, action_seq_mpc)['observations']
        state_pred_mpc = torch_utils.cast_to_numpy(mpc_out['state_pred'])

        vis['mpc_3D'].delete()
        vis['mpc_GT_3D'].delete()

        L = len(obs_mpc_gt)
        print("L", L)
        if L == 0:
            break
        for i in range(L):
            # red
            color = np.array([1, 0, 0]) * get_color_intensity(i, L)
            state_pred = state_pred_mpc[i, :]
            state_pred = np.expand_dims(state_pred, 0)  # may need to expand dims here
            pts_pred = keypoints_3D_from_dynamics_model_output(state_pred, K).squeeze()

            name = "mpc_3D/%d" % (i)
            meshcat_utils.visualize_points(vis=vis,
                                           name=name,
                                           pts=pts_pred,
                                           color=color,
                                           size=0.01,
                                           )

            # ground truth rollout of the MPC action_seq
            name = "mpc_GT_3D/%d" % (i)
            T_W_obj = slider_pose_from_observation(obs_mpc_gt[i])

            # green
            # NOTE(review): comment says "green" but [1, 1, 0] is yellow in RGB
            color = np.array([1, 1, 0]) * get_color_intensity(i, L)
            meshcat_utils.visualize_points(vis=vis,
                                           name=name,
                                           pts=z_keypoints_obj,
                                           color=color,
                                           size=0.01,
                                           T=T_W_obj)

        # execute only the first action of the plan (standard receding-horizon MPC)
        action_cur = action_seq_mpc[0]

        print("action_cur", action_cur)
        # print("action_GT", initial_cond['action'])
        input("press Enter to continue")

        # add observation actions to the episode
        obs_cur = env.get_observation()
        episode.replace_observation_action(obs_cur, action_cur)

        # step the simulator
        env.step(action_cur)

        # visualize current keypoint positions
        obs_cur = env.get_observation()
        T_W_obj = slider_pose_from_observation(obs_cur)

        # yellow
        color = np.array([1, 1, 0])
        meshcat_utils.visualize_points(vis=vis,
                                       name="keypoint_cur",
                                       pts=z_keypoints_obj,
                                       color=color,
                                       size=0.02,
                                       T=T_W_obj)

        # shift the remaining plan forward one step
        action_seq_mpc = action_seq_mpc[1:]
        state_pred_mpc = state_pred_mpc[1:]

    obs_final = env.get_observation()

    # final tracking error vs. the goal state from the ground-truth rollout
    pose_error = compute_pose_error(obs_rollout_gt[-1], obs_final)
    print("position_error: %.3f" %(pose_error['position_error']))
    print("angle error degrees: %.3f" %(pose_error['angle_error_degrees']))
# must import pydrake BEFORE torch import pydrake # key_dynam from key_dynam.utils import dev_utils from key_dynam.dataset.episode_dataset import MultiEpisodeDataset from key_dynam.dataset.function_factory import ObservationFunctionFactory, ActionFunctionFactory multi_episode_dict = dev_utils.load_drake_pusher_slider_episodes() config = dev_utils.load_simple_config() action_function = ActionFunctionFactory.function_from_config(config) observation_function = ObservationFunctionFactory.function_from_config(config) dataset = MultiEpisodeDataset(config, action_function=action_function, observation_function=observation_function, episodes=multi_episode_dict, phase="train") episode_name = dataset.get_episode_names()[0] episode = dataset.episode_dict[episode_name] idx = 5 data = dataset._getitem( episode, idx, rollout_length=5, n_history=2, visual_observation=False, ) print("\n\ndata.keys()", data.keys())