def collect_episodes(config, output_dir=None, visualize=True):
    """Collect pusher-slider episodes and pickle them all into one file.

    NOTE(review): a second `collect_episodes` with the same name appears later
    in this file and will shadow this one at import time -- confirm which
    version is intended to be live.

    :param config: dict-like experiment config; reads config['dataset']['num_episodes']
    :param output_dir: where to write data; defaults to <cwd>/data
    :param visualize: forwarded to the data collector
    """
    if output_dir is None:
        output_dir = os.path.join(os.getcwd(), 'data')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # persist the config next to the data for reproducibility
    save_yaml(config, os.path.join(output_dir, 'config.yaml'))

    # single timestamped pickle that will hold every episode
    save_file = os.path.join(
        output_dir, "%s.p" % (get_current_YYYY_MM_DD_hh_mm_ss_ms()))

    collector = PusherSliderDataCollector(config)
    num_episodes = config['dataset']['num_episodes']
    container = MultiEpisodeContainer()

    for idx in range(num_episodes):
        print("collecting episode %d of %d" % (idx + 1, num_episodes))
        episode_name = "%s_idx_%d" % (get_current_YYYY_MM_DD_hh_mm_ss_ms(), idx)
        container.add_episode(
            collector.collect_single_episode(visualize,
                                             episode_name=episode_name))

    print("saving data to %s" % save_file)
    container.save_to_file(save_file)
def collect_episodes(config, output_dir=None, visualize=True, use_threads=False):
    """Collect Drake pusher-slider episodes, saving each episode to disk as it
    is collected (images -> hdf5, everything else -> pickle), plus a
    metadata.yaml index of the per-episode files.

    NOTE(review): `use_threads` is accepted but never read in this body --
    confirm whether threaded collection was ever implemented.

    :param config: dict-like experiment config; reads config['dataset']['num_episodes']
    :param output_dir: where to write data; defaults to <cwd>/data
    :param visualize: forwarded to the episode collector
    :param use_threads: unused (kept for interface compatibility)
    """
    if output_dir is None:
        output_dir = os.path.join(os.getcwd(), 'data')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # persist the config next to the data for reproducibility
    save_yaml(config, os.path.join(output_dir, 'config.yaml'))

    collector = DrakePusherSliderEpisodeCollector(config)
    num_episodes = config['dataset']['num_episodes']

    # metadata['episodes'][<name>] records where each episode's files landed
    metadata = {'episodes': {}}

    for idx in range(num_episodes):
        print("\n")
        start_time = time.time()
        print("collecting episode %d of %d" % (idx + 1, num_episodes))

        episode_name = "%s_idx_%d" % (get_current_YYYY_MM_DD_hh_mm_ss_ms(), idx)
        episode = collector.collect_single_episode(visualize,
                                                   episode_name=episode_name)

        print("saving to disk")
        image_data_file = episode.save_images_to_hdf5(output_dir)
        non_image_data_file = episode.save_non_image_data_to_pickle(output_dir)
        print("non_image_data.keys()", episode.non_image_data.keys())

        metadata['episodes'][episode_name] = {
            'non_image_data_file': non_image_data_file,
            'image_data_file': image_data_file,
        }
        print("done saving to disk")

        print("single episode took: %.2f seconds" % (time.time() - start_time))

    save_yaml(metadata, os.path.join(output_dir, 'metadata.yaml'))
def train_dynamics(
    config,
    train_dir,  # str: directory to save output
):
    """Train the MLP dynamics model (state-only, no vision) on multi-episode data.

    Rolls the model forward n_rollout steps from an n_history window and
    minimizes the MSE against ground-truth states. Logs to tensorboard under
    train_dir/tensorboard, checkpoints periodically, and keeps the best
    validation model at net_best_dy.

    BUGFIX: the resume branch previously called load_state_dict() on
    model_dy while it was still None, so resuming from a checkpoint always
    raised AttributeError; the network is now constructed first.

    :param config: nested dict of training/dataset/env parameters
    :param train_dir: directory receiving logs, config copy, and checkpoints
    """
    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    st_epoch = config['train'][
        'resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    # Tee mirrors stdout into a log file for this run
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, "config.yaml"))
    print(config)

    # load the data
    episodes = load_episodes_from_config(config)
    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=episodes,
            phase=phase)
        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=True if phase == 'train' else False,
            num_workers=config['train']['num_workers'])
        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    '''
    define model for dynamics prediction
    '''
    model_dy = None
    if config['train']['resume_epoch'] >= 0:
        # resume from a pretrained checkpoint.
        # BUGFIX: construct the network before loading weights into it
        # (previously model_dy was still None here).
        model_dy = DynaNetMLP(config)

        state_dict_path = os.path.join(
            train_dir, 'net_dy_epoch_%d_iter_%d_state_dict.pth' %
            (config['train']['resume_epoch'], config['train']['resume_iter']))
        print("Loading saved ckp from %s" % state_dict_path)
        model_dy.load_state_dict(torch.load(state_dict_path))
        # NOTE(review): optimizer state (Adam moments) is not restored on
        # resume, and the normalizers are only rebuilt below for the
        # fresh-training branch -- confirm the checkpoint's state dict
        # contains the normalizer parameters.
    else:
        # not starting from pre-trained: create the network and compute the
        # normalization parameters
        model_dy = DynaNetMLP(config)

        # compute normalization params from the training set
        stats = datasets["train"].compute_dataset_statistics()

        obs_mean = stats['observations']['mean']
        obs_std = stats['observations']['std']
        observations_normalizer = DataNormalizer(obs_mean, obs_std)

        action_mean = stats['actions']['mean']
        action_std = stats['actions']['std']
        actions_normalizer = DataNormalizer(action_mean, action_std)

        model_dy.action_normalizer = actions_normalizer
        model_dy.state_normalizer = observations_normalizer

    print("model_dy #params: %d" % count_trainable_parameters(model_dy))

    # criterion
    criterionMSE = nn.MSELoss()

    # optimizer
    params = model_dy.parameters()
    optimizer = optim.Adam(params,
                           lr=config['train']['lr'],
                           betas=(config['train']['adam_beta1'], 0.999))
    scheduler = ReduceLROnPlateau(optimizer,
                                  'min',
                                  factor=0.9,
                                  patience=10,
                                  verbose=True)

    if use_gpu:
        model_dy = model_dy.cuda()

    best_valid_loss = np.inf
    global_iteration = 0
    epoch_counter_external = 0

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)
            for phase in phases:
                model_dy.train(phase == 'train')

                meter_loss_rmse = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):
                    global_iteration += 1

                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        # NOTE(review): only the PusherSlider env type is
                        # handled; any other env type leaves loss_mse
                        # undefined below -- confirm this is intentional.
                        if config['env']['type'] in ['PusherSlider']:
                            states = data['observations']
                            actions = data['actions']

                            if use_gpu:
                                states = states.cuda()
                                actions = actions.cuda()

                            # states, actions = data
                            assert states.size(1) == n_samples

                            # normalize states and actions once for entire rollout
                            states = model_dy.state_normalizer.normalize(
                                states)
                            actions = model_dy.action_normalizer.normalize(
                                actions)

                            B = states.size(0)
                            loss_mse = 0.

                            # state_cur: B x n_his x state_dim
                            state_cur = states[:, :n_his]

                            for j in range(n_roll):
                                state_des = states[:, n_his + j]

                                # action_cur: B x n_his x action_dim
                                action_cur = actions[:, j:j + n_his] if actions is not None else None

                                # state_pred: B x state_dim
                                state_pred = model_dy(state_cur, action_cur)

                                loss_mse_cur = criterionMSE(
                                    state_pred, state_des)
                                loss_mse += loss_mse_cur / n_roll

                                # update state_cur by dropping the oldest
                                # frame and appending the prediction
                                # state_pred.unsqueeze(1): B x 1 x state_dim
                                state_cur = torch.cat([
                                    state_cur[:, 1:],
                                    state_pred.unsqueeze(1)
                                ], 1)

                            meter_loss_rmse.update(np.sqrt(loss_mse.item()), B)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss_mse.backward()
                        optimizer.step()

                    if i % config['train']['log_per_iter'] == 0:
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))
                        log += ', rmse: %.6f (%.6f)' % (np.sqrt(
                            loss_mse.item()), meter_loss_rmse.avg)
                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 500 iterations
                        if global_iteration > 500:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss/train", loss_mse.item(),
                                              global_iteration)
                            writer.add_scalar("RMSE average loss/train",
                                              meter_loss_rmse.avg,
                                              global_iteration)

                    if phase == 'train' and i % config['train'][
                            'ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_epoch_%d_iter_%d' %
                            (train_dir, epoch, i))

                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'],
                    meter_loss_rmse.avg, best_valid_loss)
                print(log)

                if phase == 'valid':
                    scheduler.step(meter_loss_rmse.avg)
                    writer.add_scalar("RMSE average loss/valid",
                                      meter_loss_rmse.avg, global_iteration)
                    if meter_loss_rmse.avg < best_valid_loss:
                        best_valid_loss = meter_loss_rmse.avg
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' %
            (train_dir, epoch_counter_external))
        writer.flush()  # flush SummaryWriter events to disk
def train_dynamics(
    config,
    train_dir,  # str: directory to save output
    multi_episode_dict=None,
    spatial_descriptors_idx=None,
    metadata=None,
    spatial_descriptors_data=None,
):
    """Train a dynamics model on precomputed 3D descriptor keypoints.

    State = [selected keypoints (flattened), low-dim observations]. The model
    is rolled out n_rollout steps via rollout_model(); the loss is a
    per-keypoint-weighted object-state MSE plus robot-state MSE. Logs to
    tensorboard, checkpoints periodically, and keeps the best validation
    model (by config['train']['valid_loss_type']) at net_best_dy.

    BUGFIX: the tensorboard weight-plotting loop reused the batch-loop
    variable `i`, which corrupted the batch index used immediately afterwards
    in the log line and in the periodic checkpoint filename
    net_dy_epoch_%d_iter_%d. The inner loop now uses its own variable.

    :param config: nested dict of training/dataset/env parameters
    :param train_dir: directory receiving logs, config copy, and checkpoints
    :param multi_episode_dict: required; episodes passed to MultiEpisodeDataset
    :param spatial_descriptors_idx: indices selecting which keypoints to keep
    :param metadata: optional, pickled to train_dir/metadata.p
    :param spatial_descriptors_data: optional, pickled to train_dir/spatial_descriptors.p
    """
    assert multi_episode_dict is not None
    # assert spatial_descriptors_idx is not None

    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    st_epoch = config['train'][
        'resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    # Tee mirrors stdout into a log file for this run
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, "config.yaml"))

    if metadata is not None:
        save_pickle(metadata, os.path.join(train_dir, 'metadata.p'))

    if spatial_descriptors_data is not None:
        save_pickle(spatial_descriptors_data,
                    os.path.join(train_dir, 'spatial_descriptors.p'))

    training_stats = dict()
    training_stats_file = os.path.join(train_dir, 'training_stats.yaml')

    # load the data
    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=multi_episode_dict,
            phase=phase)
        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=True if phase == 'train' else False,
            num_workers=config['train']['num_workers'],
            drop_last=True)
        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    '''
    Build model for dynamics prediction
    '''
    model_dy = build_dynamics_model(config)
    camera_name = config['vision_net']['camera_name']

    # criterion
    criterionMSE = nn.MSELoss()
    l1Loss = nn.L1Loss()
    smoothL1 = nn.SmoothL1Loss()

    # optimizer
    params = model_dy.parameters()
    lr = float(config['train']['lr'])
    optimizer = optim.Adam(params,
                           lr=lr,
                           betas=(config['train']['adam_beta1'], 0.999))

    # setup scheduler
    sc = config['train']['lr_scheduler']
    scheduler = None
    if config['train']['lr_scheduler']['enabled']:
        if config['train']['lr_scheduler']['type'] == "ReduceLROnPlateau":
            scheduler = ReduceLROnPlateau(optimizer,
                                          mode='min',
                                          factor=sc['factor'],
                                          patience=sc['patience'],
                                          threshold_mode=sc['threshold_mode'],
                                          cooldown=sc['cooldown'],
                                          verbose=True)
        elif config['train']['lr_scheduler']['type'] == "StepLR":
            step_size = config['train']['lr_scheduler']['step_size']
            gamma = config['train']['lr_scheduler']['gamma']
            scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma)
        else:
            raise ValueError("unknown scheduler type: %s" %
                             (config['train']['lr_scheduler']['type']))

    if use_gpu:
        print("using gpu")
        model_dy = model_dy.cuda()

    # print("model_dy.vision_net._ref_descriptors.device", model_dy.vision_net._ref_descriptors.device)
    # print("model_dy.vision_net #params: %d" %(count_trainable_parameters(model_dy.vision_net)))

    best_valid_loss = np.inf
    valid_loss_type = config['train']['valid_loss_type']
    global_iteration = 0
    counters = {'train': 0, 'valid': 0}
    epoch_counter_external = 0
    loss = 0

    # split the concatenated state vector into object vs robot slices
    index_map = get_object_and_robot_state_indices(config)
    object_state_indices = torch.LongTensor(index_map['object_indices'])
    robot_state_indices = torch.LongTensor(index_map['robot_indices'])

    object_state_shape = config['dataset']['object_state_shape']

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)
            for phase in phases:

                # only validate at a certain frequency
                if (phase == "valid") and (
                        (epoch % config['train']['valid_frequency']) != 0):
                    continue

                model_dy.train(phase == 'train')

                average_meter_container = dict()
                step_duration_meter = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):
                    loss_container = dict()  # store the losses for this step
                    step_start_time = time.time()

                    global_iteration += 1
                    counters[phase] += 1

                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" % (global_iteration))
                            print("n_samples", n_samples)

                        # [B, n_samples, obs_dim]
                        observations = data['observations']
                        visual_observations_list = data[
                            'visual_observations_list']

                        # [B, n_samples, action_dim]
                        actions = data['actions']

                        B = actions.shape[0]

                        if use_gpu:
                            observations = observations.cuda()
                            actions = actions.cuda()

                        # compile the visual observations:
                        # collect the selected keypoints for every timestep
                        visual_model_output_list = []
                        for visual_obs in visual_observations_list:
                            # visual_obs is a dict containing observation for a single
                            # time step (of course across a batch however)

                            # [B, -1, 3]
                            keypoints = visual_obs[camera_name][
                                'descriptor_keypoints_3d_world_frame']

                            # [B, K, 3] where K = len(spatial_descriptors_idx)
                            keypoints = keypoints[:, spatial_descriptors_idx]

                            B, K, _ = keypoints.shape

                            # [B, K*3]
                            keypoints_reshape = keypoints.reshape([B, K * 3])

                            if DEBUG:
                                print("keypoints.shape", keypoints.shape)
                                print("keypoints_reshape.shape",
                                      keypoints_reshape.shape)

                            visual_model_output_list.append(keypoints_reshape)

                        visual_model_output = None
                        if len(visual_model_output_list) > 0:
                            # concatenate this into a tensor
                            # [B, n_samples, vision_model_out_dim]
                            visual_model_output = torch.stack(
                                visual_model_output_list, dim=1)
                        else:
                            visual_model_output = torch.Tensor()  # empty tensor

                        # states, actions = data
                        assert actions.shape[1] == n_samples

                        # cast this to float so it can be concatenated below
                        visual_model_output = visual_model_output.type_as(
                            observations)

                        # states is gotten by concatenating visual_observations and observations
                        # [B, n_samples, vision_model_out_dim + obs_dim]
                        states = torch.cat((visual_model_output, observations),
                                           dim=-1)

                        # [B, n_his, state_dim]
                        state_init = states[:, :n_his]

                        # We want to rollout n_roll steps.
                        # NOTE(review): this slice has length n_his + n_roll - 1,
                        # which equals the commented-intent n_roll only when
                        # n_his == 1 -- confirm against rollout_model's contract.
                        action_start_idx = 0
                        action_end_idx = n_his + n_roll - 1
                        action_seq = actions[:, action_start_idx:
                                             action_end_idx, :]

                        if DEBUG:
                            print("states.shape", states.shape)
                            print("state_init.shape", state_init.shape)
                            print("actions.shape", actions.shape)
                            print("action_seq.shape", action_seq.shape)

                        rollout_data = rollout_model(state_init=state_init,
                                                     action_seq=action_seq,
                                                     dynamics_net=model_dy,
                                                     compute_debug_data=False)

                        # [B, n_roll, state_dim]
                        state_rollout_pred = rollout_data['state_pred']

                        # [B, n_roll, state_dim]
                        state_rollout_gt = states[:, n_his:]

                        if DEBUG:
                            print("state_rollout_gt.shape",
                                  state_rollout_gt.shape)
                            print("state_rollout_pred.shape",
                                  state_rollout_pred.shape)

                        # [B, n_roll, state_dim]
                        state_pred_err = state_rollout_pred - state_rollout_gt

                        # [B, n_roll, object_state_dim]
                        object_state_err = state_pred_err[:, :,
                                                          object_state_indices]
                        B, n_roll, object_state_dim = object_state_err.shape

                        # [B, n_roll, *object_state_shape]
                        object_state_err_reshape = object_state_err.reshape(
                            [B, n_roll, *object_state_shape])

                        # num weights: one learned weight per object keypoint
                        J = object_state_err_reshape.shape[2]
                        weights = model_dy.weight_matrix

                        assert len(
                            weights) == J, "len(weights) = %d, but J = %d" % (
                                len(weights), J)

                        # mse object, note the use of broadcasting semantics
                        # [B, n_roll]
                        object_state_loss_mse = weights * torch.pow(
                            object_state_err_reshape, 2).sum(dim=-1)
                        object_state_loss_mse = object_state_loss_mse.mean()

                        l2_object = (weights * torch.norm(
                            object_state_err_reshape, dim=-1)).mean()

                        l2_object_final_step = (weights * torch.norm(
                            object_state_err_reshape[:, -1], dim=-1)).mean()

                        # [B, n_roll, robot_state_dim]
                        robot_state_err = state_pred_err[:, :,
                                                         robot_state_indices]
                        robot_state_loss_mse = torch.pow(robot_state_err,
                                                         2).sum(dim=-1).mean()

                        loss_container[
                            'object_state_loss_mse'] = object_state_loss_mse
                        loss_container[
                            'robot_state_loss_mse'] = robot_state_loss_mse
                        loss_container['l2_object'] = l2_object
                        loss_container[
                            'l2_object_final_step'] = l2_object_final_step

                        # total loss
                        loss = object_state_loss_mse + robot_state_loss_mse
                        loss_container['loss'] = loss

                        for key, val in loss_container.items():
                            if not key in average_meter_container:
                                average_meter_container[key] = AverageMeter()
                            average_meter_container[key].update(val.item(), B)

                    step_duration_meter.update(time.time() - step_start_time)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                    if (i % config['train']['log_per_iter'] == 0) or (
                            global_iteration %
                            config['train']['log_per_iter'] == 0):
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))

                        # log += ', l2: %.6f' % (loss_container['l2'].item())
                        # log += ', l2_final_step: %.6f' %(loss_container['l2_final_step'].item())

                        log += ', step time %.6f' % (step_duration_meter.avg)
                        step_duration_meter.reset()
                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 100 iterations
                        if global_iteration > 100:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss_train/%s" % (phase),
                                              loss.item(), global_iteration)

                            for loss_type, loss_obj in loss_container.items():
                                plot_name = "Loss/%s/%s" % (loss_type, phase)
                                writer.add_scalar(plot_name, loss_obj.item(),
                                                  counters[phase])

                            # only plot the weights if we are in the train phase
                            if phase == "train":
                                # BUGFIX: loop variable renamed from `i` to
                                # avoid clobbering the enclosing batch index
                                for w_idx in range(len(weights)):
                                    plot_name = "Weights/%d" % (w_idx)
                                    writer.add_scalar(plot_name,
                                                      weights[w_idx].item(),
                                                      counters[phase])

                    if phase == 'train' and global_iteration % config['train'][
                            'ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_epoch_%d_iter_%d' %
                            (train_dir, epoch, i))

                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'],
                    average_meter_container[valid_loss_type].avg,
                    best_valid_loss)
                print(log)

                # record all average_meter losses
                for key, meter in average_meter_container.items():
                    writer.add_scalar("AvgMeter/%s/%s" % (key, phase),
                                      meter.avg, epoch)

                if phase == "train":
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type'] ==
                            "StepLR"):
                        scheduler.step()

                if phase == 'valid':
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type'] ==
                            "ReduceLROnPlateau"):
                        scheduler.step(
                            average_meter_container[valid_loss_type].avg)

                    if average_meter_container[
                            valid_loss_type].avg < best_valid_loss:
                        best_valid_loss = average_meter_container[
                            valid_loss_type].avg

                        training_stats['epoch'] = epoch
                        training_stats['global_iteration'] = counters['valid']
                        save_yaml(training_stats, training_stats_file)
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' %
            (train_dir, epoch_counter_external))
        writer.flush()  # flush SummaryWriter events to disk
def train_dynamics(config,
                   train_dir, # str: directory to save output
                   multi_episode_dict, # multi_episode_dict
                   ):
    """Train a dynamics model whose state includes visual keypoints.

    Keypoints come either precomputed from disk or from the model's vision
    net, depending on config['dataset']['visual_observation']. The model is
    rolled forward n_rollout steps and trained with MSE normalized by the
    image diagonal; logs go to tensorboard under train_dir/tensorboard and
    the best validation model is saved as net_best_dy.
    """
    # True when keypoints were computed offline and stored with the dataset;
    # False means the vision net computes them on the fly each step
    use_precomputed_keypoints = config['dataset']['visual_observation']['enabled'] and config['dataset']['visual_observation']['descriptor_keypoints']

    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    st_epoch = config['train']['resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    # Tee mirrors stdout into a log file for this run
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, "config.yaml"))

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(config)

    # build train/valid datasets and loaders; drop_last keeps batch size fixed
    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(config,
                                              action_function=action_function,
                                              observation_function=observation_function,
                                              episodes=multi_episode_dict,
                                              phase=phase)

        dataloaders[phase] = DataLoader(
            datasets[phase], batch_size=config['train']['batch_size'],
            shuffle=True if phase == 'train' else False,
            num_workers=config['train']['num_workers'], drop_last=True)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    # compute normalization parameters if not starting from pre-trained network . . .

    '''
    define model for dynamics prediction
    '''
    model_dy = build_visual_dynamics_model(config)
    K = config['vision_net']['num_ref_descriptors']

    print("model_dy.vision_net._reference_descriptors.shape", model_dy.vision_net._ref_descriptors.shape)
    print("model_dy.vision_net.descriptor_dim", model_dy.vision_net.descriptor_dim)
    print("model_dy #params: %d" % count_trainable_parameters(model_dy))

    camera_name = config['vision_net']['camera_name']
    W = config['env']['rgbd_sensors']['sensor_list'][camera_name]['width']
    H = config['env']['rgbd_sensors']['sensor_list'][camera_name]['height']
    diag = np.sqrt(W**2 + H**2) # use this to scale the loss

    # sample reference descriptors unless using precomputed keypoints
    if not use_precomputed_keypoints:
        # sample reference descriptors from the first episode (sorted by name)
        episode_names = list(datasets["train"].episode_dict.keys())
        episode_names.sort()
        episode_name = episode_names[0]
        episode = datasets["train"].episode_dict[episode_name]
        episode_idx = 0
        camera_name = config["vision_net"]["camera_name"]
        image_data = episode.get_image_data(camera_name, episode_idx)
        des_img = torch.Tensor(image_data['descriptor'])
        mask_img = torch.Tensor(image_data['mask'])
        ref_descriptor_dict = sample_descriptors(des_img,
                                                 mask_img,
                                                 config['vision_net']['num_ref_descriptors'])

        # install the sampled descriptors (and bookkeeping) on the vision net
        model_dy.vision_net._ref_descriptors.data = ref_descriptor_dict['descriptors']
        model_dy.vision_net.reference_image = image_data['rgb']
        model_dy.vision_net.reference_indices = ref_descriptor_dict['indices']
    else:
        # load reference descriptors saved alongside the precomputed keypoints
        metadata_file = os.path.join(get_data_root(), config['dataset']['descriptor_keypoints_dir'], 'metadata.p')
        descriptor_metadata = load_pickle(metadata_file)

        # [32, 2]
        ref_descriptors = torch.Tensor(descriptor_metadata['ref_descriptors'])

        # [K, 2] -- keep only the first K of the descriptors stored on disk
        ref_descriptors = ref_descriptors[:K]
        model_dy.vision_net._ref_descriptors.data = ref_descriptors
        model_dy.vision_net._ref_descriptors_metadata = descriptor_metadata

        # this is just a sanity check
        assert model_dy.vision_net.num_ref_descriptors == K

    print("reference_descriptors", model_dy.vision_net._ref_descriptors)

    # criterion
    criterionMSE = nn.MSELoss()
    l1Loss = nn.L1Loss()

    # optimizer
    params = model_dy.parameters()
    lr = float(config['train']['lr'])
    optimizer = optim.Adam(params, lr=lr, betas=(config['train']['adam_beta1'], 0.999))

    # setup scheduler
    # NOTE(review): the scheduler is constructed unconditionally here but only
    # stepped below when config['train']['lr_scheduler']['enabled'] -- confirm
    sc = config['train']['lr_scheduler']
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=sc['factor'],
                                  patience=sc['patience'],
                                  threshold_mode=sc['threshold_mode'],
                                  cooldown= sc['cooldown'],
                                  verbose=True)

    if use_gpu:
        print("using gpu")
        model_dy = model_dy.cuda()

    print("model_dy.vision_net._ref_descriptors.device", model_dy.vision_net._ref_descriptors.device)
    print("model_dy.vision_net #params: %d" %(count_trainable_parameters(model_dy.vision_net)))

    best_valid_loss = np.inf
    global_iteration = 0
    epoch_counter_external = 0

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)
            for phase in phases:
                model_dy.train(phase == 'train')

                meter_loss_rmse = AverageMeter()
                step_duration_meter = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):
                    step_start_time = time.time()

                    global_iteration += 1

                    # gradients only recorded during the train phase
                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config['train']['n_rollout']
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" %(global_iteration))

                        # visual_observations = data['visual_observations']
                        visual_observations_list = data['visual_observations_list']
                        observations = data['observations']
                        actions = data['actions']

                        if use_gpu:
                            observations = observations.cuda()
                            actions = actions.cuda()

                        # states, actions = data
                        assert actions.size(1) == n_samples
                        B = actions.size(0)
                        loss_mse = 0.

                        # compute the output of the visual model for all timesteps
                        visual_model_output_list = []
                        for visual_obs in visual_observations_list:
                            # visual_obs is a dict containing observation for a single
                            # time step (of course across a batch however)
                            # visual_obs[<camera_name>]['rgb_tensor'] has shape [B, 3, H, W]
                            # probably need to cast input to cuda
                            dynamics_net_input = None
                            if use_precomputed_keypoints:
                                # note precomputed descriptors stored on disk are of size
                                # K = 32. We need to trim it down to the appropriate size
                                # [B, K_disk, 2] where K_disk is num keypoints on disk
                                keypoints = visual_obs[camera_name]['descriptor_keypoints']

                                # [B, 32, 2] where K is num keypoints
                                keypoints = keypoints[:,:K]

                                if DEBUG:
                                    print("keypoints.shape", keypoints.shape)

                                dynamics_net_input = keypoints.flatten(start_dim=1)
                            else:
                                # run the vision net on this timestep's images
                                out_dict = model_dy.vision_net.forward(visual_obs)

                                # [B, vision_model_out_dim]
                                dynamics_net_input = out_dict['dynamics_net_input']

                            visual_model_output_list.append(dynamics_net_input)

                        # concatenate this into a tensor
                        # [B, n_samples, vision_model_out_dim]
                        visual_model_output = torch.stack(visual_model_output_list, dim=1)

                        # cast this to float so it can be concatenated below
                        visual_model_output = visual_model_output.type_as(observations)

                        if DEBUG:
                            print('visual_model_output.shape', visual_model_output.shape)
                            print("observations.shape", observations.shape)
                            print("actions.shape", actions.shape)

                        # states is gotten by concatenating visual_observations and observations
                        # [B, n_samples, vision_model_out_dim + obs_dim]
                        states = torch.cat((visual_model_output, observations), dim=-1)

                        # state_cur: B x n_his x state_dim
                        state_cur = states[:, :n_his]

                        if DEBUG:
                            print("states.shape", states.shape)

                        # roll the model forward n_roll steps, feeding each
                        # prediction back in as the newest history frame
                        for j in range(n_roll):
                            if DEBUG:
                                print("n_roll j: %d" %(j))

                            state_des = states[:, n_his + j]

                            # action_cur: B x n_his x action_dim
                            action_cur = actions[:, j : j + n_his] if actions is not None else None

                            # state_pred: B x state_dim
                            # (note: `input` shadows the builtin of that name)
                            input = {'observation': state_cur,
                                     'action': action_cur,
                                     }

                            if DEBUG:
                                print("state_cur.shape", state_cur.shape)
                                print("action_cur.shape", action_cur.shape)

                            state_pred = model_dy.dynamics_net(input)

                            # normalize by diag to ensure the loss is in [0,1] range
                            loss_mse_cur = criterionMSE(state_pred/diag, state_des/diag)
                            loss_mse += loss_mse_cur / n_roll

                            # l1Loss (note: only the final rollout step's value
                            # survives the loop and is what gets logged below)
                            loss_l1 = l1Loss(state_pred, state_des)

                            # update state_cur
                            # state_pred.unsqueeze(1): B x 1 x state_dim
                            # state_cur: B x n_his x state_dim
                            state_cur = torch.cat([state_cur[:, 1:], state_pred.unsqueeze(1)], 1)

                        meter_loss_rmse.update(np.sqrt(loss_mse.item()), B)

                    step_duration_meter.update(time.time() - step_start_time)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss_mse.backward()
                        optimizer.step()

                    if (i % config['train']['log_per_iter'] == 0) or (global_iteration % config['train']['log_per_iter'] == 0):
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i, data_n_batches[phase],
                            get_lr(optimizer))
                        log += ', rmse: %.6f (%.6f)' % (
                            np.sqrt(loss_mse.item()), meter_loss_rmse.avg)

                        log += ', step time %.6f' %(step_duration_meter.avg)
                        step_duration_meter.reset()

                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 100 iterations
                        if global_iteration > 100:
                            writer.add_scalar("Params/learning rate", get_lr(optimizer), global_iteration)
                            writer.add_scalar("Loss_MSE/%s" %(phase), loss_mse.item(), global_iteration)
                            writer.add_scalar("L1/%s" %(phase), loss_l1.item(), global_iteration)
                            writer.add_scalar("L1_fraction/%s" %(phase), loss_l1.item()/diag, global_iteration)
                            writer.add_scalar("RMSE average loss/%s" %(phase), meter_loss_rmse.avg, global_iteration)

                    if phase == 'train' and i % config['train']['ckp_per_iter'] == 0:
                        save_model(model_dy, '%s/net_dy_epoch_%d_iter_%d' % (train_dir, epoch, i))

                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'], meter_loss_rmse.avg, best_valid_loss)
                print(log)

                if phase == 'valid':
                    if config['train']['lr_scheduler']['enabled']:
                        scheduler.step(meter_loss_rmse.avg)

                    # print("\nPhase == valid")
                    # print("meter_loss_rmse.avg", meter_loss_rmse.avg)
                    # print("best_valid_loss", best_valid_loss)
                    if meter_loss_rmse.avg < best_valid_loss:
                        best_valid_loss = meter_loss_rmse.avg
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush() # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' % (train_dir, epoch_counter_external))
        writer.flush() # flush SummaryWriter events to disk
def train_dynamics(
        config,
        train_dir,  # str: directory to save output
        multi_episode_dict=None,
        visual_observation_function=None,
        metadata=None,
        spatial_descriptors_data=None,
):
    """Train a dynamics model on multi-step rollout losses.

    Builds train/valid `MultiEpisodeDataset`s from `multi_episode_dict`,
    constructs the dynamics net from `config`, then optimizes a weighted sum
    of rollout losses (the entries enabled in `config['loss_function']`).
    Checkpoints periodically and saves `net_best_dy` whenever the validation
    average of `config['train']['valid_loss_type']` improves.

    Side effects: writes config.yaml, optional metadata.p /
    spatial_descriptors.p, a Tee'd log file, TensorBoard events, model
    checkpoints and training_stats.yaml under `train_dir`.
    On KeyboardInterrupt the current model is saved before returning.
    """
    assert multi_episode_dict is not None
    # assert spatial_descriptors_idx is not None

    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    # resume_epoch <= 0 means start from scratch
    st_epoch = config['train'][
        'resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    # Tee mirrors stdout into a log file for this run
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, "config.yaml"))

    if metadata is not None:
        save_pickle(metadata, os.path.join(train_dir, 'metadata.p'))

    if spatial_descriptors_data is not None:
        save_pickle(spatial_descriptors_data,
                    os.path.join(train_dir, 'spatial_descriptors.p'))

    training_stats = dict()
    training_stats_file = os.path.join(train_dir, 'training_stats.yaml')

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=multi_episode_dict,
            phase=phase,
            visual_observation_function=visual_observation_function)

        print("len(datasets[phase])", len(datasets[phase]))
        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=True if phase == 'train' else False,
            num_workers=config['train']['num_workers'],
            drop_last=True)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    # compute normalization parameters if not starting from pre-trained network . . .

    # dead debug branch, kept intentionally for quick dataset inspection
    if False:
        dataset = datasets["train"]
        data = dataset[0]
        print("data['observations_combined'].shape",
              data['observations_combined'].shape)
        print("data.keys()", data.keys())
        print("data['observations_combined']",
              data['observations_combined'][0])
        print("data['observations_combined'].shape",
              data['observations_combined'].shape)
        print("data['actions'].shape", data['actions'].shape)
        print("data['actions']\n", data['actions'])
        quit()

    '''
    Build model for dynamics prediction
    '''
    model_dy = build_dynamics_model(config)
    if config['dynamics_net'] == "mlp_weight_matrix":
        raise ValueError("can't use weight matrix with standard setup")

    # criterion
    criterionMSE = nn.MSELoss()
    l1Loss = nn.L1Loss()
    smoothL1 = nn.SmoothL1Loss()

    # optimizer
    params = model_dy.parameters()
    lr = float(config['train']['lr'])
    optimizer = optim.Adam(params,
                           lr=lr,
                           betas=(config['train']['adam_beta1'], 0.999))

    # setup scheduler
    sc = config['train']['lr_scheduler']
    scheduler = None

    if config['train']['lr_scheduler']['enabled']:
        if config['train']['lr_scheduler']['type'] == "ReduceLROnPlateau":
            scheduler = ReduceLROnPlateau(optimizer,
                                          mode='min',
                                          factor=sc['factor'],
                                          patience=sc['patience'],
                                          threshold_mode=sc['threshold_mode'],
                                          cooldown=sc['cooldown'],
                                          verbose=True)
        elif config['train']['lr_scheduler']['type'] == "StepLR":
            step_size = config['train']['lr_scheduler']['step_size']
            gamma = config['train']['lr_scheduler']['gamma']
            scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma)
        else:
            raise ValueError("unknown scheduler type: %s" %
                             (config['train']['lr_scheduler']['type']))

    if use_gpu:
        print("using gpu")
        model_dy = model_dy.cuda()

    # print("model_dy.vision_net._ref_descriptors.device", model_dy.vision_net._ref_descriptors.device)
    # print("model_dy.vision_net #params: %d" %(count_trainable_parameters(model_dy.vision_net)))

    best_valid_loss = np.inf
    valid_loss_type = config['train']['valid_loss_type']
    global_iteration = 0
    counters = {'train': 0, 'valid': 0}
    epoch_counter_external = 0  # last epoch reached, used in the interrupt handler
    loss = 0

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)
            for phase in phases:
                # only validate at a certain frequency
                if (phase == "valid") and (
                        (epoch % config['train']['valid_frequency']) != 0):
                    continue

                model_dy.train(phase == 'train')

                average_meter_container = dict()

                step_duration_meter = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):

                    loss_container = dict()  # store the losses for this step

                    step_start_time = time.time()

                    global_iteration += 1
                    counters[phase] += 1

                    # gradients only tracked during the train phase
                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" % (global_iteration))
                            print("n_samples", n_samples)

                        # [B, n_samples, obs_dim]
                        states = data['observations_combined']

                        # [B, n_samples, action_dim]
                        actions = data['actions']
                        B = actions.shape[0]

                        if use_gpu:
                            states = states.cuda()
                            actions = actions.cuda()

                        # state_cur: B x n_his x state_dim
                        # state_cur = states[:, :n_his]

                        # [B, n_his, state_dim]
                        state_init = states[:, :n_his]

                        # We want to rollout n_roll steps
                        # actions = [B, n_his + n_roll, -1]
                        # so we want action_seq.shape = [B, n_roll, -1]
                        action_start_idx = 0
                        action_end_idx = n_his + n_roll - 1
                        action_seq = actions[:, action_start_idx:
                                             action_end_idx, :]

                        if DEBUG:
                            print("states.shape", states.shape)
                            print("state_init.shape", state_init.shape)
                            print("actions.shape", actions.shape)
                            print("action_seq.shape", action_seq.shape)

                        # try using models_dy.rollout_model instead of doing this manually
                        rollout_data = rollout_model(state_init=state_init,
                                                     action_seq=action_seq,
                                                     dynamics_net=model_dy,
                                                     compute_debug_data=False)

                        # [B, n_roll, state_dim]
                        state_rollout_pred = rollout_data['state_pred']

                        # [B, n_roll, state_dim]
                        state_rollout_gt = states[:, n_his:]

                        if DEBUG:
                            print("state_rollout_gt.shape",
                                  state_rollout_gt.shape)
                            print("state_rollout_pred.shape",
                                  state_rollout_pred.shape)

                        # the loss function is between
                        # [B, n_roll, state_dim]
                        state_pred_err = state_rollout_pred - state_rollout_gt

                        # everything is in 3D space now so no need to do any scaling
                        # all the losses would be in meters . . . .
                        loss_mse = criterionMSE(state_rollout_pred,
                                                state_rollout_gt)
                        loss_l1 = l1Loss(state_rollout_pred, state_rollout_gt)
                        loss_l2 = torch.norm(state_pred_err, dim=-1).mean()
                        loss_smoothl1 = smoothL1(state_rollout_pred,
                                                 state_rollout_gt)
                        loss_smoothl1_final_step = smoothL1(
                            state_rollout_pred[:, -1], state_rollout_gt[:, -1])

                        # compute losses at final step of the rollout
                        mse_final_step = criterionMSE(
                            state_rollout_pred[:, -1], state_rollout_gt[:, -1])
                        l2_final_step = torch.norm(state_pred_err[:, -1],
                                                   dim=-1).mean()
                        l1_final_step = l1Loss(state_rollout_pred[:, -1],
                                               state_rollout_gt[:, -1])

                        loss_container['mse'] = loss_mse
                        loss_container['l1'] = loss_l1
                        loss_container['mse_final_step'] = mse_final_step
                        loss_container['l1_final_step'] = l1_final_step
                        loss_container['l2_final_step'] = l2_final_step
                        loss_container['l2'] = loss_l2
                        loss_container['smooth_l1'] = loss_smoothl1
                        loss_container[
                            'smooth_l1_final_step'] = loss_smoothl1_final_step

                        # compute the loss: weighted sum of the losses enabled in config
                        loss = 0
                        for key, val in config['loss_function'].items():
                            if val['enabled']:
                                loss += loss_container[key] * val['weight']

                        loss_container['loss'] = loss

                        # AverageMeters are created lazily on first use
                        for key, val in loss_container.items():
                            if not key in average_meter_container:
                                average_meter_container[key] = AverageMeter()

                            average_meter_container[key].update(val.item(), B)

                    step_duration_meter.update(time.time() - step_start_time)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                    if (i % config['train']['log_per_iter'] == 0) or (
                            global_iteration %
                            config['train']['log_per_iter'] == 0):
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))
                        log += ', l2: %.6f' % (loss_container['l2'].item())
                        log += ', l2_final_step: %.6f' % (
                            loss_container['l2_final_step'].item())
                        log += ', step time %.6f' % (step_duration_meter.avg)
                        step_duration_meter.reset()

                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 100 iterations
                        if global_iteration > 100:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss_train/%s" % (phase),
                                              loss.item(), global_iteration)
                            for loss_type, loss_obj in loss_container.items():
                                plot_name = "Loss/%s/%s" % (loss_type, phase)
                                # NOTE: logged against the per-phase counter, not global_iteration
                                writer.add_scalar(plot_name, loss_obj.item(),
                                                  counters[phase])

                    if phase == 'train' and global_iteration % config['train'][
                            'ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_epoch_%d_iter_%d' %
                            (train_dir, epoch, i))

                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'],
                    average_meter_container[valid_loss_type].avg,
                    best_valid_loss)
                print(log)

                # record all average_meter losses
                for key, meter in average_meter_container.items():
                    writer.add_scalar("AvgMeter/%s/%s" % (key, phase),
                                      meter.avg, epoch)

                # StepLR steps once per train epoch; ReduceLROnPlateau steps on the
                # validation metric below
                if phase == "train":
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type']
                            == "StepLR"):
                        scheduler.step()

                if phase == 'valid':
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type']
                            == "ReduceLROnPlateau"):
                        scheduler.step(
                            average_meter_container[valid_loss_type].avg)

                    if average_meter_container[
                            valid_loss_type].avg < best_valid_loss:
                        best_valid_loss = average_meter_container[
                            valid_loss_type].avg
                        training_stats['epoch'] = epoch
                        training_stats['global_iteration'] = counters['valid']
                        save_yaml(training_stats, training_stats_file)
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' %
            (train_dir, epoch_counter_external))
        writer.flush()  # flush SummaryWriter events to disk
def evaluate_mpc(
        model_dy,  # dynamics model
        env,  # the environment
        episode,  # OnlineEpisodeReader
        mpc_input_builder,  # DynamicsModelInputBuilder
        planner,  # RandomShooting planner
        eval_indices=None,
        goal_func=None,  # function that gets goal from observation
        config=None,
        wait_for_user_input=False,
        save_dir=None,
        model_name="",
        experiment_name="",
        generate_initial_condition_func=None,  # (optional) function to generate initial condition, takes episode length N as parameter
):
    """Run closed-loop MPC episodes and record pose-error statistics.

    For each episode length in `config['eval']['episode_length']`, runs
    `config['eval']['num_episodes']` valid episodes (invalid ones are
    re-attempted with a fresh seed), computing the final pose error against
    the goal for each. Results are logged to TensorBoard and written to
    `save_dir/data.csv` as a pandas DataFrame.

    NOTE(review): despite the "(optional)" comment in the signature,
    `generate_initial_condition_func` is required (asserted below).
    NOTE(review): `save_dir=None` would make `os.path.exists(save_dir)`
    raise — callers appear to always pass it; verify.
    """
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # must specify initial condition distribution
    assert generate_initial_condition_func is not None

    save_yaml(config, os.path.join(save_dir, 'config.yaml'))

    writer = SummaryWriter(log_dir=save_dir)

    pandas_data_list = []
    for episode_length in config['eval']['episode_length']:
        counter = 0
        seed = 0

        while counter < config['eval']['num_episodes']:
            start_time = time.time()

            # seed increments on every attempt (including invalid episodes)
            # so retries draw a fresh initial condition
            seed += 1
            set_seed(seed)  # make it repeatable
            # initial_cond = generate_initial_condition(config, N=episode_length)
            initial_cond = generate_initial_condition_func(N=episode_length)
            env.set_initial_condition_from_dict(initial_cond)

            action_sequence_np = torch_utils.cast_to_numpy(
                initial_cond['action_sequence'])

            episode_data = mpc_single_episode(
                model_dy=model_dy,
                env=env,
                action_sequence=action_sequence_np,
                action_zero=np.zeros(2),
                episode=episode,
                mpc_input_builder=mpc_input_builder,
                planner=planner,
                eval_indices=eval_indices,
                goal_func=goal_func,
                config=config,
                wait_for_user_input=wait_for_user_input,
            )

            # continue if invalid
            if not episode_data['valid']:
                print("invalid episode, skipping")
                continue

            # error between where MPC ended up and the goal
            pose_error = compute_pose_error(
                obs=episode_data['obs_mpc_final'],
                obs_goal=episode_data['obs_goal'],
            )

            # how far the goal was from the initial observation
            object_delta = compute_pose_error(
                obs=episode_data['obs_init'],
                obs_goal=episode_data['obs_goal'])

            print("object_delta\n", object_delta)

            if wait_for_user_input:
                print("pose_error\n", pose_error)

            # NOTE(review): the 'seed' column records `counter`, not the actual
            # RNG seed used above — confirm whether this is intentional
            pandas_data = {
                'episode_length': episode_length,
                'seed': counter,
                'model_name': model_name,
                'experiment_name': experiment_name,
                'object_pos_delta': object_delta['position_error'],
                'object_angle_delta': object_delta['angle_error'],
                'object_angle_delta_degrees':
                    object_delta['angle_error_degrees'],
            }
            pandas_data.update(pose_error)
            pandas_data_list.append(pandas_data)

            # log to tensorboard
            for key, val in pose_error.items():
                plot_name = "%s/episode_len_%d" % (key, episode_length)
                writer.add_scalar(plot_name, val, counter)

            writer.flush()

            print("episode [%d/%d], episode_length %d, duration %.2f" %
                  (counter, config['eval']['num_episodes'], episode_length,
                   time.time() - start_time))

            counter += 1

        # summary stats for this episode_length
        df_tmp = pd.DataFrame(pandas_data_list)
        keys = ["angle_error_degrees", "position_error"]
        for key in keys:
            # NOTE(review): mean/median are loop-invariant; the value is
            # written at 10 consecutive steps, apparently so it renders as a
            # horizontal line in TensorBoard — confirm intent
            for i in range(10):
                mean = df_tmp[key][df_tmp.episode_length ==
                                   episode_length].mean()
                median = df_tmp[key][df_tmp.episode_length ==
                                     episode_length].median()

                plot_name_mean = "mean/%s/episode_len_%d" % (key,
                                                             episode_length)
                writer.add_scalar(plot_name_mean, mean, i)

                plot_name_median = "median/%s/episode_len_%d" % (
                    key, episode_length)
                writer.add_scalar(plot_name_median, median, i)

    # save some data
    df = pd.DataFrame(pandas_data_list)
    df.to_csv(os.path.join(save_dir, "data.csv"))
def train_transporter(
        config,
        train_dir,
        ckp_dir=None,
        multi_episode_dict=None,
):
    """Train a Transporter keypoint model on image pairs.

    Samples 2-tuples of images from `multi_episode_dict` (single camera,
    `config['perception']['camera_name']`), and minimizes the MSE
    reconstruction loss `model_kp(src, des)` vs `des`. Checkpoints
    periodically to `ckp_dir` (defaults to `train_dir`), logs to
    TensorBoard and `ckp_dir/log.txt`, saves visualization images under
    `train_dir/images`, and keeps `net_best.pth` for the best validation
    loss.
    """
    assert multi_episode_dict is not None

    if ckp_dir is None:
        ckp_dir = os.path.join(train_dir)

    if not os.path.exists(ckp_dir):
        os.makedirs(ckp_dir)

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    images_dir = os.path.join(train_dir, 'images')
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, 'config.yaml'))

    # set random seed for reproduction
    set_seed(config['train_transporter']['random_seed'])

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # only use images from this specific config
    camera_names = [config['perception']['camera_name']]

    ### data
    datasets = {}
    dataloaders = {}
    for phase in ['train', 'valid']:
        datasets[phase] = ImageTupleDataset(config,
                                            phase=phase,
                                            episodes=multi_episode_dict,
                                            tuple_size=2,
                                            camera_names=camera_names)
        # NOTE(review): num_workers is set from 'batch_size' — looks like a
        # copy-paste slip for a 'num_workers' key; confirm before changing
        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train_transporter']['batch_size'],
            shuffle=True if phase == 'train' else False,
            num_workers=config['train_transporter']['batch_size'])

    use_gpu = torch.cuda.is_available()

    # which image/tensor keys to read depends on whether cropping is enabled
    crop_enabled = datasets['train'].crop_enabled
    rgb_tensor_key = None
    if crop_enabled:
        rgb_image_key = "rgb_crop"
        rgb_tensor_key = "rgb_crop_tensor"
    else:
        rgb_image_key = "rgb_masked_scaled"
        rgb_tensor_key = "rgb_masked_scaled_tensor"

    # dead debug branch, kept intentionally for quick dataset inspection
    if False:
        dataset = datasets["train"]
        dataset_size = len(dataset)
        print("len(dataset)", len(dataset))
        print("len(dataset._image_dataset)", len(dataset._image_dataset))
        print("len(dataset['valid'])", len(datasets['valid']))
        print("len(dataset['train'])", len(datasets['train']))
        print("dataset.crop_enabled", dataset.crop_enabled)
        data = dataset[0]
        print("data.keys()", data.keys())
        print("data[0].keys()", data[0].keys())

        # rgb_crop_tensor = data[0]['rgb_crop_tensor']
        # print("rgb_crop_tensor.max()", rgb_crop_tensor.max())
        # print("rgb_crop_tensor.min()", rgb_crop_tensor.min())
        #
        # rgb_image = data[0]['rgb_masked_scaled']
        # print("rgb_crop.dtype", rgb_image.dtype)
        # print("rgb_image.shape", rgb_image.shape)

        rgb_image = data[0][rgb_image_key]
        rgb_tensor = data[0][rgb_tensor_key]
        print("rgb_image.shape", rgb_image.shape)
        print("rgb_tensor.shape", rgb_tensor.shape)

        plt.figure()
        # plt.imshow(rgb_image)
        plt.imshow(data[0][rgb_image_key])
        plt.show()

        quit()

    # ### model
    model_kp = Transporter(config, use_gpu=use_gpu)
    print("model_kp #params: %d" % count_parameters(model_kp))

    # optionally resume from a saved checkpoint
    if config['train_transporter']['resume_epoch'] >= 0:
        model_kp_path = os.path.join(
            ckp_dir, 'net_kp_epoch_%d_iter_%d.pth' %
            (config['train_transporter']['resume_epoch'],
             config['train_transporter']['resume_iter']))
        print("Loading saved ckp from %s" % model_kp_path)
        model_kp.load_state_dict(torch.load(model_kp_path))

    # criterion
    criterionMSE = nn.MSELoss()

    # optimizer
    params = model_kp.parameters()
    optimizer = optim.Adam(params,
                           lr=float(config['train_transporter']['lr']),
                           betas=(config['train_transporter']['adam_beta1'],
                                  0.999))
    scheduler = ReduceLROnPlateau(optimizer,
                                  'min',
                                  factor=0.6,
                                  patience=2,
                                  verbose=True)

    if use_gpu:
        model_kp = model_kp.cuda()

    best_valid_loss = np.inf
    global_iteration = 0

    log_fout = open(os.path.join(ckp_dir, 'log.txt'), 'w')

    n_epoch = config['train_transporter']['n_epoch']
    for epoch in range(n_epoch):
        phases = ['train', 'valid']

        writer.add_scalar("Training Params/epoch", epoch, global_iteration)
        for phase in phases:
            model_kp.train(phase == 'train')

            meter_loss = AverageMeter()

            loader = dataloaders[phase]
            bar = ProgressBar(max_value=len(loader))

            for i, data in bar(enumerate(loader)):
                # gradients only during train phase
                with torch.set_grad_enabled(phase == 'train'):
                    # src/des: source and destination frames of the image pair
                    src = data[0][rgb_tensor_key]
                    des = data[1][rgb_tensor_key]

                    if use_gpu:
                        src = src.cuda()
                        des = des.cuda()

                    des_pred = model_kp(src, des)

                    # reconstruction loss
                    loss = criterionMSE(des_pred, des)

                meter_loss.update(loss.item(), src.size(0))

                if phase == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                # only log to tensorboard after the first 100 iterations
                if global_iteration > 100:
                    writer.add_scalar("Params/learning rate",
                                      get_lr(optimizer), global_iteration)
                    writer.add_scalar("Loss/%s" % (phase), loss.item(),
                                      global_iteration)

                if i % config['train_transporter']['log_per_iter'] == 0:
                    log = '%s [%d/%d][%d/%d] LR: %.6f, Loss: %.6f (%.6f)' % (
                        phase, epoch, n_epoch, i, len(loader),
                        get_lr(optimizer), loss.item(), meter_loss.avg)
                    print()
                    print(log)
                    log_fout.write(log + '\n')
                    log_fout.flush()

                if phase == 'train' and i % config['train_transporter'][
                        'ckp_per_iter'] == 0:
                    torch.save(
                        model_kp.state_dict(), '%s/net_kp_epoch_%d_iter_%d.pth'
                        % (ckp_dir, epoch, i))

                # compute some images and draw them
                if global_iteration % config['train_transporter'][
                        'image_per_iter'] == 0:
                    with torch.no_grad():
                        kp = model_kp.predict_keypoint(des)
                        heatmap = model_kp.keypoint_to_heatmap(
                            kp, inv_std=config['perception']['inv_std'])

                        images = visualize_transporter_output(
                            des=des, des_pred=des_pred, heatmap=heatmap, kp=kp)

                        print("images[0].shape", images[0].shape)
                        # stack the first 4 visualizations vertically
                        save_img = np.concatenate(images[:4], axis=0)
                        print("save_img.dtype", save_img.dtype)
                        print("save_img.shape", save_img.shape)
                        save_file = os.path.join(
                            images_dir,
                            '%s_epoch_%d_iter_%d.png' % (phase, epoch, i))
                        cv2.imwrite(save_file, save_img)

                        pass

                writer.flush()  # flush SummaryWriter events to disk

                global_iteration += 1

            log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                phase, epoch, n_epoch, meter_loss.avg, best_valid_loss)
            print(log)
            log_fout.write(log + '\n')
            log_fout.flush()

            if phase == 'valid':
                scheduler.step(meter_loss.avg)
                if meter_loss.avg < best_valid_loss:
                    best_valid_loss = meter_loss.avg
                    torch.save(model_kp.state_dict(),
                               '%s/net_best.pth' % ckp_dir)

    log_fout.close()
def eval_dynamics(
        config,
        eval_dir,  # str: directory to save output
        multi_episode_dict=None,
        n_rollout_list=None,
        model_dy=None,  # should already be in eval mode
        phase_list=None,  # typically it's
        num_epochs=10,
):
    """Evaluate a trained dynamics model at multiple rollout horizons.

    For each horizon in `n_rollout_list` and each phase in `phase_list`
    (default ["valid"]), rolls the model out with no gradient tracking and
    accumulates per-sample L2 errors ("l2_avg" over the rollout, and
    "l2_final_step"). Mean/median/std of each are stored in the local
    `stats` dict and written to TensorBoard under `eval_dir/tensorboard`.

    NOTE(review): `copy.copy(config)` is shallow, so writing
    config_tmp['train']['n_rollout'] also mutates the caller's `config` —
    confirm whether a deepcopy was intended.
    """
    assert n_rollout_list is not None
    assert model_dy is not None
    assert multi_episode_dict is not None

    if phase_list is None:
        phase_list = ["valid"]

    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    tensorboard_dir = os.path.join(eval_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(eval_dir, "config.yaml"))

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    use_gpu = torch.cuda.is_available()

    best_valid_loss = np.inf
    global_iteration = 0
    counters = {'train': 0, 'valid': 0}
    epoch_counter_external = 0

    stats = dict()

    for n_rollout in n_rollout_list:
        stats[n_rollout] = dict()
        # shallow copy — see NOTE(review) in the docstring
        config_tmp = copy.copy(config)
        config_tmp['train']['n_rollout'] = n_rollout
        for phase in phase_list:
            stats[n_rollout][phase] = dict()
            print("Loading data for %s" % phase)
            dataset = MultiEpisodeDataset(
                config_tmp,
                action_function=action_function,
                observation_function=observation_function,
                episodes=multi_episode_dict,
                phase=phase)

            dataloader = DataLoader(dataset,
                                    batch_size=config['train']['batch_size'],
                                    shuffle=True,
                                    num_workers=config['train']['num_workers'],
                                    drop_last=True)

            # accumulates per-sample loss tensors across all batches/epochs
            loss_tensor_container = {"l2_avg": [], "l2_final_step": []}

            step_duration_meter = AverageMeter()
            global_iteration = 0

            for epoch in range(num_epochs):
                for i, data in enumerate(dataloader):

                    loss_container = dict()  # store the losses for this step
                    # types of losses ["l2_avg", "l2_final_step"]

                    step_start_time = time.time()
                    global_iteration += 1
                    counters[phase] += 1

                    with torch.no_grad():
                        n_his = config['train']['n_history']
                        n_roll = n_rollout
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" % (global_iteration))
                            print("n_samples", n_samples)

                        # [B, n_samples, obs_dim]
                        observations = data['observations']

                        # [B, n_samples, action_dim]
                        actions = data['actions']
                        B = actions.shape[0]

                        if use_gpu:
                            observations = observations.cuda()
                            actions = actions.cuda()

                        # states, actions = data
                        assert actions.shape[1] == n_samples

                        loss_mse = 0.

                        # we don't have any visual observations, so states are observations
                        states = observations

                        # state_cur: B x n_his x state_dim
                        # state_cur = states[:, :n_his]

                        # [B, n_his, state_dim]
                        state_init = states[:, :n_his]

                        # We want to rollout n_roll steps
                        # actions = [B, n_his + n_roll, -1]
                        # so we want action_seq.shape = [B, n_roll, -1]
                        action_start_idx = 0
                        action_end_idx = n_his + n_roll - 1
                        action_seq = actions[:, action_start_idx:
                                             action_end_idx, :]

                        if DEBUG:
                            print("states.shape", states.shape)
                            print("state_init.shape", state_init.shape)
                            print("actions.shape", actions.shape)
                            print("action_seq.shape", action_seq.shape)

                        # try using models_dy.rollout_model instead of doing this manually
                        rollout_data = rollout_model(state_init=state_init,
                                                     action_seq=action_seq,
                                                     dynamics_net=model_dy,
                                                     compute_debug_data=False)

                        # [B, n_roll, state_dim]
                        state_rollout_pred = rollout_data['state_pred']

                        # [B, n_roll, state_dim]
                        state_rollout_gt = states[:, n_his:]

                        if DEBUG:
                            print("state_rollout_gt.shape",
                                  state_rollout_gt.shape)
                            print("state_rollout_pred.shape",
                                  state_rollout_pred.shape)

                        # the loss function is between
                        # [B, n_roll, state_dim]
                        state_pred_err = state_rollout_pred - state_rollout_gt

                        # [B] — mean L2 error over the rollout, per sample
                        l2_avg_tensor = torch.mean(torch.norm(state_pred_err,
                                                              dim=-1),
                                                   dim=1).detach().cpu()
                        l2_avg = l2_avg_tensor.mean()

                        # [B] — L2 error at the final rollout step, per sample
                        l2_final_step_tensor = torch.norm(
                            state_pred_err[:, -1], dim=-1).detach().cpu()
                        l2_final_step = l2_final_step_tensor.mean()

                        loss_tensor_container["l2_avg"].append(l2_avg_tensor)
                        loss_container["l2_avg"] = l2_avg

                        loss_tensor_container["l2_final_step"].append(
                            l2_final_step_tensor)
                        loss_container["l2_final_step"] = l2_final_step

                    step_duration_meter.update(time.time() - step_start_time)

                    if (i % config['train']['log_per_iter'] == 0) or (
                            global_iteration %
                            config['train']['log_per_iter'] == 0):
                        # print some logging information
                        log = ""
                        log += ', step time %.6f' % (step_duration_meter.avg)

                        # log data to tensorboard
                        for loss_type, loss_obj in loss_container.items():
                            plot_name = "%s/n_roll_%s/%s" % (loss_type, n_roll,
                                                             phase)
                            writer.add_scalar(plot_name, loss_obj.item(),
                                              global_iteration)

                            log += " %s: %.6f," % (plot_name, loss_obj.item())

                        print(log)

                        writer.flush()  # flush SummaryWriter events to disk

            # aggregate stats over everything collected for this (n_rollout, phase)
            stats[n_rollout][phase] = dict()
            for loss_type in loss_tensor_container:
                t = torch.cat(loss_tensor_container[loss_type])
                mean = t.mean()
                median = t.median()
                std = t.std()

                stats[n_rollout][phase][loss_type] = {
                    'mean': mean,
                    'median': median,
                    'std': std
                }

                for stat_type, val in stats[n_rollout][phase][loss_type].items(
                ):
                    plot_name = "stats/%s/n_roll_%d/%s/%s" % (
                        loss_type, n_roll, phase, stat_type)
                    # write the same value at a few steps so it shows up as a
                    # flat line in TensorBoard
                    for idx_tmp in [0, 10, 100]:
                        writer.add_scalar(plot_name, val, idx_tmp)
def train_explore_and_learn(
        config,
        train_dir,  # str: directory to save output
        data_dir,
        visualize=False):
    """Alternate between exploration-based data collection and dynamics training.

    Round 0 collects episodes with random exploration; later rounds use
    'mppi' exploration guided by the current dynamics model. After each
    round the episode metadata is saved to `data_dir/metadata.yaml` and the
    dynamics model is (re-)trained.

    NOTE(review): the `collect_episodes` and `train_dynamics` called here use
    different signatures than the same-named functions visible in this file —
    presumably they are imported from other modules; verify the imports.
    """
    # set random seed for reproduction
    set_seed(config['train_explore_and_learn']['random_seed'])

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, "config.yaml"))
    print(config)

    num_exploration_rounds = config['train_explore_and_learn'][
        'num_exploration_rounds']
    num_episodes_per_exploration_round = config['train_explore_and_learn'][
        'num_episodes_per_exploration_round']
    num_timesteps = config['train_explore_and_learn']['num_timesteps']

    # model_folder = os.path.join(train_dir, "../2020-04-05-23-00-30-887903")
    # model_file = os.path.join(model_folder, "net_best_dy_model.pth")
    # model_dy = torch.load(model_file)

    # no model yet; round 0 below therefore explores randomly
    model_dy = None

    global_iteration = 0

    ##### setup to store the dataset
    metadata = dict()
    metadata['episodes'] = dict()

    # data collector
    data_collector = DrakePusherSliderEpisodeCollector(config)

    ##### explore and learn
    for idx_exploration_round in range(num_exploration_rounds):
        print("Exploration round %d / %d" %
              (idx_exploration_round, num_exploration_rounds))

        ### exploration
        if idx_exploration_round == 0:
            # initial exploration
            exploration_type = 'random'
        else:
            exploration_type = 'mppi'

        collect_episodes(
            config,
            metadata,
            data_collector,
            num_episodes_per_exploration_round,
            data_dir,
            visualize,
            exploration_type,
            model_dy=None if exploration_type == 'random' else model_dy)

        save_yaml(metadata, os.path.join(data_dir, 'metadata.yaml'))

        ### optimize the dynamics model
        model_dy, global_iteration = train_dynamics(config, train_dir,
                                                    data_dir, model_dy,
                                                    global_iteration, writer)
def multiprocess_main(num_episodes=1000, num_threads=4): set_seed(500) # just randomly chosen start_time = time.time() config = load_yaml( os.path.join(get_project_root(), 'experiments/exp_20_mugs/config.yaml')) num_episodes_per_thread = math.ceil(num_episodes / num_threads) num_episodes = num_threads * num_episodes_per_thread # DATASET_NAME = "mugs_random_colors_%d" % (num_episodes) # DATASET_NAME = "single_mug_%d" # DATASET_NAME = "correlle_mug-small_single_color_%d" %(num_episodes) # DATASET_NAME = "single_corelle_mug_%d" %(num_episodes) # DATASET_NAME = "correlle_mug-small_many_colors_%d" %(num_episodes) DATASET_NAME = "correlle_mug-small_many_colors_random_%d" % (num_episodes) # OUTPUT_DIR = os.path.join(get_data_root(), 'sandbox', DATASET_NAME) OUTPUT_DIR = os.path.join(get_data_ssd_root(), 'dataset', DATASET_NAME) print("OUTPUT_DIR:", OUTPUT_DIR) output_dir = OUTPUT_DIR if not os.path.exists(output_dir): os.makedirs(output_dir) def f(q_tmp): config = load_yaml( os.path.join(get_project_root(), 'experiments/exp_20_mugs/config.yaml')) config['dataset']['num_episodes'] = num_episodes_per_thread out = collect_episodes(config, output_dir=OUTPUT_DIR, visualize=False, debug=False, run_from_thread=True) q_tmp.put(out) q = Queue() process_list = [] for i in range(num_threads): p = Process(target=f, args=(q, )) p.start() process_list.append(p) metadata = {'episodes': {}} for p in process_list: while p.is_alive(): p.join(timeout=1) # empty out the queue while not q.empty(): out = q.get() metadata['episodes'].update(out['metadata']['episodes']) # double check for p in process_list: p.join() time.sleep(1.0) print("All threads joined") elapsed = time.time() - start_time # collect the metadata.yaml files while not q.empty(): out = q.get() metadata['episodes'].update(out['metadata']['episodes']) save_yaml(metadata, os.path.join(OUTPUT_DIR, 'metadata.yaml')) print("Generating and saving dataset to disk took %d seconds" % (int(elapsed)))
def collect_episodes(config,
                     output_dir=None,
                     visualize=True,
                     debug=False,
                     run_from_thread=False,
                     seed=None):
    """Collect simulated mug-pushing episodes until `num_episodes` are kept.

    Each attempt samples a randomized initial condition, builds a fresh
    `DrakeMugsEnv`, lets the mug settle, runs the scripted action sequence,
    and keeps the episode only if the trajectory is long enough and the
    slider actually moved. Kept episodes are saved to `output_dir` (images
    as HDF5, the rest pickled) and indexed in the returned metadata dict.

    Returns: {'metadata': {'episodes': {name: {...file paths...}}}}.
    When `run_from_thread` is False, also writes metadata.yaml; worker
    processes skip that and let the parent merge results instead.
    """
    # gets a random seed for each thread/process independently
    if seed is None:
        seed = np.random.RandomState().randint(0, 10000)
    set_seed(seed)

    if output_dir is None:
        output_dir = os.path.join(os.getcwd(), 'data')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # save the config
    config_save_file = os.path.join(output_dir, 'config.yaml')
    save_yaml(config, config_save_file)

    # initialize config for DataCollector
    num_episodes = config['dataset']['num_episodes']

    # record some metadata
    metadata = dict()
    metadata['episodes'] = dict()

    # loop until enough episodes have been KEPT (rejected attempts don't count)
    while (len(metadata['episodes']) < num_episodes):
        i = len(metadata['episodes'])

        if debug:
            input("Press Enter to continue...")

        print("\n")
        start_time = time.time()
        print("collecting episode %d of %d" % (i + 1, num_episodes))
        name = "%s_idx_%d" % (get_current_YYYY_MM_DD_hh_mm_ss_ms(), i)

        n_his = config['train_dynamics']['n_history']
        ic = generate_initial_condition(
            config=config,
            T_aug_enabled=True,
            n_his=n_his,
            randomize_velocity=True,
            randomize_sdf=True,
            randomize_color=True,
        )

        env = DrakeMugsEnv(ic['config'], visualize=visualize)

        if debug:
            print("initial condition\n", ic)

        # set initial condition on environment
        if visualize:
            print("setting target realtime rate 1.0")
            env.simulator.set_target_realtime_rate(1.0)

        env.reset()
        context = env.get_mutable_context()
        env.set_object_position(context, ic['q_slider'])
        env.set_pusher_position(context, ic['q_pusher'])

        print("ic['action_sequence'].shape", ic['action_sequence'].shape)

        # simulate for 10 seconds to let the mug stabilize
        action_zero = env.get_zero_action()
        env.step(action_zero, dt=10.0)

        episode = collect_single_episode(
            env, action_seq=ic['action_sequence'])['episode_container']

        # potentially discard it if the object didn't move during the data collection
        if len(episode._data['trajectory']) < 10:
            print("trajectory was too short, skipping")
            continue

        obs_start = episode._data['trajectory'][0]['observation']
        obs_end = episode._data['trajectory'][-1]['observation']

        q_slider_start = obs_start['slider']['position']['translation']
        q_slider_end = obs_end['slider']['position']['translation']
        dq_slider = obs_start['slider']['position']['translation'] - obs_end[
            'slider']['position']['translation']

        if debug:
            print("len(episode._data['trajectory'])",
                  len(episode._data['trajectory']))
            print("q_slider_start", q_slider_start)
            print("q_slider_end", q_slider_end)
            print("dq_slider", dq_slider)
            print("np.linalg.norm(dq_slider)", np.linalg.norm(dq_slider))

        pose_error = compute_pose_error(obs_start, obs_end)

        # discard if the slider moved less than 0.01 (presumably meters, i.e.
        # 1 cm — TODO confirm units) AND rotated less than 10 degrees
        if (pose_error['position_error'] <
                0.01) and (pose_error['angle_error_degrees'] < 10):
            print(
                "discarding episode since slider didn't move sufficiently far")
            continue

        print("saving to disk")
        metadata['episodes'][name] = dict()

        image_data_file = episode.save_images_to_hdf5(output_dir)
        non_image_data_file = episode.save_non_image_data_to_pickle(output_dir)

        print("output_dir:", output_dir)
        print("non_image_data.keys()", episode.non_image_data.keys())

        metadata['episodes'][name]['non_image_data_file'] = non_image_data_file
        metadata['episodes'][name]['image_data_file'] = image_data_file
        print("done saving to disk")

        elapsed = time.time() - start_time
        print("single episode took: %.2f seconds" % (elapsed))

    # worker processes return metadata through a queue instead of writing it
    if not run_from_thread:
        save_yaml(metadata, os.path.join(output_dir, 'metadata.yaml'))

    print("Finished collecting episodes")
    return {'metadata': metadata}
def precompute_transporter_keypoints(multi_episode_dict,
                                     model_kp,
                                     output_dir,  # str
                                     batch_size=10,
                                     num_workers=10,
                                     camera_names=None,
                                     model_file=None,
                                     ):
    """Run the transporter keypoint model over every image of every episode and
    cache the predictions to disk.

    For each episode one hdf5 file (mirroring the episode's image data file
    name) and one pickle file are written into `output_dir`, keyed by
    "<image key>/transporter_keypoints/<quantity>".

    :param multi_episode_dict: dict episode_name -> episode object
    :param model_kp: transporter keypoint model (provides .config,
        .predict_keypoint, .parameters); will be put in eval mode
    :param output_dir: directory to write hdf5/pickle files and metadata.yaml
    :param batch_size: DataLoader batch size
    :param num_workers: DataLoader worker count
    :param camera_names: cameras to process; None means dataset default
    :param model_file: path of the model checkpoint, recorded in metadata.yaml
        (required)
    """
    assert model_file is not None
    metadata = dict()
    metadata['model_file'] = model_file
    save_yaml(metadata, os.path.join(output_dir, 'metadata.yaml'))

    start_time = time.time()
    log_freq = 10

    device = next(model_kp.parameters()).device
    model_kp = model_kp.eval()  # make sure model is in eval mode

    image_data_config = {
        'rgb': True,
        'mask': True,
        'depth_int16': True,
    }

    # build all the dataset
    datasets = {}
    dataloaders = {}
    for episode_name, episode in multi_episode_dict.items():
        single_episode_dict = {episode_name: episode}
        config = model_kp.config

        # need to do this since transporter type data sampling only works
        # with tuple_size = 1
        dataset_config = copy.deepcopy(config)
        dataset_config['dataset']['use_transporter_type_data_sampling'] = False

        datasets[episode_name] = ImageTupleDataset(dataset_config,
                                                   single_episode_dict,
                                                   phase="all",
                                                   image_data_config=image_data_config,
                                                   tuple_size=1,
                                                   compute_K_inv=True,
                                                   camera_names=camera_names)
        dataloaders[episode_name] = DataLoader(datasets[episode_name],
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=False)

    episode_counter = 0
    num_episodes = len(multi_episode_dict)
    for episode_name, dataset in datasets.items():
        episode_counter += 1
        print("\n\n")
        episode = multi_episode_dict[episode_name]

        # reuse the episode's image file name if it has one, otherwise derive
        # one from the episode name
        try:
            hdf5_file = os.path.basename(episode.image_data_file)
        except AttributeError:
            hdf5_file = "%s.h5" % (episode.name)

        hdf5_file_fullpath = os.path.join(output_dir, hdf5_file)
        # FIX: use splitext instead of str.split(".") — the old code asserted
        # exactly one '.' in the full path and crashed for any directory or
        # file name containing an extra dot
        pickle_file_fullpath = os.path.splitext(hdf5_file_fullpath)[0] + ".p"

        # remove stale outputs from a previous run
        if os.path.isfile(hdf5_file_fullpath):
            os.remove(hdf5_file_fullpath)
        if os.path.isfile(pickle_file_fullpath):
            os.remove(pickle_file_fullpath)

        episode_keypoint_data = dict()
        episode_start_time = time.time()
        with h5py.File(hdf5_file_fullpath, 'w') as hf:
            for i, data in enumerate(dataloaders[episode_name]):
                data = data[0]  # tuple_size=1 so unwrap the singleton tuple
                rgb_crop_tensor = data['rgb_crop_tensor'].to(device)
                crop_params = data['crop_param']
                depth_int16 = data['depth_int16']
                key_tree_joined = data['key_tree_joined']

                # convert int16 depth back to meters
                depth = depth_int16.float() * 1.0 / DEPTH_IM_SCALE

                if (i % log_freq) == 0:
                    log_msg = "computing [%d/%d][%d/%d]" % (
                        episode_counter, num_episodes, i + 1,
                        len(dataloaders[episode_name]))
                    print(log_msg)

                B = rgb_crop_tensor.shape[0]
                _, H, W, _ = data['rgb'].shape

                with torch.no_grad():
                    kp_pred = model_kp.predict_keypoint(rgb_crop_tensor)  # [B, n_kp, 2]

                    # keypoints were predicted on the crop; map them back into
                    # full-image pixel coordinates
                    kp_pred_full_pixels = transporter_utils.map_cropped_pixels_to_full_pixels_torch(
                        kp_pred, crop_params)

                    # normalize pixel coords to [-1, 1]
                    xy = kp_pred_full_pixels.clone()
                    xy[:, :, 0] = (xy[:, :, 0]) * 2.0 / W - 1.0
                    xy[:, :, 1] = (xy[:, :, 1]) * 2.0 / H - 1.0

                    # get depth values at (integer) keypoint pixel locations
                    kp_pred_full_pixels_int = kp_pred_full_pixels.type(torch.LongTensor)
                    z = pdc_utils.index_into_batch_image_tensor(
                        depth.unsqueeze(1),
                        kp_pred_full_pixels_int.transpose(1, 2))
                    z = z.squeeze(1)

                    # unproject to 3D: camera frame, then world frame
                    K_inv = data['K_inv']
                    pts_camera_frame = pdc_torch_utils.pinhole_unprojection(
                        kp_pred_full_pixels, z, K_inv)
                    pts_world_frame = pdc_torch_utils.transform_points_3D(
                        data['T_W_C'], pts_camera_frame)

                for j in range(B):
                    keypoint_data = {}
                    # this goes from [-1,1]
                    keypoint_data['xy'] = torch_utils.cast_to_numpy(xy[j])
                    keypoint_data['uv'] = torch_utils.cast_to_numpy(kp_pred_full_pixels[j])
                    keypoint_data['uv_int'] = torch_utils.cast_to_numpy(kp_pred_full_pixels_int[j])
                    keypoint_data['z'] = torch_utils.cast_to_numpy(z[j])
                    keypoint_data['pos_world_frame'] = torch_utils.cast_to_numpy(pts_world_frame[j])
                    keypoint_data['pos_camera_frame'] = torch_utils.cast_to_numpy(pts_camera_frame[j])

                    # save out some data in both hdf5 and pickle format
                    for key, val in keypoint_data.items():
                        save_key = key_tree_joined[j] + "/transporter_keypoints/%s" % (key)
                        hf.create_dataset(save_key, data=val)
                        episode_keypoint_data[save_key] = val

        save_pickle(episode_keypoint_data, pickle_file_fullpath)
        print("duration: %.3f seconds" % (time.time() - episode_start_time))
def train_autoencoder(config,
                      train_dir,
                      ckp_dir=None,
                      multi_episode_dict=None,
                      type=None,  # ["SpatialAutoencoder", . . .]
                      ):
    """Train an image autoencoder (spatial or convolutional) on episode images.

    Writes checkpoints, tensorboard logs, reconstruction images and a log.txt
    under `train_dir`, and saves the best-validation-loss model to
    ckp_dir/net_best.pth.

    :param config: global config dict; the 'train_autoencoder' section drives
        optimizer, scheduler, loss weighting and logging cadence
    :param train_dir: root output directory for this training run
    :param ckp_dir: checkpoint directory; defaults to train_dir/checkpoints
    :param multi_episode_dict: dict episode_name -> episode, required
    :param type: model type string, "SpatialAutoencoder" or
        "ConvolutionalAutoencoder" (NOTE: shadows the builtin `type`)
    """
    assert multi_episode_dict is not None

    if ckp_dir is None:
        ckp_dir = os.path.join(train_dir, 'checkpoints')

    if not os.path.exists(ckp_dir):
        os.makedirs(ckp_dir)

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    images_dir = os.path.join(train_dir, 'images')
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, 'config.yaml'))

    # set random seed for reproduction
    set_seed(config['train_autoencoder']['random_seed'])

    camera_names = [config['perception']['camera_name']]

    # build the model and its matching image preprocessing function
    model = None
    image_preprocess_func = None
    if type == "SpatialAutoencoder":
        model = SpatialAutoencoder.from_global_config(config)
        image_preprocess_func = functools.partial(spatial_autoencoder_image_preprocessing,
                                                  H_in=model.input_image_shape[0],
                                                  W_in=model.input_image_shape[1],
                                                  H_out=model.output_image_shape[0],
                                                  W_out=model.output_image_shape[1])
    elif type == "ConvolutionalAutoencoder":
        model = ConvolutionalAutoencoder.from_global_config(config)
        image_preprocess_func = AutoencoderImagePreprocessFunctionFactory.convolutional_autoencoder(config)
    else:
        raise ValueError("unknown model type: %s" % (type))

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # only use images from this specific config
    ### data
    datasets = {}
    dataloaders = {}
    for phase in ['train', 'valid']:
        datasets[phase] = AutoencoderImageDataset(config,
                                                  phase=phase,
                                                  episodes=multi_episode_dict,
                                                  camera_names=camera_names,
                                                  image_preprocess_func=image_preprocess_func)
        # NOTE(review): num_workers is set from 'batch_size' — this looks like a
        # copy-paste bug; presumably it should read a 'num_workers' config key.
        # Confirm before changing, since behavior (worker count) would change.
        dataloaders[phase] = DataLoader(datasets[phase],
                                        batch_size=config['train_autoencoder']['batch_size'],
                                        shuffle=True if phase == 'train' else False,
                                        num_workers=config['train_autoencoder']['batch_size'])

    use_gpu = torch.cuda.is_available()

    params = model.parameters()
    optimizer = optim.Adam(params,
                           lr=float(config['train_autoencoder']['lr']),
                           betas=(config['train_autoencoder']['adam_beta1'], 0.999))

    scheduler = None
    if config['train_autoencoder']['lr_scheduler']['enabled']:
        scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.6, patience=2, verbose=True)

    if use_gpu:
        model = model.cuda()

    best_valid_loss = np.inf
    global_iteration = 0
    log_fout = open(os.path.join(ckp_dir, 'log.txt'), 'w')

    # criterion
    criterionMSE = nn.MSELoss()

    # a little test
    # NOTE(review): dead debug code (if False); candidate for deletion
    if False:
        data = datasets['train'][0]
        print(data.keys())
        print("data['target_tensor'].shape", data['target_tensor'].shape)
        print("data['target_mask'].shape", data['target_mask'].shape)
        fig = plt.figure()
        ax = fig.subplots(2)
        target_img = data['target']
        print("target_img.dtype", target_img.dtype)
        ax[0].imshow(data['input'])
        ax[1].imshow(data['target'], cmap='gray', vmin=0, vmax=255)
        plt.show()
        quit()

    # a little test
    # NOTE(review): second dead debug block, near-duplicate of the one above
    if False:
        data = datasets['train'][0]
        print(data.keys())
        print("data['target_tensor'].shape", data['target_tensor'].shape)
        print("data['target_mask'].shape", data['target_mask'].shape)
        fig = plt.figure()
        ax = fig.subplots(2)
        target_img = data['target']
        target_tensor = data['target_tensor'].unsqueeze(0)
        target_tensor_np = torch_utils.convert_torch_image_to_numpy(target_tensor).squeeze()
        print("target_img.dtype", target_img.dtype)
        ax[0].imshow(target_img)
        ax[1].imshow(target_tensor_np)
        plt.show()
        quit()

    # per-phase step counters used as tensorboard x-axis
    counters = {'train': 0, 'valid': 0}
    n_epoch = config['train_autoencoder']['n_epoch']
    for epoch in range(n_epoch):
        phases = ['train', 'valid']
        writer.add_scalar("Training Params/epoch", epoch, global_iteration)
        for phase in phases:
            # toggles dropout/batchnorm behavior between train and eval
            model.train(phase == 'train')

            meter_loss = AverageMeter()
            loader = dataloaders[phase]
            bar = ProgressBar(max_value=len(loader))
            step_duration_meter = AverageMeter()
            epoch_start_time = time.time()
            prev_time = time.time()
            print("\n\n")
            for i, data in bar(enumerate(loader)):
                loss_container = dict()  # store the losses for this step
                counters[phase] += 1
                # gradients only during the train phase
                with torch.set_grad_enabled(phase == 'train'):
                    input = data['input_tensor']
                    target = data['target_tensor']
                    if use_gpu:
                        input = input.cuda()
                        target = target.cuda()

                    out = model(input)
                    target_pred = out['output']
                    # print("target.shape", target.shape)
                    # print("target_pred.shape", target_pred.shape)

                    # reconstruction loss
                    l2_recon = criterionMSE(target, target_pred)
                    loss_container['l2_recon'] = l2_recon

                    # loss_masked
                    # [B, H', W']
                    mask = data['target_mask'].to(target.device)
                    mask_idx = mask > 0

                    # convert to BHWC ordering so we can directly index
                    target_masked = target.permute(0, 2, 3, 1)[mask_idx]
                    target_pred_masked = target_pred.permute(0, 2, 3, 1)[mask_idx]
                    # print('target_masked.shape', target_masked.shape)
                    # print("target_pred_masked.shape", target_pred_masked.shape)

                    l2_recon_masked = criterionMSE(target_masked, target_pred_masked)
                    loss_container['l2_recon_masked'] = l2_recon_masked

                    # compute the loss: weighted sum of the enabled loss terms
                    loss = 0
                    for key, val in config['train_autoencoder']['loss_function'].items():
                        if val['enabled']:
                            loss += loss_container[key] * val['weight']

                    meter_loss.update(loss.item())

                step_duration_meter.update(time.time() - prev_time)
                prev_time = time.time()

                if phase == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    nn.utils.clip_grad_norm_(params, 1)
                    optimizer.step()

                # skip the first ~100 iterations of logging (warmup noise)
                if global_iteration > 100:
                    writer.add_scalar("Params/learning rate", get_lr(optimizer), global_iteration)
                    # writer.add_scalar("Loss/%s" % (phase), loss.item(), global_iteration)
                    writer.add_scalar("Loss_train/%s" % (phase), loss.item(), counters[phase])

                    for loss_type, loss_obj in loss_container.items():
                        plot_name = "Loss/%s/%s" % (loss_type, phase)
                        writer.add_scalar(plot_name, loss_obj.item(), counters[phase])

                if i % config['train_autoencoder']['log_per_iter'] == 0:
                    log = '%s [%d/%d][%d/%d] LR: %.6f, Loss: %.6f (%.6f)' % (
                        phase, epoch, n_epoch, i, len(loader),
                        get_lr(optimizer), loss.item(), meter_loss.avg)
                    log += ', step time %.6f' % (step_duration_meter.avg)
                    step_duration_meter.reset()
                    print(log)
                    log_fout.write(log + '\n')
                    log_fout.flush()

                if phase == 'train' and i % config['train_autoencoder']['ckp_per_iter'] == 0:
                    torch.save(model.state_dict(),
                               '%s/net_kp_epoch_%d_iter_%d.pth' % (ckp_dir, epoch, i))

                # periodically dump target vs. reconstruction image grids
                if i % config['train_autoencoder']['img_save_per_iter'] == 0:
                    nrows = 4
                    ncols = 2
                    fig_width = 5
                    B, _, H, W = target.shape
                    fig_height = fig_width * ((nrows * H) / (ncols * W))
                    figsize = (fig_width, fig_height)
                    fig = plt.figure(figsize=figsize)
                    ax = fig.subplots(nrows=nrows, ncols=ncols)

                    target_np = torch_utils.convert_torch_image_to_numpy(target)
                    target_pred_np = torch_utils.convert_torch_image_to_numpy(target_pred)

                    for n in range(nrows):
                        ax[n, 0].imshow(target_np[n])
                        ax[n, 1].imshow(target_pred_np[n])

                    save_file = os.path.join(images_dir,
                                             '%s_epoch_%d_iter_%d.png' % (phase, epoch, i))
                    fig.savefig(save_file)
                    plt.close(fig)

                writer.flush()  # flush SummaryWriter events to disk
                global_iteration += 1

            log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                phase, epoch, n_epoch, meter_loss.avg, best_valid_loss)
            print(log)
            print("Epoch Duration:", time.time() - epoch_start_time)
            log_fout.write(log + '\n')
            log_fout.flush()

            if phase == 'valid':
                # step the plateau scheduler on validation loss and keep the
                # best model so far
                if scheduler is not None:
                    scheduler.step(meter_loss.avg)
                if meter_loss.avg < best_valid_loss:
                    best_valid_loss = meter_loss.avg
                    torch.save(model.state_dict(), '%s/net_best.pth' % ckp_dir)

    log_fout.close()
def collect_episodes(config, output_dir=None, visualize=True, debug=False):
    """Collect pusher-slider episodes with DrakePusherSliderEpisodeCollector.

    Loops until config['dataset']['num_episodes'] *accepted* episodes have been
    saved; episodes that are too short or in which the slider barely moved are
    discarded and retried.

    :param config: global config dict
    :param output_dir: where episode data and metadata.yaml are written;
        defaults to ./data
    :param visualize: passed through to the collector / simulator
    :param debug: if True, pause before each episode and print diagnostics
    """
    if output_dir is None:
        output_dir = os.path.join(os.getcwd(), 'data')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # save the config
    config_save_file = os.path.join(output_dir, 'config.yaml')
    save_yaml(config, config_save_file)

    # initialize config for DataCollector
    dc = DrakePusherSliderEpisodeCollector(config, visualize=visualize)
    num_episodes = config['dataset']['num_episodes']

    # record some metadata
    metadata = dict()
    metadata['episodes'] = dict()

    # loop until num_episodes episodes pass the acceptance checks below
    while (len(metadata['episodes']) < num_episodes):
        i = len(metadata['episodes'])
        if debug:
            input("Press Enter to continue...")
        print("\n")
        start_time = time.time()
        print("collecting episode %d of %d" % (i + 1, num_episodes))
        name = "%s_idx_%d" % (get_current_YYYY_MM_DD_hh_mm_ss_ms(), i)

        # randomized initial condition (with transform augmentation)
        ic = generate_initial_condition(config=config, T_aug_enabled=True)

        if debug:
            print("initial condition\n", ic)

        episode = dc.collect_single_episode(visualize=visualize,
                                            episode_name=name,
                                            q_pusher=ic['q_pusher'],
                                            q_slider=ic['q_slider'],
                                            # v_pusher=ic['v_pusher'],
                                            action_seq=ic['action_seq'],
                                            )

        # potentially discard it if the object didn't move during the data collection
        if len(episode._data['trajectory']) < 10:
            print("trajectory was too short, skipping")
            continue

        # compare observation 5 (after initial transients) with the last one
        obs_start = episode._data['trajectory'][5]['observation']
        obs_end = episode._data['trajectory'][-1]['observation']
        q_slider_start = obs_start['slider']['position']['translation']
        q_slider_end = obs_end['slider']['position']['translation']
        dq_slider = obs_start['slider']['position']['translation'] - obs_end['slider']['position']['translation']

        if debug:
            print("len(episode._data['trajectory'])", len(episode._data['trajectory']))
            print("q_slider_start", q_slider_start)
            print("q_slider_end", q_slider_end)
            print("dq_slider", dq_slider)
            print("np.linalg.norm(dq_slider)", np.linalg.norm(dq_slider))

        # if slider didn't move by at least 1 mm then discard this episode
        if (np.linalg.norm(dq_slider) < 0.001):  # one mm
            print("discarding episode since slider didn't move")
            continue

        print("saving to disk")
        metadata['episodes'][name] = dict()

        # images go to hdf5, everything else to pickle
        image_data_file = episode.save_images_to_hdf5(output_dir)
        non_image_data_file = episode.save_non_image_data_to_pickle(output_dir)
        print("output_dir:", output_dir)
        print("non_image_data.keys()", episode.non_image_data.keys())

        metadata['episodes'][name]['non_image_data_file'] = non_image_data_file
        metadata['episodes'][name]['image_data_file'] = image_data_file
        print("done saving to disk")

        elapsed = time.time() - start_time
        print("single episode took: %.2f seconds" % (elapsed))

    save_yaml(metadata, os.path.join(output_dir, 'metadata.yaml'))
def evaluate_mpc(config,  # the global config
                 dynamics_net,  # the dynamics model
                 vision_net,  # the vision model
                 save_dir,  # str: directory to store results
                 observation_function,
                 env,
                 dataset,  # dataset
                 ):
    """Run closed-loop MPC evaluation episodes and record pose-error statistics.

    For each of the first `num_episodes` (sorted) episodes in `dataset`, the
    goal keypoints are computed from the episode's final observation, the
    simulator is reset to the episode's start state, and an MPC rollout is run.
    Per-episode results go to save_dir/episode_<k>/, aggregate results to
    save_dir/data.csv and save_dir/metadata.yaml.

    :param config: global config dict; reads config['train']['n_history'],
        config['eval'][...] and config['vision_net']['camera_name']
    :param dynamics_net: learned dynamics model used by the planner
    :param vision_net: vision model mapping visual observations to keypoints
    :param save_dir: output directory; must not already exist (os.makedirs)
    :param observation_function: maps raw observations to model inputs
    :param env: simulation environment
    :param dataset: episode dataset providing goal/start observations
    """
    # save config
    os.makedirs(save_dir)
    save_yaml(config, os.path.join(save_dir, 'config.yaml'))

    n_history = config['train']['n_history']
    # need n_history-1 frames of context before the nominal start index
    start_idx = n_history - 1 + config['eval']['start_idx']
    end_idx = start_idx + config['eval']['episode_length']
    camera_name = config['vision_net']['camera_name']

    # build the planner
    planner = planner_from_config(config)

    episode_names = dataset.get_episode_names()
    episode_names.sort()  # deterministic episode order

    num_episodes = min(config['eval']['num_episodes'], len(episode_names))

    mpc_idx = 0
    pandas_data_list = []
    for i in range(num_episodes):
        mpc_idx += 1
        episode_name = episode_names[i]
        episode = dataset.episode_dict[episode_name]

        data_goal = dataset._getitem(episode, end_idx, rollout_length=0, n_history=1)

        # goal_keypoints: run the vision net on the goal-frame observation
        visual_observations = data_goal['visual_observations']
        vision_net_out = vision_net.forward_visual_obs(data_goal['visual_observations'])
        goal_keypoints = vision_net_out['dynamics_net_input'].squeeze().cpu().numpy()

        debug_dict = {
            'goal_data': data_goal,
            'goal_vision_net_out': vision_net_out,
        }

        # reset the simulator state to the episode's start observation
        env.reset()
        observation_full = episode.get_observation(start_idx)
        context = env.get_mutable_context()
        env.set_simulator_state_from_observation_dict(context, observation_full)

        folder_name = "episode_%d" % mpc_idx
        save_dir_tmp = os.path.join(save_dir, folder_name)
        os.makedirs(save_dir_tmp)

        # run the simulation for this episode
        mpc_out = mpc_episode_keypoint_observation(config=config,
                                                   model_dy=dynamics_net,
                                                   model_vision=vision_net,
                                                   planner=planner,
                                                   obs_goal=goal_keypoints,
                                                   observation_function=observation_function,
                                                   env=env,
                                                   save_dir=save_dir_tmp,
                                                   use_gpu=True,
                                                   wait_for_user_input=False,
                                                   debug_dict=debug_dict,
                                                   visualize=True,
                                                   verbose=False,
                                                   video=True)

        # ground truth slider to world
        obs_goal = episode.get_observation(end_idx)
        T_W_S_goal = transform_utils.transform_from_pose_dict(obs_goal['slider']['position'])

        obs_final = mpc_out['debug_data'][-1]['obs']
        # actual T_W_S at end of MPC rollout
        T_W_S = transform_utils.transform_from_pose_dict(obs_final['slider']['position'])

        # error between target and actual: express actual pose in goal frame
        T_goal_S = np.matmul(np.linalg.inv(T_W_S_goal), T_W_S)
        pos_err = np.linalg.norm(T_goal_S[:3, 3])
        axis, angle = transforms3d.axangles.mat2axangle(T_goal_S[:3, :3])

        print("T_W_S[:3, 3]", T_W_S[:3, 3])
        print("T_W_S_goal[:3, 3]", T_W_S_goal[:3, 3])

        data = {
            'position_error': pos_err,
            'angle_error': abs(angle),
            'angle_error_degrees': np.rad2deg(abs(angle)),
        }
        print("\ndata\n:", data)

        # parse the pandas data out
        pandas_data = mpc_out['pandas_data']
        pandas_data.update(data)

        # record some additional data
        pandas_data['episode_name'] = episode_name
        pandas_data['mpc_idx'] = mpc_idx
        pandas_data['start_idx'] = start_idx
        pandas_data['end_idx'] = end_idx
        pandas_data['output_dir'] = folder_name

        pandas_data_list.append(pandas_data)

    # create dataframe and save to csv
    df = pd.DataFrame(pandas_data_list)
    df.to_csv(os.path.join(save_dir, "data.csv"))

    # record some simple summary statistics in metadata.yaml
    # (removed the unused `reward_vec = np.array(df['reward'])` local)
    metadata = dict()
    for key in ['reward', 'position_error', 'angle_error_degrees']:
        vec = df[key]
        metadata[key] = {
            'mean': float(np.mean(vec)),
            'median': float(np.median(vec)),
            'std_dev': float(np.std(vec)),
        }

    save_yaml(metadata, os.path.join(save_dir, 'metadata.yaml'))
from key_dynam.utils.utils import load_pickle, save_pickle, save_yaml
from key_dynam.utils import meshcat_utils
from key_dynam.utils import transform_utils

# One-off script: extract the final visual-observation points from a saved MPC
# rollout, center them, save them as the canonical object points, and show both
# point clouds in meshcat for a visual sanity check.
# NOTE(review): hard-coded absolute path — only runs on the original machine.
data_file = "/home/manuelli/data/key_dynam/hardware_experiments/closed_loop_rollouts/stable/2020-07-10-22-16-08_long_push_on_long_side/mpc_rollouts/2020-07-10-22-19-03-591910/data.p"
data = load_pickle(data_file)

# world-frame points from the last step of the plan
pts = data['plan']['plan_data'][-1]['dynamics_model_input_data']['visual_observation']['pts_W']
print("pts\n", pts)

# center the points at their centroid before saving
centroid = np.mean(pts, axis=0)
pts_centered = pts - centroid

save_data = {'object_points': pts_centered.tolist()}
save_file = "object_points_master.yaml"  # written to the current directory
save_yaml(save_data, save_file)

# do some meshcat debug: blue = centered points, green = original world-frame points
vis = meshcat_utils.make_default_visualizer_object()
meshcat_utils.visualize_points(vis,
                               "object_points_centered",
                               pts_centered,
                               color=[0, 0, 255],
                               size=0.01)

meshcat_utils.visualize_points(vis,
                               "object_points_world",
                               pts,
                               color=[0, 255, 0],
                               size=0.01)
def precompute_descriptors(multi_episode_dict,
                           model,
                           output_dir,  # str
                           batch_size=10,
                           num_workers=10,
                           model_file=None,
                           ):
    """Run the dense-descriptor model over every image of every episode and
    cache the descriptor images to hdf5 files in `output_dir`.

    One hdf5 file per episode (named after the episode's image data file) is
    written, keyed by "<image key>/descriptor_image".

    :param multi_episode_dict: dict episode_name -> episode object
    :param model: descriptor network (forward returns dict with
        'descriptor_image'); will be put in eval mode
    :param output_dir: directory to write hdf5 files and metadata.yaml
    :param batch_size: DataLoader batch size
    :param num_workers: DataLoader worker count
    :param model_file: path of the model checkpoint, recorded in metadata.yaml
    """
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    metadata = {'model_file': model_file}
    metadata_file = os.path.join(output_dir, 'metadata.yaml')
    save_yaml(metadata, metadata_file)

    # overall wall-clock timer for the whole function
    # FIX: this used to be clobbered by the per-batch timers below, so the
    # final "total time" print reported only the last batch's save time
    total_start_time = time.time()
    log_freq = 10

    device = next(model.parameters()).device
    model.eval()  # make sure model is in eval mode

    # build all the dataset
    datasets = {}
    dataloaders = {}
    for episode_name, episode in multi_episode_dict.items():
        single_episode_dict = {episode_name: episode}
        # NOTE(review): ImageDataset is constructed with config=None here;
        # presumably it tolerates a missing config in phase="all" — confirm
        config = None
        datasets[episode_name] = ImageDataset(config, single_episode_dict, phase="all")
        dataloaders[episode_name] = DataLoader(datasets[episode_name],
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=False)

    episode_counter = 0
    num_episodes = len(multi_episode_dict)
    for episode_name, dataset in datasets.items():
        episode_counter += 1
        print("\n\n")
        episode = multi_episode_dict[episode_name]
        # mirror the episode's image file name for the descriptor cache
        hdf5_file = os.path.basename(episode.image_data_file)
        hdf5_file_fullpath = os.path.join(output_dir, hdf5_file)

        # remove stale output from a previous run
        if os.path.isfile(hdf5_file_fullpath):
            os.remove(hdf5_file_fullpath)

        dataloader = dataloaders[episode_name]

        episode_start_time = time.time()
        with h5py.File(hdf5_file_fullpath, 'w') as hf:
            for i, data in enumerate(dataloaders[episode_name]):
                rgb_tensor = data['rgb_tensor'].to(device)
                key_tree_joined = data['key_tree_joined']

                if (i % log_freq) == 0:
                    log_msg = "computing [%d/%d][%d/%d]" % (
                        episode_counter, num_episodes, i + 1, len(dataloader))
                    print(log_msg)

                # don't use gradients
                with torch.no_grad():
                    forward_start_time = time.time()
                    out = model.forward(rgb_tensor)
                    print("forward took", time.time() - forward_start_time)

                    B, _, H, W = rgb_tensor.shape

                    # iterate over elements in the batch
                    save_start_time = time.time()
                    for j in range(B):
                        # [D, H, W]
                        des_image = out['descriptor_image'][j].cpu().numpy()
                        key = key_tree_joined[j] + "/descriptor_image"
                        hf.create_dataset(key, data=des_image)
                    print("saving images took", time.time() - save_start_time)

        print("duration: %.3f seconds" % (time.time() - episode_start_time))

    print("total time to compute descriptors: %.3f seconds" % (time.time() - total_start_time))