# Example #1
def test_pusher_slider_dataset():
    """Smoke-test dataset construction for the pusher-slider experiment.

    Loads the experiments/01 config, builds a MultiEpisodeDataset via the
    config-driven factories, exercises __getitem__ on the first sample,
    prints its contents, and prints the dataset statistics.
    """
    cfg_path = os.path.join(get_project_root(), "experiments/01/config.yaml")
    cfg = load_yaml(cfg_path)

    # new dataset loading approach: episodes + action/observation functions
    # all come from the config
    loaded_episodes = load_episodes_from_config(cfg)
    act_fn = ActionFunctionFactory.function_from_config(cfg)
    obs_fn = ObservationFunctionFactory.function_from_config(cfg)

    dataset = MultiEpisodeDataset(cfg,
                                  action_function=act_fn,
                                  observation_function=obs_fn,
                                  episodes=loaded_episodes,
                                  phase="train")

    sample = dataset[0]  # exercise __getitem__
    print("type(data)", type(sample))
    print("list(data)", list(sample))

    print(type(sample["observations"]))
    print("observations.shape", sample["observations"].shape)
    print("actions.shape", sample["actions"].shape)

    print("observations", sample["observations"])
    print("actions", sample["actions"])

    print("stats", dataset.compute_dataset_statistics())
# Example #2
def test_pusher_slider_keypoint_dataset():
    """Smoke-test the keypoint observation function for the pusher-slider
    keypoint experiment (experiments/02).

    Builds a MultiEpisodeDataset from the config, then evaluates the
    observation function on a raw observation at two slider angles
    (0 and 90 degrees) so the keypoint output can be inspected by eye.
    """
    project_root = get_project_root()
    config_file = os.path.join(project_root, "experiments/02/config.yaml")
    config = load_yaml(config_file)

    # minimal history/rollout so a single observation suffices
    config["n_history"] = 1
    config["n_roll"] = 0

    # new dataset loading approach
    episodes = load_episodes_from_config(config)
    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    dataset = MultiEpisodeDataset(config,
                                  action_function=action_function,
                                  observation_function=observation_function,
                                  episodes=episodes,
                                  phase="train")

    # grab the first episode (sorted for determinism) and its first raw
    # observation
    episode_names = dataset.get_episode_names()
    episode_names.sort()
    episode_name = episode_names[0]
    episode = dataset.episode_dict[episode_name]
    obs_raw = episode.get_observation(0)
    obs_raw['slider']['angle'] = 0

    dataset.observation_function(obs_raw)

    # FIX: the banner previously said "20 degrees" while the angle set below
    # is 90 degrees; the message now matches the actual angle.
    print("90 degrees\n\n\n\n")
    obs_raw['slider']['angle'] = np.deg2rad(90)
    dataset.observation_function(obs_raw)
    quit()

    # NOTE(review): everything below is unreachable because of quit() above;
    # kept for when the early exit is removed.
    data = dataset[0]  # test the getitem
    print("type(data)", type(data))
    print("data.keys()", data.keys())

    print(type(data["observations"]))
    print("observations.shape", data["observations"].shape)
    print("actions.shape", data["actions"].shape)

    print("observations", data["observations"])
    print("actions", data["actions"])
# Example #3
def construct_dataset_from_config(
        config,  # dict for global config
        phase="train",  # str: either "train" or "valid"
        episodes=None,  # optional dict of episodes to use instead of loading data
):
    """Construct a MultiEpisodeDataset (and its helper functions) from a
    global config.

    FIX: removed a dead computation of `data_path` — it was built from
    config["dataset"]["data_path"] but never used, and could raise a
    spurious KeyError for configs without that key.

    :param config: global configuration dict
    :param phase: "train" or "valid"
    :param episodes: optional pre-loaded episode dict; if None the episodes
        are loaded from the config
    :return: dict with keys "dataset", "action_function",
        "observation_function", "episodes"
    :raises AssertionError: if phase is not "train" or "valid"
    """

    assert phase in ["train", "valid"]

    if episodes is None:
        # load the data if not passed in
        episodes = load_episodes_from_config(config)

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    dataset = MultiEpisodeDataset(config,
                                  action_function=action_function,
                                  observation_function=observation_function,
                                  episodes=episodes,
                                  phase=phase)

    return_data = {
        "dataset": dataset,
        "action_function": action_function,
        "observation_function": observation_function,
        "episodes": episodes,
    }

    return return_data
# Example #4
def train_dynamics(config,
                   train_dir, # str: directory to save output
                   multi_episode_dict, # multi_episode_dict
                   ):
    """Train a visual dynamics model.

    Builds train/valid datasets and dataloaders from `multi_episode_dict`,
    constructs the visual dynamics model from `config`, seeds or loads the
    vision net's reference descriptors, then runs the epoch/phase training
    loop with MSE rollout loss, logging to tensorboard and saving
    checkpoints under `train_dir`.

    :param config: global configuration dict (train, dataset, vision_net,
        env sections are read)
    :param train_dir: str, directory where log/config/checkpoints are written
    :param multi_episode_dict: dict of episodes used to build the datasets

    NOTE(review): assumes config['train']['n_rollout'] >= 1 — with n_roll == 0
    the rollout loop never runs, so `loss_mse` stays a plain float (its
    .backward()/.item() calls would fail) and `loss_l1` is never defined
    before the tensorboard logging below. Confirm configs always set
    n_rollout >= 1.
    """

    # whether descriptor keypoints were precomputed on disk (vs produced by
    # the vision net at train time)
    use_precomputed_keypoints = config['dataset']['visual_observation']['enabled'] and config['dataset']['visual_observation']['descriptor_keypoints']

    # set random seed for reproducibility
    set_seed(config['train']['random_seed'])

    # starting epoch; supports resuming from a checkpoint
    st_epoch = config['train']['resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    # Tee is kept alive for its side effect — presumably duplicating stdout
    # into the log file; TODO confirm
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config alongside the checkpoints
    save_yaml(config, os.path.join(train_dir, "config.yaml"))


    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(config)

    # one dataset/dataloader per phase
    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(config,
                                              action_function=action_function,
                                              observation_function=observation_function,
                                              episodes=multi_episode_dict,
                                              phase=phase)

        dataloaders[phase] = DataLoader(
            datasets[phase], batch_size=config['train']['batch_size'],
            shuffle=True if phase == 'train' else False,
            num_workers=config['train']['num_workers'], drop_last=True)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    # compute normalization parameters if not starting from pre-trained network . . .


    '''
    define model for dynamics prediction
    '''

    model_dy = build_visual_dynamics_model(config)
    K = config['vision_net']['num_ref_descriptors']  # number of reference descriptors

    print("model_dy.vision_net._reference_descriptors.shape", model_dy.vision_net._ref_descriptors.shape)
    print("model_dy.vision_net.descriptor_dim", model_dy.vision_net.descriptor_dim)
    print("model_dy #params: %d" % count_trainable_parameters(model_dy))

    camera_name = config['vision_net']['camera_name']
    W = config['env']['rgbd_sensors']['sensor_list'][camera_name]['width']
    H = config['env']['rgbd_sensors']['sensor_list'][camera_name]['height']
    diag = np.sqrt(W**2 + H**2) # image diagonal, used to scale the loss into [0, 1]

    # sample reference descriptors unless using precomputed keypoints
    if not use_precomputed_keypoints:
        # sample reference descriptors from the first (sorted) training episode
        episode_names = list(datasets["train"].episode_dict.keys())
        episode_names.sort()
        episode_name = episode_names[0]
        episode = datasets["train"].episode_dict[episode_name]
        episode_idx = 0
        camera_name = config["vision_net"]["camera_name"]
        image_data = episode.get_image_data(camera_name, episode_idx)
        des_img = torch.Tensor(image_data['descriptor'])
        mask_img = torch.Tensor(image_data['mask'])
        ref_descriptor_dict = sample_descriptors(des_img,
                                                 mask_img,
                                                 config['vision_net']['num_ref_descriptors'])



        model_dy.vision_net._ref_descriptors.data = ref_descriptor_dict['descriptors']
        model_dy.vision_net.reference_image = image_data['rgb']
        model_dy.vision_net.reference_indices = ref_descriptor_dict['indices']
    else:
        # load reference descriptors that were precomputed on disk
        metadata_file = os.path.join(get_data_root(), config['dataset']['descriptor_keypoints_dir'], 'metadata.p')
        descriptor_metadata = load_pickle(metadata_file)

        # [32, 2]
        ref_descriptors = torch.Tensor(descriptor_metadata['ref_descriptors'])

        # trim down to [K, 2]
        ref_descriptors = ref_descriptors[:K]
        model_dy.vision_net._ref_descriptors.data = ref_descriptors
        model_dy.vision_net._ref_descriptors_metadata = descriptor_metadata

        # this is just a sanity check
        assert model_dy.vision_net.num_ref_descriptors == K

    print("reference_descriptors", model_dy.vision_net._ref_descriptors)

    # criterion
    criterionMSE = nn.MSELoss()
    l1Loss = nn.L1Loss()

    # optimizer
    params = model_dy.parameters()
    lr = float(config['train']['lr'])
    optimizer = optim.Adam(params, lr=lr, betas=(config['train']['adam_beta1'], 0.999))

    # setup scheduler
    sc = config['train']['lr_scheduler']
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=sc['factor'],
                                  patience=sc['patience'],
                                  threshold_mode=sc['threshold_mode'],
                                  cooldown= sc['cooldown'],
                                  verbose=True)

    if use_gpu:
        print("using gpu")
        model_dy = model_dy.cuda()

    print("model_dy.vision_net._ref_descriptors.device", model_dy.vision_net._ref_descriptors.device)
    print("model_dy.vision_net #params: %d" %(count_trainable_parameters(model_dy.vision_net)))


    best_valid_loss = np.inf
    global_iteration = 0
    # tracked so the KeyboardInterrupt handler can name its checkpoint
    epoch_counter_external = 0

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)
            for phase in phases:
                # train-mode only during the train phase
                model_dy.train(phase == 'train')

                meter_loss_rmse = AverageMeter()
                step_duration_meter = AverageMeter()


                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):

                    step_start_time = time.time()

                    global_iteration += 1

                    # gradients only in the train phase
                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config['train']['n_rollout']
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" %(global_iteration))


                        # visual_observations = data['visual_observations']
                        visual_observations_list = data['visual_observations_list']
                        observations = data['observations']
                        actions = data['actions']

                        if use_gpu:
                            observations = observations.cuda()
                            actions = actions.cuda()

                        # states, actions = data
                        assert actions.size(1) == n_samples

                        B = actions.size(0)
                        loss_mse = 0.


                        # compute the output of the visual model for all timesteps
                        visual_model_output_list = []
                        for visual_obs in visual_observations_list:
                            # visual_obs is a dict containing observation for a single
                            # time step (of course across a batch however)
                            # visual_obs[<camera_name>]['rgb_tensor'] has shape [B, 3, H, W]

                            # probably need to cast input to cuda
                            dynamics_net_input = None
                            if use_precomputed_keypoints:
                                # note precomputed descriptors stored on disk are of size
                                # K = 32. We need to trim it down to the appropriate size
                                # [B, K_disk, 2] where K_disk is num keypoints on disk
                                keypoints = visual_obs[camera_name]['descriptor_keypoints']


                                # [B, K, 2]: keep only the first K keypoints
                                keypoints = keypoints[:,:K]

                                if DEBUG:
                                    print("keypoints.shape", keypoints.shape)

                                # flatten keypoints to [B, K*2]
                                dynamics_net_input = keypoints.flatten(start_dim=1)
                            else:
                                out_dict = model_dy.vision_net.forward(visual_obs)

                                # [B, vision_model_out_dim]
                                dynamics_net_input = out_dict['dynamics_net_input']

                            visual_model_output_list.append(dynamics_net_input)

                        # concatenate this into a tensor
                        # [B, n_samples, vision_model_out_dim]
                        visual_model_output = torch.stack(visual_model_output_list, dim=1)

                        # cast this to float so it can be concatenated below
                        visual_model_output = visual_model_output.type_as(observations)

                        if DEBUG:
                            print('visual_model_output.shape', visual_model_output.shape)
                            print("observations.shape", observations.shape)
                            print("actions.shape", actions.shape)

                        # states is gotten by concatenating visual_observations and observations
                        # [B, n_samples, vision_model_out_dim + obs_dim]
                        states = torch.cat((visual_model_output, observations), dim=-1)

                        # state_cur: B x n_his x state_dim
                        state_cur = states[:, :n_his]

                        if DEBUG:
                            print("states.shape", states.shape)

                        # autoregressive rollout: predict n_roll steps ahead,
                        # feeding each prediction back into the state window
                        for j in range(n_roll):

                            if DEBUG:
                                print("n_roll j: %d" %(j))

                            # desired (ground-truth) state at this rollout step
                            state_des = states[:, n_his + j]

                            # action_cur: B x n_his x action_dim
                            action_cur = actions[:, j : j + n_his] if actions is not None else None

                            # state_pred: B x state_dim
                            # NOTE(review): `input` shadows the builtin of the same name
                            input = {'observation': state_cur,
                                     'action': action_cur,
                                     }

                            if DEBUG:
                                print("state_cur.shape", state_cur.shape)
                                print("action_cur.shape", action_cur.shape)

                            state_pred = model_dy.dynamics_net(input)

                            # normalize by diag to ensure the loss is in [0,1] range
                            loss_mse_cur = criterionMSE(state_pred/diag, state_des/diag)
                            loss_mse += loss_mse_cur / n_roll

                            # l1Loss; only the last rollout step's value survives
                            # to the tensorboard logging below
                            loss_l1 = l1Loss(state_pred, state_des)

                            # update state_cur
                            # state_pred.unsqueeze(1): B x 1 x state_dim
                            # state_cur: B x n_his x state_dim
                            state_cur = torch.cat([state_cur[:, 1:], state_pred.unsqueeze(1)], 1)

                            # NOTE(review): meter is updated once per rollout step
                            # with the partially-accumulated loss_mse — confirm
                            # this is intended rather than a once-per-batch update
                            meter_loss_rmse.update(np.sqrt(loss_mse.item()), B)

                    step_duration_meter.update(time.time() - step_start_time)
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss_mse.backward()
                        optimizer.step()

                    if (i % config['train']['log_per_iter'] == 0) or (global_iteration % config['train']['log_per_iter'] == 0):
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i, data_n_batches[phase],
                            get_lr(optimizer))
                        log += ', rmse: %.6f (%.6f)' % (
                            np.sqrt(loss_mse.item()), meter_loss_rmse.avg)

                        log += ', step time %.6f' %(step_duration_meter.avg)
                        step_duration_meter.reset()


                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 100 iterations
                        if global_iteration > 100:
                            writer.add_scalar("Params/learning rate", get_lr(optimizer), global_iteration)
                            writer.add_scalar("Loss_MSE/%s" %(phase), loss_mse.item(), global_iteration)
                            writer.add_scalar("L1/%s" %(phase), loss_l1.item(), global_iteration)
                            writer.add_scalar("L1_fraction/%s" %(phase), loss_l1.item()/diag, global_iteration)
                            writer.add_scalar("RMSE average loss/%s" %(phase), meter_loss_rmse.avg, global_iteration)

                    # periodic checkpoint during the train phase
                    if phase == 'train' and i % config['train']['ckp_per_iter'] == 0:
                        save_model(model_dy, '%s/net_dy_epoch_%d_iter_%d' % (train_dir, epoch, i))



                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'], meter_loss_rmse.avg, best_valid_loss)
                print(log)

                if phase == 'valid':
                    # scheduler steps on the validation RMSE
                    if config['train']['lr_scheduler']['enabled']:
                        scheduler.step(meter_loss_rmse.avg)

                    # print("\nPhase == valid")
                    # print("meter_loss_rmse.avg", meter_loss_rmse.avg)
                    # print("best_valid_loss", best_valid_loss)
                    if meter_loss_rmse.avg < best_valid_loss:
                        best_valid_loss = meter_loss_rmse.avg
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush() # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' % (train_dir, epoch_counter_external))
        writer.flush() # flush SummaryWriter events to disk
def train_dynamics(
    config,
    train_dir,  # str: directory to save output
    multi_episode_dict=None,
    visual_observation_function=None,
    metadata=None,
    spatial_descriptors_data=None,
):
    """Train a (non-visual-net) dynamics model with rollout_model.

    Builds train/valid datasets and dataloaders, constructs the dynamics
    model from `config`, rolls the model out with `rollout_model`, and
    optimizes a config-weighted combination of MSE/L1/L2/SmoothL1 losses.
    Logs to tensorboard, tracks per-loss AverageMeters, and checkpoints
    under `train_dir`.

    :param config: global configuration dict (train, loss_function,
        dynamics_net sections are read)
    :param train_dir: str, directory where logs/config/checkpoints are written
    :param multi_episode_dict: dict of episodes (required despite the
        None default — asserted below)
    :param visual_observation_function: optional callable forwarded to
        MultiEpisodeDataset
    :param metadata: optional object pickled to train_dir/metadata.p
    :param spatial_descriptors_data: optional object pickled to
        train_dir/spatial_descriptors.p
    :raises AssertionError: if multi_episode_dict is None
    :raises ValueError: for dynamics_net == "mlp_weight_matrix" or an
        unknown lr_scheduler type
    """
    assert multi_episode_dict is not None
    # assert spatial_descriptors_idx is not None

    # set random seed for reproducibility
    set_seed(config['train']['random_seed'])

    # starting epoch; supports resuming from a checkpoint
    st_epoch = config['train'][
        'resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    # Tee is kept alive for its side effect — presumably duplicating stdout
    # into the log file; TODO confirm
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config alongside the checkpoints
    save_yaml(config, os.path.join(train_dir, "config.yaml"))

    if metadata is not None:
        save_pickle(metadata, os.path.join(train_dir, 'metadata.p'))

    if spatial_descriptors_data is not None:
        save_pickle(spatial_descriptors_data,
                    os.path.join(train_dir, 'spatial_descriptors.p'))

    # best-epoch stats written whenever validation improves
    training_stats = dict()
    training_stats_file = os.path.join(train_dir, 'training_stats.yaml')

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    # one dataset/dataloader per phase
    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=multi_episode_dict,
            phase=phase,
            visual_observation_function=visual_observation_function)

        print("len(datasets[phase])", len(datasets[phase]))
        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=True if phase == 'train' else False,
            num_workers=config['train']['num_workers'],
            drop_last=True)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    # compute normalization parameters if not starting from pre-trained network . . .

    # dead debug block, disabled with `if False:` — prints one sample and exits
    if False:
        dataset = datasets["train"]
        data = dataset[0]
        print("data['observations_combined'].shape",
              data['observations_combined'].shape)
        print("data.keys()", data.keys())

        print("data['observations_combined']",
              data['observations_combined'][0])
        print("data['observations_combined'].shape",
              data['observations_combined'].shape)
        print("data['actions'].shape", data['actions'].shape)
        print("data['actions']\n", data['actions'])
        quit()
    '''
    Build model for dynamics prediction
    '''
    model_dy = build_dynamics_model(config)
    if config['dynamics_net'] == "mlp_weight_matrix":
        raise ValueError("can't use weight matrix with standard setup")

    # criterion
    criterionMSE = nn.MSELoss()
    l1Loss = nn.L1Loss()
    smoothL1 = nn.SmoothL1Loss()

    # optimizer
    params = model_dy.parameters()
    lr = float(config['train']['lr'])
    optimizer = optim.Adam(params,
                           lr=lr,
                           betas=(config['train']['adam_beta1'], 0.999))

    # setup scheduler (ReduceLROnPlateau steps on valid loss, StepLR on epoch)
    sc = config['train']['lr_scheduler']
    scheduler = None

    if config['train']['lr_scheduler']['enabled']:
        if config['train']['lr_scheduler']['type'] == "ReduceLROnPlateau":
            scheduler = ReduceLROnPlateau(optimizer,
                                          mode='min',
                                          factor=sc['factor'],
                                          patience=sc['patience'],
                                          threshold_mode=sc['threshold_mode'],
                                          cooldown=sc['cooldown'],
                                          verbose=True)
        elif config['train']['lr_scheduler']['type'] == "StepLR":
            step_size = config['train']['lr_scheduler']['step_size']
            gamma = config['train']['lr_scheduler']['gamma']
            scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma)
        else:
            raise ValueError("unknown scheduler type: %s" %
                             (config['train']['lr_scheduler']['type']))

    if use_gpu:
        print("using gpu")
        model_dy = model_dy.cuda()

    # print("model_dy.vision_net._ref_descriptors.device", model_dy.vision_net._ref_descriptors.device)
    # print("model_dy.vision_net #params: %d" %(count_trainable_parameters(model_dy.vision_net)))

    best_valid_loss = np.inf
    # which loss_container key decides "best" on validation
    valid_loss_type = config['train']['valid_loss_type']
    global_iteration = 0
    counters = {'train': 0, 'valid': 0}
    # tracked so the KeyboardInterrupt handler can name its checkpoint
    epoch_counter_external = 0
    loss = 0

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)
            for phase in phases:

                # only validate at a certain frequency
                if (phase == "valid") and (
                    (epoch % config['train']['valid_frequency']) != 0):
                    continue

                model_dy.train(phase == 'train')

                # one AverageMeter per loss key, created lazily below
                average_meter_container = dict()

                step_duration_meter = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):

                    loss_container = dict()  # store the losses for this step

                    step_start_time = time.time()

                    global_iteration += 1
                    counters[phase] += 1

                    # gradients only in the train phase
                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" % (global_iteration))
                            print("n_samples", n_samples)

                        # [B, n_samples, obs_dim]
                        states = data['observations_combined']

                        # [B, n_samples, action_dim]
                        actions = data['actions']
                        B = actions.shape[0]

                        if use_gpu:
                            states = states.cuda()
                            actions = actions.cuda()

                        # state_cur: B x n_his x state_dim
                        # state_cur = states[:, :n_his]

                        # [B, n_his, state_dim]
                        state_init = states[:, :n_his]

                        # We want to rollout n_roll steps
                        # actions = [B, n_his + n_roll, -1]
                        # so we want action_seq.shape = [B, n_roll, -1]
                        # NOTE(review): this slice actually has length
                        # n_his + n_roll - 1, not n_roll as the comment above
                        # says — presumably rollout_model consumes the extra
                        # history actions; confirm against rollout_model
                        action_start_idx = 0
                        action_end_idx = n_his + n_roll - 1
                        action_seq = actions[:, action_start_idx:
                                             action_end_idx, :]

                        if DEBUG:
                            print("states.shape", states.shape)
                            print("state_init.shape", state_init.shape)
                            print("actions.shape", actions.shape)
                            print("action_seq.shape", action_seq.shape)

                        # try using models_dy.rollout_model instead of doing this manually
                        rollout_data = rollout_model(state_init=state_init,
                                                     action_seq=action_seq,
                                                     dynamics_net=model_dy,
                                                     compute_debug_data=False)

                        # [B, n_roll, state_dim]
                        state_rollout_pred = rollout_data['state_pred']

                        # [B, n_roll, state_dim]
                        state_rollout_gt = states[:, n_his:]

                        if DEBUG:
                            print("state_rollout_gt.shape",
                                  state_rollout_gt.shape)
                            print("state_rollout_pred.shape",
                                  state_rollout_pred.shape)

                        # the loss function is between
                        # [B, n_roll, state_dim]
                        state_pred_err = state_rollout_pred - state_rollout_gt

                        # everything is in 3D space now so no need to do any scaling
                        # all the losses would be in meters . . . .
                        loss_mse = criterionMSE(state_rollout_pred,
                                                state_rollout_gt)
                        loss_l1 = l1Loss(state_rollout_pred, state_rollout_gt)
                        loss_l2 = torch.norm(state_pred_err, dim=-1).mean()
                        loss_smoothl1 = smoothL1(state_rollout_pred,
                                                 state_rollout_gt)
                        loss_smoothl1_final_step = smoothL1(
                            state_rollout_pred[:, -1], state_rollout_gt[:, -1])

                        # compute losses at final step of the rollout
                        mse_final_step = criterionMSE(
                            state_rollout_pred[:, -1], state_rollout_gt[:, -1])
                        l2_final_step = torch.norm(state_pred_err[:, -1],
                                                   dim=-1).mean()
                        l1_final_step = l1Loss(state_rollout_pred[:, -1],
                                               state_rollout_gt[:, -1])

                        loss_container['mse'] = loss_mse
                        loss_container['l1'] = loss_l1
                        loss_container['mse_final_step'] = mse_final_step
                        loss_container['l1_final_step'] = l1_final_step
                        loss_container['l2_final_step'] = l2_final_step
                        loss_container['l2'] = loss_l2
                        loss_container['smooth_l1'] = loss_smoothl1
                        loss_container[
                            'smooth_l1_final_step'] = loss_smoothl1_final_step

                        # compute the total loss as the config-weighted sum of
                        # the enabled individual losses
                        loss = 0
                        for key, val in config['loss_function'].items():
                            if val['enabled']:
                                loss += loss_container[key] * val['weight']

                        loss_container['loss'] = loss

                        # fold every loss into its running AverageMeter
                        for key, val in loss_container.items():
                            if not key in average_meter_container:
                                average_meter_container[key] = AverageMeter()

                            average_meter_container[key].update(val.item(), B)

                    step_duration_meter.update(time.time() - step_start_time)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                    if (i % config['train']['log_per_iter']
                            == 0) or (global_iteration %
                                      config['train']['log_per_iter'] == 0):
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))

                        log += ', l2: %.6f' % (loss_container['l2'].item())
                        log += ', l2_final_step: %.6f' % (
                            loss_container['l2_final_step'].item())

                        log += ', step time %.6f' % (step_duration_meter.avg)
                        step_duration_meter.reset()

                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 100 iterations
                        if global_iteration > 100:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss_train/%s" % (phase),
                                              loss.item(), global_iteration)

                            for loss_type, loss_obj in loss_container.items():
                                plot_name = "Loss/%s/%s" % (loss_type, phase)
                                writer.add_scalar(plot_name, loss_obj.item(),
                                                  counters[phase])

                    # periodic checkpoint during the train phase
                    if phase == 'train' and global_iteration % config['train'][
                            'ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_epoch_%d_iter_%d' %
                            (train_dir, epoch, i))

                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'],
                    average_meter_container[valid_loss_type].avg,
                    best_valid_loss)
                print(log)

                # record all average_meter losses
                for key, meter in average_meter_container.items():
                    writer.add_scalar("AvgMeter/%s/%s" % (key, phase),
                                      meter.avg, epoch)

                if phase == "train":
                    # StepLR advances once per training epoch
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type']
                            == "StepLR"):
                        scheduler.step()

                if phase == 'valid':
                    # ReduceLROnPlateau steps on the validation loss
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type']
                            == "ReduceLROnPlateau"):
                        scheduler.step(
                            average_meter_container[valid_loss_type].avg)

                    if average_meter_container[
                            valid_loss_type].avg < best_valid_loss:
                        best_valid_loss = average_meter_container[
                            valid_loss_type].avg
                        training_stats['epoch'] = epoch
                        # NOTE(review): stores the *valid-phase step counter*,
                        # not global_iteration, despite the key name
                        training_stats['global_iteration'] = counters['valid']
                        save_yaml(training_stats, training_stats_file)
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' %
            (train_dir, epoch_counter_external))
        writer.flush()  # flush SummaryWriter events to disk
# Example #6
def load_model_and_data(
    K_matrix=None,
    T_world_camera=None,
):
    """Load the trained dynamics and dense-descriptor models for hardware MPC.

    Args:
        K_matrix: optional camera intrinsics matrix. Used together with
            ``T_world_camera``; if either is ``None`` the stored spartan
            calibration for the training camera is used instead.
        T_world_camera: optional camera extrinsics (world from camera).

    Returns:
        dict with keys ``model_dy``, ``config``, ``spatial_descriptor_data``,
        ``action_function``, ``observation_function``,
        ``visual_observation_function``, ``planner`` and ``camera_info``.

    Raises:
        ValueError: if the module-level ``PLANNER_TYPE`` is unknown.
    """

    dataset_name = "push_box_hardware"

    model_name = "DD_3D/2020-07-02-17-59-21-362337_DD_3D_n_his_2_T_aug"
    train_dir = os.path.join(
        get_data_root(),
        "dev/experiments/22/dataset_push_box_hardware/trained_models/dynamics")

    train_dir = os.path.join(train_dir, model_name)
    ckpt_file = os.path.join(train_dir, "net_best_dy_state_dict.pth")

    train_config = load_yaml(os.path.join(train_dir, 'config.yaml'))
    state_dict = torch.load(ckpt_file)

    # build dynamics model, load trained weights, move to GPU in eval mode
    model_dy = build_dynamics_model(train_config)
    model_dy.load_state_dict(state_dict)
    model_dy = model_dy.eval()
    model_dy = model_dy.cuda()

    # validate the dataset name (the returned paths were previously bound to
    # unused locals; the lookup is kept so an unknown name still fails early)
    get_dataset_paths(dataset_name)

    spatial_descriptor_data = load_pickle(
        os.path.join(train_dir, 'spatial_descriptors.p'))
    metadata = load_pickle(os.path.join(train_dir, 'metadata.p'))

    ref_descriptors = spatial_descriptor_data['spatial_descriptors']
    ref_descriptors = torch_utils.cast_to_torch(ref_descriptors).cuda()

    # dense descriptor model referenced by the training metadata
    model_dd_file = metadata['model_file']
    model_dd = torch.load(model_dd_file)
    model_dd = model_dd.eval()
    model_dd = model_dd.cuda()

    camera_name = train_config['dataset']['visual_observation_function'][
        'camera_name']

    # use caller-supplied calibration only when fully specified, otherwise
    # fall back to the stored spartan calibration for this camera
    if (T_world_camera is not None) and (K_matrix is not None):
        camera_info = {
            "K": K_matrix,
            'T_world_camera': T_world_camera,
        }
    else:
        camera_info = get_spartan_camera_info(camera_name)

    camera_info['camera_name'] = camera_name
    visual_observation_function = \
        VisualObservationFunctionFactory.descriptor_keypoints_3D(config=train_config,
                                                                 camera_name=camera_name,
                                                                 model_dd=model_dd,
                                                                 ref_descriptors=ref_descriptors,
                                                                 K_matrix=camera_info['K'],
                                                                 T_world_camera=camera_info['T_world_camera'],
                                                                 )

    action_function = ActionFunctionFactory.function_from_config(train_config)
    observation_function = ObservationFunctionFactory.function_from_config(
        train_config)

    #### PLANNER #######
    # planner config = train config with its 'mpc' section replaced by the
    # experiment's MPC settings
    planner_config = copy.copy(train_config)
    config_tmp = load_yaml(
        os.path.join(get_project_root(),
                     'experiments/exp_22_push_box_hardware/config_DD_3D.yaml'))
    planner_config['mpc'] = config_tmp['mpc']
    if PLANNER_TYPE == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif PLANNER_TYPE == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (PLANNER_TYPE))

    return {
        "model_dy": model_dy,
        'config': train_config,
        'spatial_descriptor_data': spatial_descriptor_data,
        'action_function': action_function,
        'observation_function': observation_function,
        'visual_observation_function': visual_observation_function,
        'planner': planner,
        'camera_info': camera_info,
    }
# Example 7
def evaluate_mpc_z_state(
    model_dir,
    config_planner_mpc=None,
    save_dir=None,
    planner_type=None,
    env_config=None,
    strict=True,
    generate_initial_condition_func=None,
    env_type="DrakePusherSliderEnv",
):
    """Run a single closed-loop MPC evaluation in the model's latent z space.

    Loads the dynamics + keypoint models from ``model_dir``, constructs the
    requested Drake environment, wires up the action/observation/visual
    observation functions and a planner, then delegates the actual rollout to
    ``mpc_eval_drake_pusher_slider.evaluate_mpc``.

    Returns:
        dict containing the ``save_dir`` results were written to.
    """
    # these keyword arguments are mandatory
    assert save_dir is not None
    assert planner_type is not None
    assert env_config is not None
    assert generate_initial_condition_func is not None

    # load the trained models
    model_dict = load_model(model_dir, strict=strict)
    dy_entry = model_dict['model_dy']
    model_dy = dy_entry['model_dy']
    config = dy_entry['config']
    model_config = config

    model_kp = model_dict['model_kp']['model']
    config_kp = model_kp.config
    camera_name = config_kp['perception']['camera_name']

    # construct the simulation environment
    if env_type == "DrakePusherSliderEnv":
        env = DrakePusherSliderEnv(env_config, visualize=False)
    elif env_type == "DrakeMugsEnv":
        env = DrakeMugsEnv(env_config, visualize=False)
    else:
        raise ValueError("unknown env type: %s" % (env_type))
    env.reset()

    # camera calibration and mask labels come from the environment itself
    T_world_camera = env.camera_pose(camera_name)
    camera_K_matrix = env.camera_K_matrix(camera_name)
    mask_labels = env.get_labels_to_mask_list()

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.drake_pusher_position_3D(
        config)
    visual_observation_function = VisualObservationFunctionFactory.function_from_config(
        config,
        camera_name=camera_name,
        model_kp=model_kp,
        K_matrix=camera_K_matrix,
        T_world_camera=T_world_camera,
        mask_labels=mask_labels)

    episode = OnlineEpisodeReader()

    mpc_input_builder = DynamicsModelInputBuilder(
        observation_function=observation_function,
        visual_observation_function=visual_observation_function,
        action_function=action_function,
        episode=episode)

    def goal_func(obs_tmp):
        # encode a single observation into the flattened latent object state
        state_single = mpc_input_builder.get_state_input_single_timestep(
            {'observation': obs_tmp})['state']
        z_dict = model_dy.compute_z_state(state_single.unsqueeze(0))
        return z_dict['z_object_flat']

    object_indices = get_object_and_robot_state_indices(
        model_config)['object_indices']

    # planner config: same as the model config but with the 'mpc' and 'eval'
    # sections overridden when provided
    planner_config = copy.copy(model_config)
    if config_planner_mpc is not None:
        planner_config['mpc'] = config_planner_mpc['mpc']
        planner_config['eval'] = config_planner_mpc['eval']

    if planner_type == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif planner_type == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (planner_type))

    # run a single evaluation
    mpc_eval_drake_pusher_slider.evaluate_mpc(
        model_dy=model_dy,
        env=env,
        episode=episode,
        mpc_input_builder=mpc_input_builder,
        planner=planner,
        eval_indices=object_indices,
        goal_func=goal_func,
        config=planner_config,
        wait_for_user_input=False,
        save_dir=save_dir,
        model_name="test",
        experiment_name="test",
        generate_initial_condition_func=generate_initial_condition_func)

    return {'save_dir': save_dir}
# Example 8
def evaluate_mpc(model_dir,
                 config_planner_mpc=None,
                 save_dir=None,
                 planner_type=None,
                 env_config=None,
                 strict=True,
                 generate_initial_condition_func=None,
                 ):
    """Run a single closed-loop MPC evaluation using ground-truth 3D keypoints.

    Loads a dynamics model from ``model_dir``, builds a Drake pusher-slider
    environment, and delegates the evaluation rollout to
    ``mpc_eval_drake_pusher_slider.evaluate_mpc``.

    Returns:
        dict containing the ``save_dir`` results were written to.
    """
    # these keyword arguments are mandatory
    assert save_dir is not None
    assert planner_type is not None
    assert env_config is not None
    assert generate_initial_condition_func is not None

    # load the trained dynamics model and move it to the GPU in eval mode
    loaded = model_builder.load_dynamics_model_from_folder(
        model_dir, strict=strict)
    model_dy = loaded['model_dy'].eval().cuda()
    model_config = model_dy.config

    # simulation environment
    env = DrakePusherSliderEnv(env_config, visualize=False)
    env.reset()

    observation_function = ObservationFunctionFactory.function_from_config(
        model_config)
    action_function = ActionFunctionFactory.function_from_config(model_config)
    episode = OnlineEpisodeReader()

    mpc_input_builder = DynamicsModelInputBuilder(
        observation_function=observation_function,
        action_function=action_function,
        visual_observation_function=None,
        episode=episode)

    # ground-truth 3D object points define the keypoints we evaluate on
    gt_points = np.array(
        model_config['dataset']['observation_function']['GT_3D_object_points'])
    num_keypoints = gt_points.shape[0]
    eval_indices = np.arange(3 * num_keypoints)  # (x, y, z) per keypoint

    def goal_func(obs_local):
        """Extract the goal keypoint state from an observation."""
        return observation_function(obs_local)[eval_indices]

    # planner config: same as the model config but with the 'mpc' and 'eval'
    # sections overridden when provided
    planner_config = copy.copy(model_config)
    if config_planner_mpc is not None:
        planner_config['mpc'] = config_planner_mpc['mpc']
        planner_config['eval'] = config_planner_mpc['eval']

    if planner_type == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif planner_type == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (planner_type))

    # run a single evaluation
    mpc_eval_drake_pusher_slider.evaluate_mpc(
        model_dy=model_dy,
        env=env,
        episode=episode,
        mpc_input_builder=mpc_input_builder,
        planner=planner,
        eval_indices=eval_indices,
        goal_func=goal_func,
        config=planner_config,
        wait_for_user_input=False,
        save_dir=save_dir,
        model_name="test",
        experiment_name="test",
        generate_initial_condition_func=generate_initial_condition_func,
        )

    return {'save_dir': save_dir}
# Example 9
def load_model_and_data():
    """Load the trained models, the hardware dataset, and a planner.

    Loads the dynamics model and the dense-descriptor model it was trained
    with, builds a ``MultiEpisodeDataset`` over the precomputed descriptor
    keypoints for the ``push_box_hardware`` dataset, and constructs a planner
    selected by the module-level ``PLANNER_TYPE``.

    Returns:
        dict with the models, dataset, config, episode dict, descriptor data,
        planner and action/observation functions.
    """
    dataset_name = "push_box_hardware"

    model_name = "DD_3D/2020-07-02-17-59-21-362337_DD_3D_n_his_2_T_aug"
    train_dir = os.path.join(
        "/home/manuelli/data/key_dynam/dev/experiments/22/dataset_push_box_hardware/trained_models/dynamics",
        model_name)
    ckpt_file = os.path.join(train_dir, "net_best_dy_state_dict.pth")

    config = load_yaml(os.path.join(train_dir, 'config.yaml'))
    state_dict = torch.load(ckpt_file)

    # dynamics model: build, load trained weights, move to GPU in eval mode
    model_dy = build_dynamics_model(config)
    model_dy.load_state_dict(state_dict)
    model_dy = model_dy.eval().cuda()

    spatial_descriptor_data = load_pickle(
        os.path.join(train_dir, 'spatial_descriptors.p'))
    metadata = load_pickle(os.path.join(train_dir, 'metadata.p'))

    # dense-descriptor model referenced by the training metadata
    model_dd = torch.load(metadata['model_file']).eval().cuda()

    # dataset paths and precomputed descriptor keypoints
    dataset_paths = get_dataset_paths(dataset_name)
    episodes_config = dataset_paths['episodes_config']

    descriptor_keypoints_root = os.path.join(
        DD_utils.get_precomputed_data_root(dataset_name)[
            'precomputed_data_root'],
        'descriptor_keypoints')

    multi_episode_dict = DynamicSpartanEpisodeReader.load_dataset(
        config=config,
        episodes_config=episodes_config,
        episodes_root=dataset_paths['dataset_root'],
        load_image_episode=True,
        precomputed_data_root=descriptor_keypoints_root,
        max_num_episodes=None)

    visual_observation_function = PrecomputedVisualObservationFunctionFactory.function_from_config(
        config,
        keypoint_idx=spatial_descriptor_data['spatial_descriptors_idx'])

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)
    dataset = MultiEpisodeDataset(
        config,
        action_function=action_function,
        observation_function=observation_function,
        episodes=multi_episode_dict,
        visual_observation_function=visual_observation_function,
        phase="valid",  # "valid" disables data augmentation
    )

    #### PLANNER #######
    # planner config = model config with its 'mpc' section replaced
    planner_config = copy.copy(model_dy.config)
    config_tmp = load_yaml(
        os.path.join(get_project_root(),
                     'experiments/exp_22_push_box_hardware/config_DD_3D.yaml'))
    planner_config['mpc'] = config_tmp['mpc']
    if PLANNER_TYPE == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif PLANNER_TYPE == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (PLANNER_TYPE))

    return {
        "model_dy": model_dy,
        'model_dd': model_dd,
        'dataset': dataset,
        'config': config,
        "multi_episode_dict": multi_episode_dict,
        'spatial_descriptor_data': spatial_descriptor_data,
        'planner': planner,
        'observation_function': observation_function,
        'action_function': action_function,
    }
def load_model_and_data():
    """Load the trained dynamics model and the hardware dataset.

    Loads the dynamics model checkpoint from a hard-coded training directory,
    then builds a MultiEpisodeDataset over precomputed descriptor keypoints
    for the "push_box_hardware" dataset.

    NOTE(review): this redefines ``load_model_and_data`` declared earlier in
    this file; only this later definition is visible after import. This
    variant omits the dense-descriptor model and the planner — confirm the
    duplication is intentional.

    Returns:
        dict with the dynamics model, dataset, config, the per-episode dict
        and the spatial descriptor data.
    """

    dataset_name = "push_box_hardware"

    # model_name = "DD_2D/2020-06-24-22-22-58-234812_DD_3D_n_his_2" # this model is actually 3D
    # model_name = "DD_3D/2020-06-25-00-49-29-679042_DD_3D_n_his_2_T_aug"
    # model_name = "DD_3D/2020-06-25-00-39-29-020621_DD_3D_n_his_2"

    model_name = "DD_3D/2020-07-02-17-59-21-362337_DD_3D_n_his_2_T_aug"
    train_dir = "/home/manuelli/data/key_dynam/dev/experiments/22/dataset_push_box_hardware/trained_models/dynamics"

    train_dir = os.path.join(train_dir, model_name)
    ckpt_file = os.path.join(train_dir, "net_best_dy_state_dict.pth")

    config = load_yaml(os.path.join(train_dir, 'config.yaml'))
    state_dict = torch.load(ckpt_file)

    # build dynamics model, load trained weights, move to GPU in eval mode
    model_dy = build_dynamics_model(config)
    # print("state_dict.keys()", state_dict.keys())
    model_dy.load_state_dict(state_dict)
    model_dy = model_dy.eval()
    model_dy = model_dy.cuda()

    # spatial descriptors picked at training time + training metadata
    spatial_descriptor_data = load_pickle(
        os.path.join(train_dir, 'spatial_descriptors.p'))
    metadata = load_pickle(os.path.join(train_dir, 'metadata.p'))

    # load the dataset
    dataset_paths = get_dataset_paths(dataset_name)
    dataset_root = dataset_paths['dataset_root']
    episodes_config = dataset_paths['episodes_config']

    precomputed_vision_data_root = DD_utils.get_precomputed_data_root(
        dataset_name)['precomputed_data_root']

    # descriptor_keypoints_root = os.path.join(precomputed_vision_data_root, 'descriptor_keypoints')
    descriptor_keypoints_root = os.path.join(precomputed_vision_data_root,
                                             'descriptor_keypoints')

    # episode readers with the precomputed descriptor keypoints attached
    multi_episode_dict = DynamicSpartanEpisodeReader.load_dataset(
        config=config,
        episodes_config=episodes_config,
        episodes_root=dataset_paths['dataset_root'],
        load_image_episode=True,
        precomputed_data_root=descriptor_keypoints_root,
        max_num_episodes=None)

    visual_observation_function = PrecomputedVisualObservationFunctionFactory.function_from_config(
        config,
        keypoint_idx=spatial_descriptor_data['spatial_descriptors_idx'])

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)
    dataset = MultiEpisodeDataset(
        config,
        action_function=action_function,
        observation_function=observation_function,
        episodes=multi_episode_dict,
        visual_observation_function=visual_observation_function,
        phase="valid",  # this means no data augmentation
    )

    return {
        "model_dy": model_dy,
        'dataset': dataset,
        'config': config,
        "multi_episode_dict": multi_episode_dict,
        'spatial_descriptor_data': spatial_descriptor_data,
    }
# Example 11
def evaluate_mpc(
    model_dir,
    config_planner_mpc=None,
    save_dir=None,
    planner_type=None,
    env_config=None,
    strict=True,
    generate_initial_condition_func=None,
):
    """Run a single closed-loop MPC evaluation using descriptor keypoints.

    Loads the dynamics + dense-descriptor models from ``model_dir``, builds a
    Drake pusher-slider environment, wires up descriptor-keypoint visual
    observations, and delegates the rollout to
    ``mpc_eval_drake_pusher_slider.evaluate_mpc``.

    Returns:
        dict containing the ``save_dir`` results were written to.
    """
    # these keyword arguments are mandatory
    assert save_dir is not None
    assert planner_type is not None
    assert env_config is not None
    assert generate_initial_condition_func is not None

    # load the trained dynamics and dense-descriptor models
    model_data = load_model(model_dir, strict=strict)
    model_dy = model_data['model_dy']
    model_config = model_dy.config
    config = model_config
    model_dd = model_data['model_dd']

    # simulation environment
    env = DrakePusherSliderEnv(env_config, visualize=False)
    env.reset()

    camera_name = model_data['metadata']['camera_name']

    # sanity check: evaluation camera must match the training camera
    camera_name_in_training = model_config['dataset'][
        'visual_observation_function']['camera_name']
    assert camera_name == camera_name_in_training, "camera_names don't match: camera_name = %s, camera_name_in_trainig = %s" % (
        camera_name, camera_name_in_training)

    # camera calibration from the environment + reference descriptors
    T_world_camera = env.camera_pose(camera_name)
    camera_K_matrix = env.camera_K_matrix(camera_name)
    spatial_descriptor_data = model_data['spatial_descriptor_data']
    ref_descriptors = torch.Tensor(
        spatial_descriptor_data['spatial_descriptors']).cuda()
    K = ref_descriptors.shape[0]  # number of keypoints

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.drake_pusher_position_3D(
        config)
    visual_observation_function = VisualObservationFunctionFactory.function_from_config(
        config,
        camera_name=camera_name,
        model_dd=model_dd,
        ref_descriptors=ref_descriptors,
        K_matrix=camera_K_matrix,
        T_world_camera=T_world_camera,
    )

    episode = OnlineEpisodeReader()

    mpc_input_builder = DynamicsModelInputBuilder(
        observation_function=observation_function,
        visual_observation_function=visual_observation_function,
        action_function=action_function,
        episode=episode)

    def goal_func(obs_local):
        # the goal is the descriptor-keypoint tensor for this observation
        return visual_observation_function(obs_local)['tensor']

    eval_indices = np.arange(3 * K)  # (x, y, z) per keypoint

    # planner config: same as the model config but with the 'mpc' and 'eval'
    # sections overridden when provided
    planner_config = copy.copy(model_config)
    if config_planner_mpc is not None:
        planner_config['mpc'] = config_planner_mpc['mpc']
        planner_config['eval'] = config_planner_mpc['eval']

    if planner_type == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif planner_type == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (planner_type))

    # run a single evaluation
    mpc_eval_drake_pusher_slider.evaluate_mpc(
        model_dy=model_dy,
        env=env,
        episode=episode,
        mpc_input_builder=mpc_input_builder,
        planner=planner,
        eval_indices=eval_indices,
        goal_func=goal_func,
        config=planner_config,
        wait_for_user_input=False,
        save_dir=save_dir,
        model_name="test",
        experiment_name="test",
        generate_initial_condition_func=generate_initial_condition_func)

    return {'save_dir': save_dir}
# Example 12
def main():
    """Closed-loop MPC tracking demo in latent (z) space for the pusher-slider.

    Rolls out a ground-truth action sequence in a Drake pusher-slider
    simulation, encodes the resulting trajectory with the learned
    autoencoder/dynamics model, then runs an MPC loop (planner chosen by the
    module-level PLANNER_TYPE) that tries to reproduce that trajectory,
    visualizing progress in meshcat and printing the final pose error.

    NOTE(review): depends on module-level constants/flags defined elsewhere
    in this file: N, PLANNER_TYPE, REPLAN, USE_FIXED_MPC_HORIZON,
    MPC_HORIZON, USE_SHORT_HORIZON_MPC, SEED_WITH_NOMINAL_ACTIONS,
    TRAJECTORY_GOAL, TERMINAL_COST_ONLY — confirm before reuse.
    """
    # load dynamics model (plus autoencoder and visual observation function)
    model_dict = load_autoencoder_model()
    model = model_dict['model_dy']
    model_ae = model_dict['model_ae']
    visual_observation_function = model_dict['visual_observation_function']

    config = model.config

    env_config = load_yaml(
        os.path.join(get_project_root(),
                     'experiments/exp_18_box_on_side/config.yaml'))
    env_config['env']['observation']['depth_int16'] = True
    n_history = config['train']['n_history']

    # create the (visualized) environment used for the MPC run
    env = DrakePusherSliderEnv(env_config)
    env.reset()

    # create another environment for doing rollouts
    env2 = DrakePusherSliderEnv(env_config, visualize=False)
    env2.reset()

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.drake_pusher_position_3D(
        config)

    episode = OnlineEpisodeReader()
    mpc_input_builder = DynamicsModelInputBuilder(
        observation_function=observation_function,
        visual_observation_function=visual_observation_function,
        action_function=action_function,
        episode=episode)

    vis = meshcat_utils.make_default_visualizer_object()
    vis.delete()

    initial_cond = get_initial_state()
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    obs_init = env.get_observation()

    # visualize starting position of the object
    print("obs_init.keys()", obs_init.keys())
    print("obs_init['slider']['position']", obs_init['slider']['position'])
    T = DrakePusherSliderEnv.object_position_from_observation(obs_init)
    vis['start_pose'].set_object(triad(scale=0.1))
    vis['state_pose'].set_transform(T)

    #### ROLLOUT USING LEARNED MODEL + GROUND TRUTH ACTIONS ############
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    # seed the episode with n_history zero-action observations so the
    # dynamics-model input builder has a full history window
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    #### ROLLOUT THE ACTION SEQUENCE USING THE SIMULATOR ##########
    # rollout single action sequence using the simulator
    gt_rollout_data = env_utils.rollout_action_sequence(
        env, initial_cond['action_sequence'].cpu().numpy())
    env_obs_rollout_gt = gt_rollout_data['observations']
    gt_rollout_episode = gt_rollout_data['episode_reader']

    # visualize the ground-truth object trajectory as a sequence of triads
    for i, env_obs in enumerate(gt_rollout_data['observations']):
        T = DrakePusherSliderEnv.object_position_from_observation(env_obs)
        vis_name = "GT_trajectory/%d" % (i)
        vis[vis_name].set_object(triad(scale=0.1))
        vis[vis_name].set_transform(T)

    action_state_gt = mpc_input_builder.get_action_state_tensors(
        start_idx=0, num_timesteps=N, episode=gt_rollout_episode)

    state_rollout_gt = action_state_gt['states']
    action_rollout_gt = action_state_gt['actions']
    # latent-space reference trajectory the MPC will try to track
    z_object_rollout_gt = model.compute_z_state(
        state_rollout_gt)['z_object_flat']
    print('state_rollout_gt.shape', state_rollout_gt.shape)
    print("z_object_rollout_gt.shape", z_object_rollout_gt.shape)

    def goal_func(obs_tmp):
        # encode a single observation into the flattened latent object state
        state_tmp = mpc_input_builder.get_state_input_single_timestep(
            {'observation': obs_tmp})['state']
        return model.compute_z_state(
            state_tmp.unsqueeze(0))['z_object'].flatten()

    # using the vision model to get "goal" keypoints
    z_object_goal = goal_func(env_obs_rollout_gt[-1])
    z_object_goal_np = torch_utils.cast_to_numpy(z_object_goal)

    # input("press Enter to continue")

    #### ROLLOUT USING LEARNED MODEL + GROUND TRUTH ACTIONS ############
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    # re-seed the episode history with zero actions
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    # [n_history, state_dim]
    idx = episode.get_latest_idx()

    dyna_net_input = mpc_input_builder.get_dynamics_model_input(
        idx, n_history=n_history)
    state_init = dyna_net_input['states'].cuda()  # [n_history, state_dim]
    action_init = dyna_net_input['actions']  # [n_history, action_dim]

    print("state_init.shape", state_init.shape)
    print("action_init.shape", action_init.shape)
    print("n_history", n_history)

    # prepend the history actions (minus the latest) to the GT sequence
    action_seq_gt_torch = initial_cond['action_sequence']
    action_input = torch.cat(
        (action_init[:(n_history - 1)], action_seq_gt_torch), dim=0).cuda()
    print("action_input.shape", action_input.shape)

    # rollout using the ground truth actions and learned model
    # need to add the batch dim to do that
    z_init = model.compute_z_state(state_init)['z']
    rollout_pred = rollout_model(state_init=z_init.unsqueeze(0),
                                 action_seq=action_input.unsqueeze(0),
                                 dynamics_net=model,
                                 compute_debug_data=True)

    state_pred_rollout = rollout_pred['state_pred'].squeeze(0)

    print("state_pred_rollout.shape", state_pred_rollout.shape)
    # input("press Enter to continue")

    # check L2 distance between predicted and actual
    # basically comparing state_pred_rollout and state_rollout_gt
    print("state_rollout_gt[-1]\n", state_rollout_gt[-1])
    print("state_pred_rollout[-1]\n", state_pred_rollout[-1])

    index_dict = get_object_and_robot_state_indices(config)
    object_indices = index_dict['object_indices']

    # reset the environment and use the MPC controller to stabilize this
    # now setup the MPC to try to stabilize this . . . .
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    episode.clear()

    # re-seed the episode history with zero actions
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    # input("press Enter to continue")

    # make a planner config: model config with the experiment's 'mpc' section
    planner_config = copy.copy(config)
    config_tmp = load_yaml(
        os.path.join(get_project_root(),
                     'experiments/drake_pusher_slider/eval_config.yaml'))
    planner_config['mpc'] = config_tmp['mpc']

    planner_config['mpc']['mppi']['terminal_cost_only'] = TERMINAL_COST_ONLY
    planner_config['mpc']['random_shooting'][
        'terminal_cost_only'] = TERMINAL_COST_ONLY

    planner = None
    if PLANNER_TYPE == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif PLANNER_TYPE == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (PLANNER_TYPE))

    # closed-loop MPC: replan (optionally) each step, execute the first
    # action, then step the simulator until the horizon is exhausted
    mpc_out = None
    action_seq_mpc = None
    state_pred_mpc = None
    counter = -1
    while True:
        counter += 1
        print("\n\n-----Running MPC Optimization: Counter (%d)-------" %
              (counter))

        obs_cur = env.get_observation()
        episode.add_observation_only(obs_cur)

        if counter == 0 or REPLAN:
            print("replanning")
            ####### Run the MPC ##########

            # [1, state_dim]

            # horizon shrinks toward the end of the reference trajectory
            # unless a fixed/short horizon mode is enabled
            n_look_ahead = N - counter
            if USE_FIXED_MPC_HORIZON:
                n_look_ahead = MPC_HORIZON
            elif USE_SHORT_HORIZON_MPC:
                n_look_ahead = min(MPC_HORIZON, N - counter)
            if n_look_ahead == 0:
                break

            start_idx = counter
            end_idx = counter + n_look_ahead

            print("start_idx:", start_idx)
            print("end_idx:", end_idx)
            print("n_look_ahead", n_look_ahead)

            # start_time = time.time()
            # idx of current observation
            idx = episode.get_latest_idx()
            mpc_start_time = time.time()
            mpc_input_data = mpc_input_builder.get_dynamics_model_input(
                idx, n_history=n_history)
            state_cur = mpc_input_data['states']
            action_his = mpc_input_data['actions']

            # warm-start the optimizer: either with the nominal GT actions
            # or with the previous plan shifted by one step (zero-padded)
            if SEED_WITH_NOMINAL_ACTIONS:
                action_seq_rollout_init = action_seq_gt_torch[
                    start_idx:end_idx]
            else:
                if mpc_out is not None:
                    action_seq_rollout_init = mpc_out['action_seq'][1:]
                    print("action_seq_rollout_init.shape",
                          action_seq_rollout_init.shape)

                    if action_seq_rollout_init.shape[0] < n_look_ahead:
                        num_steps = n_look_ahead - action_seq_rollout_init.shape[
                            0]
                        action_seq_zero = torch.zeros([num_steps, 2])

                        action_seq_rollout_init = torch.cat(
                            (action_seq_rollout_init, action_seq_zero), dim=0)
                        print("action_seq_rollout_init.shape",
                              action_seq_rollout_init.shape)
                else:
                    action_seq_rollout_init = None

        # run MPPI
            z_cur = None
            with torch.no_grad():
                z_cur = model.compute_z_state(
                    state_cur.unsqueeze(0).cuda())['z'].squeeze(0)

            if action_seq_rollout_init is not None:
                n_look_ahead = action_seq_rollout_init.shape[0]

            # goal: either the matching slice of the reference trajectory
            # (padded with the final state) or just the final state
            obs_goal = None
            print("z_object_rollout_gt.shape", z_object_rollout_gt.shape)
            if TRAJECTORY_GOAL:
                obs_goal = z_object_rollout_gt[start_idx:end_idx]

                print("n_look_ahead", n_look_ahead)
                print("obs_goal.shape", obs_goal.shape)

                # add the final goal state on as needed
                if obs_goal.shape[0] < n_look_ahead:
                    obs_goal_final = z_object_rollout_gt[-1].unsqueeze(0)
                    num_repeat = n_look_ahead - obs_goal.shape[0]
                    obs_goal_final_expand = obs_goal_final.expand(
                        [num_repeat, -1])
                    obs_goal = torch.cat((obs_goal, obs_goal_final_expand),
                                         dim=0)
            else:
                obs_goal = z_object_rollout_gt[-1]

            obs_goal = torch_utils.cast_to_numpy(obs_goal)
            print("obs_goal.shape", obs_goal.shape)

            # fixed seed so each replanning step is reproducible
            seed = 1

            set_seed(seed)
            mpc_out = planner.trajectory_optimization(
                state_cur=z_cur,
                action_his=action_his,
                obs_goal=obs_goal,
                model_dy=model,
                action_seq_rollout_init=action_seq_rollout_init,
                n_look_ahead=n_look_ahead,
                eval_indices=object_indices,
                rollout_best_action_sequence=True,
                verbose=True,
                add_grid_action_samples=True,
            )

            print("MPC step took %.4f seconds" %
                  (time.time() - mpc_start_time))
            action_seq_mpc = torch_utils.cast_to_numpy(mpc_out['action_seq'])
            state_pred_mpc = torch_utils.cast_to_numpy(mpc_out['state_pred'])

        # Rollout with ground truth simulator dynamics
        env2.set_simulator_state_from_observation_dict(
            env2.get_mutable_context(), obs_cur)
        obs_mpc_gt = env_utils.rollout_action_sequence(
            env2, action_seq_mpc)['observations']

        vis['mpc_3D'].delete()
        vis['mpc_GT_3D'].delete()

        L = len(obs_mpc_gt)
        print("L", L)
        if L == 0:
            break
        for i in range(L):

            # ground truth rollout of the MPC action_seq
            name = "mpc_GT_3D/%d" % (i)
            T_W_obj = DrakePusherSliderEnv.object_position_from_observation(
                obs_mpc_gt[i])
            vis[name].set_object(triad(scale=0.1))
            vis[name].set_transform(T_W_obj)

        action_cur = action_seq_mpc[0]

        print("action_cur", action_cur)
        print("action_GT", initial_cond['action'])
        input("press Enter to continue")

        # add observation actions to the episode
        obs_cur = env.get_observation()
        episode.replace_observation_action(obs_cur, action_cur)

        # step the simulator
        env.step(action_cur)

        # update the trajectories, in case we aren't replanning
        action_seq_mpc = action_seq_mpc[1:]
        state_pred_mpc = state_pred_mpc[1:]

        pose_error = compute_pose_error(env_obs_rollout_gt[-1], obs_cur)

        print("position_error: %.3f" % (pose_error['position_error']))
        print("angle error degrees: %.3f" %
              (pose_error['angle_error_degrees']))

    obs_final = env.get_observation()

    pose_error = compute_pose_error(env_obs_rollout_gt[-1], obs_final)

    print("position_error: %.3f" % (pose_error['position_error']))
    print("angle error degrees: %.3f" % (pose_error['angle_error_degrees']))
# Example 13
def mpc_w_learned_dynamics(config,
                           train_dir,
                           mpc_dir,
                           state_dict_path=None,
                           keypoint_observation=False):
    """Run closed-loop MPC episodes using a learned dynamics model.

    Loads a trained dynamics network, builds the action/observation
    functions and the planner from ``config``, then runs one MPC episode
    per hard-coded keypoint goal.

    :param config: experiment configuration dict; the 'train', 'dynamics'
        and 'mpc' sections are read below.
    :param train_dir: directory containing the trained model checkpoints.
    :param mpc_dir: directory where the 'mpc.log' file is written.
    :param state_dict_path: optional explicit checkpoint path; when None
        the checkpoint is chosen from config['mpc'] ('mpc_dy_epoch' == -1
        selects 'net_best_dy.pth').
    :param keypoint_observation: must be True; other observation types are
        not supported yet.
    :raises AssertionError: for an unknown model type or a non-keypoint
        observation.
    :raises ValueError: if more episodes are requested than goals exist.
    """
    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    # Tee redirects stdout into the log file as a side effect; keep the
    # reference alive for the duration of the function
    tee = Tee(os.path.join(mpc_dir, 'mpc.log'), 'w')

    print(config)

    use_gpu = torch.cuda.is_available()
    '''
    model
    '''
    if config['dynamics']['model_type'] == 'mlp':
        model_dy = DynaNetMLP(config)
    else:
        raise AssertionError("Unknown model type %s" %
                             config['dynamics']['model_type'])

    # print model #params
    print("model #params: %d" % count_trainable_parameters(model_dy))

    if state_dict_path is None:
        if config['mpc']['mpc_dy_epoch'] == -1:
            state_dict_path = os.path.join(train_dir, 'net_best_dy.pth')
        else:
            state_dict_path = os.path.join(
                train_dir, 'net_dy_epoch_%d_iter_%d.pth' % \
                (config['mpc']['mpc_dy_epoch'], config['mpc']['mpc_dy_iter']))

        print("Loading saved ckp from %s" % state_dict_path)

    model_dy.load_state_dict(torch.load(state_dict_path))
    model_dy.eval()

    if use_gpu:
        model_dy.cuda()

    # generate action/observation functions
    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    # planner
    planner = planner_from_config(config)
    '''
    env
    '''
    # set up goal: one row of keypoint coordinates per MPC episode
    obs_goals = np.array([[
        262.9843, 267.3102, 318.9369, 351.1229, 360.2048, 323.5128, 305.6385,
        240.4460, 515.4230, 347.8708
    ],
                          [
                              381.8694, 273.6327, 299.6685, 331.0925, 328.7724,
                              372.0096, 411.0972, 314.7053, 517.7299, 268.4953
                          ],
                          [
                              284.8728, 275.7985, 374.0677, 320.4990, 395.4019,
                              275.4633, 306.2896, 231.4310, 507.0849, 312.4057
                          ],
                          [
                              313.1638, 271.4258, 405.0255, 312.2325, 424.7874,
                              266.3525, 333.6973, 225.7708, 510.1232, 305.3802
                          ],
                          [
                              308.6859, 270.9629, 394.2789, 323.2781, 419.7905,
                              280.1602, 333.8901, 228.1624, 519.1964, 321.5318
                          ],
                          [
                              386.8067, 284.8947, 294.2467, 323.2223, 313.3221,
                              368.9970, 405.9415, 330.9298, 495.9970, 268.9920
                          ],
                          [
                              432.0219, 299.6021, 340.8581, 339.4676, 360.2354,
                              384.5515, 451.4394, 345.2190, 514.6357, 291.2043
                          ],
                          [
                              351.3389, 264.5325, 267.5279, 318.2321, 293.7460,
                              360.0423, 378.4428, 306.9586, 516.4390, 259.7810
                          ],
                          [
                              521.1902, 254.0693, 492.7884, 349.7861, 539.6320,
                              364.5190, 569.2258, 268.8824, 506.9431, 286.9752
                          ],
                          [
                              264.8554, 275.9547, 338.1317, 345.3435, 372.7012,
                              308.4648, 299.3454, 239.9245, 506.2117, 373.8413
                          ]])

    num_episodes = config['mpc']['num_episodes']
    # fail early with a clear message instead of an IndexError mid-run when
    # more episodes are requested than goals are available
    if num_episodes > len(obs_goals):
        raise ValueError(
            "config['mpc']['num_episodes'] = %d exceeds the %d available goals"
            % (num_episodes, len(obs_goals)))

    for mpc_idx in range(num_episodes):
        if keypoint_observation:
            mpc_episode_keypoint_observation(config,
                                             mpc_idx,
                                             model_dy,
                                             mpc_dir,
                                             planner,
                                             obs_goals[mpc_idx],
                                             action_function,
                                             observation_function,
                                             use_gpu=use_gpu)
        else:
            # not supported for now
            raise AssertionError("currently only support keypoint observation")
# --- Example #14 (예제 #14) — listing separator from the original source collection ---
def eval_dynamics(config,
                  train_dir,
                  eval_dir,
                  state_dict_path=None,
                  keypoint_observation=False,
                  debug=False,
                  render_human=False):
    """Evaluate a trained dynamics model on validation episodes.

    Loads a DynaNetMLP checkpoint, builds the validation dataset, and rolls
    the model out on each selected episode via ``eval_episode`` /
    ``eval_episode_keypoint_observations``.

    :param config: experiment configuration dict; the 'train' and 'eval'
        sections are read below.
    :param train_dir: directory containing the trained model checkpoints.
    :param eval_dir: directory where the 'eval.log' file and per-episode
        outputs are written.
    :param state_dict_path: optional explicit checkpoint path; when None
        the checkpoint is chosen from config['eval'] ('eval_dy_epoch' == -1
        selects 'net_best_dy.pth').
    :param keypoint_observation: if True, evaluate with keypoint
        observations, otherwise with raw state observations.
    :param debug: if True, only evaluate the first (sorted) episode.
    :param render_human: forwarded to the per-episode evaluation routine.
    """
    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    # Tee redirects stdout into the log file as a side effect; keep the
    # reference alive for the duration of the function
    tee = Tee(os.path.join(eval_dir, 'eval.log'), 'w')

    print(config)

    use_gpu = torch.cuda.is_available()
    '''
    model
    '''
    model_dy = DynaNetMLP(config)

    # print model #params
    print("model #params: %d" % count_trainable_parameters(model_dy))

    if state_dict_path is None:
        if config['eval']['eval_dy_epoch'] == -1:
            state_dict_path = os.path.join(train_dir, 'net_best_dy.pth')
        else:
            state_dict_path = os.path.join(
                train_dir, 'net_dy_epoch_%d_iter_%d.pth' % \
                (config['eval']['eval_dy_epoch'], config['eval']['eval_dy_iter']))

        print("Loading saved ckp from %s" % state_dict_path)

    model_dy.load_state_dict(torch.load(state_dict_path))
    model_dy.eval()

    if use_gpu:
        model_dy.cuda()

    # load the data
    episodes = load_episodes_from_config(config)

    # generate action/observation functions
    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    dataset = MultiEpisodeDataset(config,
                                  action_function=action_function,
                                  observation_function=observation_function,
                                  episodes=episodes,
                                  phase="valid")

    episode_names = dataset.get_episode_names()
    episode_names.sort()

    # for backwards compatibility: older configs omit 'num_episodes'
    num_episodes = config["eval"].get("num_episodes", 10)

    # in debug mode only roll out the first episode
    episode_list = [episode_names[0]
                    ] if debug else episode_names[:num_episodes]

    for roll_idx, episode_name in enumerate(episode_list):
        print("episode_name", episode_name)
        if keypoint_observation:
            eval_episode_keypoint_observations(config,
                                               dataset,
                                               episode_name,
                                               roll_idx,
                                               model_dy,
                                               eval_dir,
                                               start_idx=9,
                                               n_prediction=30,
                                               render_human=render_human)
        else:
            eval_episode(config,
                         dataset,
                         episode_name,
                         roll_idx,
                         model_dy,
                         eval_dir,
                         start_idx=9,
                         n_prediction=30,
                         render_human=render_human)
# --- Example #15 (예제 #15) — listing separator from the original source collection ---
def train_dynamics(
    config,
    train_dir,  # str: directory to save output
    multi_episode_dict=None,
    spatial_descriptors_idx=None,
    metadata=None,
    spatial_descriptors_data=None,
):
    """Train a keypoint-based dynamics model.

    Builds train/valid datasets from ``multi_episode_dict``, rolls the
    model forward ``n_rollout`` steps per sample, and optimizes a
    per-keypoint weighted squared error on the object state plus an MSE on
    the robot state. Checkpoints, tensorboard events, the config and
    optional metadata are written to ``train_dir``. On KeyboardInterrupt
    the current model is checkpointed before returning.

    NOTE(review): this function is redefined later in the file under the
    same name; the later definition shadows this one at import time.

    :param config: experiment configuration dict; reads the 'train',
        'dataset' and 'vision_net' sections below.
    :param train_dir: directory where all outputs are saved.
    :param multi_episode_dict: dict of episodes used to build the datasets
        (required).
    :param spatial_descriptors_idx: indices selecting which descriptor
        keypoints are fed to the dynamics model.
    :param metadata: optional object pickled to ``train_dir/metadata.p``.
    :param spatial_descriptors_data: optional object pickled to
        ``train_dir/spatial_descriptors.p``.
    """
    assert multi_episode_dict is not None
    # assert spatial_descriptors_idx is not None

    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    st_epoch = config['train'][
        'resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    # Tee redirects stdout into the log file as a side effect; keep the
    # reference alive for the duration of the function
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, "config.yaml"))

    if metadata is not None:
        save_pickle(metadata, os.path.join(train_dir, 'metadata.p'))

    if spatial_descriptors_data is not None:
        save_pickle(spatial_descriptors_data,
                    os.path.join(train_dir, 'spatial_descriptors.p'))

    training_stats = dict()
    training_stats_file = os.path.join(train_dir, 'training_stats.yaml')

    # load the data

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=multi_episode_dict,
            phase=phase)

        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=True if phase == 'train' else False,
            num_workers=config['train']['num_workers'],
            drop_last=True)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    # compute normalization parameters if not starting from pre-trained network . . .
    '''
    Build model for dynamics prediction
    '''
    model_dy = build_dynamics_model(config)
    camera_name = config['vision_net']['camera_name']

    # optimizer
    params = model_dy.parameters()
    lr = float(config['train']['lr'])
    optimizer = optim.Adam(params,
                           lr=lr,
                           betas=(config['train']['adam_beta1'], 0.999))

    # setup scheduler
    sc = config['train']['lr_scheduler']
    scheduler = None

    if config['train']['lr_scheduler']['enabled']:
        if config['train']['lr_scheduler']['type'] == "ReduceLROnPlateau":
            scheduler = ReduceLROnPlateau(optimizer,
                                          mode='min',
                                          factor=sc['factor'],
                                          patience=sc['patience'],
                                          threshold_mode=sc['threshold_mode'],
                                          cooldown=sc['cooldown'],
                                          verbose=True)
        elif config['train']['lr_scheduler']['type'] == "StepLR":
            step_size = config['train']['lr_scheduler']['step_size']
            gamma = config['train']['lr_scheduler']['gamma']
            scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma)
        else:
            raise ValueError("unknown scheduler type: %s" %
                             (config['train']['lr_scheduler']['type']))

    if use_gpu:
        print("using gpu")
        model_dy = model_dy.cuda()

    best_valid_loss = np.inf
    valid_loss_type = config['train']['valid_loss_type']
    global_iteration = 0
    counters = {'train': 0, 'valid': 0}
    epoch_counter_external = 0
    loss = 0

    index_map = get_object_and_robot_state_indices(config)
    object_state_indices = torch.LongTensor(index_map['object_indices'])
    robot_state_indices = torch.LongTensor(index_map['robot_indices'])

    object_state_shape = config['dataset']['object_state_shape']

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)
            for phase in phases:

                # only validate at a certain frequency
                if (phase == "valid") and (
                    (epoch % config['train']['valid_frequency']) != 0):
                    continue

                model_dy.train(phase == 'train')

                average_meter_container = dict()

                step_duration_meter = AverageMeter()

                loader = dataloaders[phase]

                for i, data in enumerate(loader):

                    loss_container = dict()  # store the losses for this step

                    step_start_time = time.time()

                    global_iteration += 1
                    counters[phase] += 1

                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" % (global_iteration))
                            print("n_samples", n_samples)

                        # [B, n_samples, obs_dim]
                        observations = data['observations']
                        visual_observations_list = data[
                            'visual_observations_list']

                        # [B, n_samples, action_dim]
                        actions = data['actions']
                        B = actions.shape[0]

                        if use_gpu:
                            observations = observations.cuda()
                            actions = actions.cuda()

                        # compile the visual observations
                        # compute the output of the visual model for all timesteps
                        visual_model_output_list = []
                        for visual_obs in visual_observations_list:
                            # visual_obs is a dict containing observation for a single
                            # time step (of course across a batch however)
                            # visual_obs[<camera_name>]['rgb_tensor'] has shape [B, 3, H, W]

                            # [B, -1, 3]
                            keypoints = visual_obs[camera_name][
                                'descriptor_keypoints_3d_world_frame']

                            # [B, K, 3] where K = len(spatial_descriptors_idx)
                            keypoints = keypoints[:, spatial_descriptors_idx]

                            B, K, _ = keypoints.shape

                            # [B, K*3]
                            keypoints_reshape = keypoints.reshape([B, K * 3])

                            if DEBUG:
                                print("keypoints.shape", keypoints.shape)
                                print("keypoints_reshape.shape",
                                      keypoints_reshape.shape)
                            visual_model_output_list.append(keypoints_reshape)

                        visual_model_output = None
                        if len(visual_model_output_list) > 0:
                            # concatenate this into a tensor
                            # [B, n_samples, vision_model_out_dim]
                            visual_model_output = torch.stack(
                                visual_model_output_list, dim=1)

                        else:
                            visual_model_output = torch.Tensor(
                            )  # empty tensor

                        # states, actions = data
                        assert actions.shape[1] == n_samples

                        # cast this to float so it can be concatenated below
                        visual_model_output = visual_model_output.type_as(
                            observations)

                        # states is gotten by concatenating visual_observations and observations
                        # [B, n_samples, vision_model_out_dim + obs_dim]
                        states = torch.cat((visual_model_output, observations),
                                           dim=-1)

                        # [B, n_his, state_dim]
                        state_init = states[:, :n_his]

                        # We want to rollout n_roll steps
                        # actions = [B, n_his + n_roll, -1]
                        # so we want action_seq.shape = [B, n_roll, -1]
                        action_start_idx = 0
                        action_end_idx = n_his + n_roll - 1
                        action_seq = actions[:, action_start_idx:
                                             action_end_idx, :]

                        if DEBUG:
                            print("states.shape", states.shape)
                            print("state_init.shape", state_init.shape)
                            print("actions.shape", actions.shape)
                            print("action_seq.shape", action_seq.shape)

                        # try using models_dy.rollout_model instead of doing this manually
                        rollout_data = rollout_model(state_init=state_init,
                                                     action_seq=action_seq,
                                                     dynamics_net=model_dy,
                                                     compute_debug_data=False)

                        # [B, n_roll, state_dim]
                        state_rollout_pred = rollout_data['state_pred']

                        # [B, n_roll, state_dim]
                        state_rollout_gt = states[:, n_his:]

                        if DEBUG:
                            print("state_rollout_gt.shape",
                                  state_rollout_gt.shape)
                            print("state_rollout_pred.shape",
                                  state_rollout_pred.shape)

                        # the loss function is between
                        # [B, n_roll, state_dim]
                        state_pred_err = state_rollout_pred - state_rollout_gt

                        # [B, n_roll, object_state_dim]
                        object_state_err = state_pred_err[:, :,
                                                          object_state_indices]
                        B, n_roll, object_state_dim = object_state_err.shape

                        # [B, n_roll, *object_state_shape]
                        object_state_err_reshape = object_state_err.reshape(
                            [B, n_roll, *object_state_shape])

                        # num weights
                        J = object_state_err_reshape.shape[2]
                        weights = model_dy.weight_matrix

                        assert len(
                            weights) == J, "len(weights) = %d, but J = %d" % (
                                len(weights), J)

                        # loss mse object, note the use of broadcasting semantics
                        # [B, n_roll]
                        object_state_loss_mse = weights * torch.pow(
                            object_state_err_reshape, 2).sum(dim=-1)
                        object_state_loss_mse = object_state_loss_mse.mean()

                        l2_object = (weights * torch.norm(
                            object_state_err_reshape, dim=-1)).mean()

                        l2_object_final_step = (weights * torch.norm(
                            object_state_err_reshape[:, -1], dim=-1)).mean()

                        # [B, n_roll, robot_state_dim]
                        robot_state_err = state_pred_err[:, :,
                                                         robot_state_indices]
                        robot_state_loss_mse = torch.pow(robot_state_err,
                                                         2).sum(dim=-1).mean()

                        loss_container[
                            'object_state_loss_mse'] = object_state_loss_mse
                        loss_container[
                            'robot_state_loss_mse'] = robot_state_loss_mse
                        loss_container['l2_object'] = l2_object
                        loss_container[
                            'l2_object_final_step'] = l2_object_final_step

                        # total loss
                        loss = object_state_loss_mse + robot_state_loss_mse
                        loss_container['loss'] = loss

                        for key, val in loss_container.items():
                            if not key in average_meter_container:
                                average_meter_container[key] = AverageMeter()

                            average_meter_container[key].update(val.item(), B)

                    step_duration_meter.update(time.time() - step_start_time)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                    if (i % config['train']['log_per_iter']
                            == 0) or (global_iteration %
                                      config['train']['log_per_iter'] == 0):
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))

                        log += ', step time %.6f' % (step_duration_meter.avg)
                        step_duration_meter.reset()

                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 100 iterations
                        if global_iteration > 100:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss_train/%s" % (phase),
                                              loss.item(), global_iteration)

                            for loss_type, loss_obj in loss_container.items():
                                plot_name = "Loss/%s/%s" % (loss_type, phase)
                                writer.add_scalar(plot_name, loss_obj.item(),
                                                  counters[phase])

                            # only plot the weights if we are in the train phase . . . .
                            # BUGFIX: use a dedicated loop variable here;
                            # previously this loop reused `i` and clobbered
                            # the batch index, corrupting the logging
                            # condition and checkpoint filenames below
                            if phase == "train":
                                for w_idx in range(len(weights)):
                                    plot_name = "Weights/%d" % (w_idx)
                                    writer.add_scalar(plot_name,
                                                      weights[w_idx].item(),
                                                      counters[phase])

                    if phase == 'train' and global_iteration % config['train'][
                            'ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_epoch_%d_iter_%d' %
                            (train_dir, epoch, i))

                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'],
                    average_meter_container[valid_loss_type].avg,
                    best_valid_loss)
                print(log)

                # record all average_meter losses
                for key, meter in average_meter_container.items():
                    writer.add_scalar("AvgMeter/%s/%s" % (key, phase),
                                      meter.avg, epoch)

                if phase == "train":
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type']
                            == "StepLR"):
                        scheduler.step()

                if phase == 'valid':
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type']
                            == "ReduceLROnPlateau"):
                        scheduler.step(
                            average_meter_container[valid_loss_type].avg)

                    # checkpoint whenever the tracked validation loss improves
                    if average_meter_container[
                            valid_loss_type].avg < best_valid_loss:
                        best_valid_loss = average_meter_container[
                            valid_loss_type].avg
                        training_stats['epoch'] = epoch
                        training_stats['global_iteration'] = counters['valid']
                        save_yaml(training_stats, training_stats_file)
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' %
            (train_dir, epoch_counter_external))
        writer.flush()  # flush SummaryWriter events to disk
def train_dynamics(config, train_dir, data_dir, model_dy, global_iteration,
                   writer):

    # load the data
    multi_episode_dict = DrakeSimEpisodeReader.load_dataset(
        data_dir, load_image_data=False)
    '''
    for episode_name in list(multi_episode_dict.keys()):
        print("episode name", episode_name)
        episode = multi_episode_dict[episode_name]
        obs = episode.get_observation(34)
        print(obs)
    '''

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=multi_episode_dict,
            phase=phase)

        # print(config['train'])

        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=True if phase == 'train' else False,
            num_workers=config['train']['num_workers'],
            drop_last=True)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()
    '''
    define model for dynamics prediction
    '''
    if model_dy is None:
        model_dy = DynaNetMLP(config)

    # criterion
    MSELoss = nn.MSELoss()
    L1Loss = nn.L1Loss()

    # optimizer
    params = model_dy.parameters()
    lr = float(config['train']['lr'])
    optimizer = optim.Adam(params,
                           lr=lr,
                           betas=(config['train']['adam_beta1'], 0.999))

    # setup scheduler
    sc = config['train']['lr_scheduler']
    scheduler = None

    if config['train']['lr_scheduler']['enabled']:
        if config['train']['lr_scheduler']['type'] == "ReduceLROnPlateau":
            scheduler = ReduceLROnPlateau(optimizer,
                                          mode='min',
                                          factor=sc['factor'],
                                          patience=sc['patience'],
                                          threshold_mode=sc['threshold_mode'],
                                          cooldown=sc['cooldown'],
                                          verbose=True)
        elif config['train']['lr_scheduler']['type'] == "StepLR":
            step_size = config['train']['lr_scheduler']['step_size']
            gamma = config['train']['lr_scheduler']['gamma']
            scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma)
        else:
            raise ValueError("unknown scheduler type: %s" %
                             (config['train']['lr_scheduler']['type']))

    if use_gpu:
        print("using gpu")
        model_dy = model_dy.cuda()

    best_valid_loss = np.inf
    counters = {'train': 0, 'valid': 0}

    try:
        for epoch in range(config['train']['n_epoch']):
            phases = ['train', 'valid']

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)
            for phase in phases:
                model_dy.train(phase == 'train')

                meter_loss_rmse = AverageMeter()
                step_duration_meter = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):

                    loss_container = dict()  # store the losses for this step

                    step_start_time = time.time()

                    global_iteration += 1
                    counters[phase] += 1

                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" % global_iteration)
                            print("n_samples", n_samples)

                        # [B, n_samples, obs_dim]
                        observations = data['observations']

                        # [B, n_samples, action_dim]
                        actions = data['actions']
                        B = actions.shape[0]

                        if use_gpu:
                            observations = observations.cuda()
                            actions = actions.cuda()

                        # states, actions = data
                        assert actions.shape[1] == n_samples
                        loss_mse = 0.

                        # we don't have any visual observations, so states are observations
                        states = observations

                        # [B, n_his, state_dim]
                        state_init = states[:, :n_his]

                        # We want to rollout n_roll steps
                        # actions = [B, n_his + n_roll, -1]
                        # so we want action_seq.shape = [B, n_roll, -1]
                        action_start_idx = 0
                        action_end_idx = n_his + n_roll - 1
                        action_seq = actions[:, action_start_idx:
                                             action_end_idx, :]

                        if DEBUG:
                            print("states.shape", states.shape)
                            print("state_init.shape", state_init.shape)
                            print("actions.shape", actions.shape)
                            print("action_seq.shape", action_seq.shape)

                        # try using models_dy.rollout_model instead of doing this manually
                        rollout_data = rollout_model(state_init=state_init,
                                                     action_seq=action_seq,
                                                     dynamics_net=model_dy,
                                                     compute_debug_data=False)

                        # [B, n_roll, state_dim]
                        state_rollout_pred = rollout_data['state_pred']

                        # [B, n_roll, state_dim]
                        state_rollout_gt = states[:, n_his:]

                        if DEBUG:
                            print("state_rollout_gt.shape",
                                  state_rollout_gt.shape)
                            print("state_rollout_pred.shape",
                                  state_rollout_pred.shape)

                        # the loss function is between
                        # [B, n_roll, state_dim]
                        state_pred_err = state_rollout_pred - state_rollout_gt

                        # everything is in 3D space now so no need to do any scaling
                        # all the losses would be in meters . . . .
                        loss_mse = MSELoss(state_rollout_pred,
                                           state_rollout_gt)
                        loss_l1 = L1Loss(state_rollout_pred, state_rollout_gt)
                        meter_loss_rmse.update(np.sqrt(loss_mse.item()), B)

                        # compute losses at final step of the rollout
                        mse_final_step = MSELoss(state_rollout_pred[:, -1, :],
                                                 state_rollout_gt[:, -1, :])
                        l2_final_step = torch.norm(state_pred_err[:, -1],
                                                   dim=-1).mean()
                        l1_final_step = L1Loss(state_rollout_pred[:, -1, :],
                                               state_rollout_gt[:, -1, :])

                        loss_container['mse'] = loss_mse
                        loss_container['l1'] = loss_l1
                        loss_container['mse_final_step'] = mse_final_step
                        loss_container['l1_final_step'] = l1_final_step
                        loss_container['l2_final_step'] = l2_final_step

                    step_duration_meter.update(time.time() - step_start_time)
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss_mse.backward()
                        optimizer.step()

                    if i % config['train']['log_per_iter'] == 0:
                        log = '%s %d [%d/%d][%d/%d] LR: %.6f' % (
                            phase, global_iteration, epoch,
                            config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))
                        log += ', rmse: %.6f (%.6f)' % (np.sqrt(
                            loss_mse.item()), meter_loss_rmse.avg)

                        log += ', step time %.6f' % (step_duration_meter.avg)
                        step_duration_meter.reset()

                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 100 iterations
                        if global_iteration > 100:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss_MSE/%s" % (phase),
                                              loss_mse.item(),
                                              global_iteration)
                            writer.add_scalar("L1/%s" % (phase),
                                              loss_l1.item(), global_iteration)
                            writer.add_scalar("RMSE average loss/%s" % (phase),
                                              meter_loss_rmse.avg,
                                              global_iteration)

                            writer.add_scalar("n_taj", len(multi_episode_dict),
                                              global_iteration)

                            for loss_type, loss_obj in loss_container.items():
                                plot_name = "Loss/%s/%s" % (loss_type, phase)
                                writer.add_scalar(plot_name, loss_obj.item(),
                                                  global_iteration)

                    if phase == 'train' and global_iteration % config['train'][
                            'ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_iter_%d' %
                            (train_dir, global_iteration))

                log = '%s %d [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, global_iteration, epoch, config['train']['n_epoch'],
                    meter_loss_rmse.avg, best_valid_loss)
                print(log)

                if phase == "train":
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type']
                            == "StepLR"):
                        scheduler.step()

                if phase == 'valid':
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type']
                            == "ReduceLROnPlateau"):
                        scheduler.step(meter_loss_rmse.avg)

                    if meter_loss_rmse.avg < best_valid_loss:
                        best_valid_loss = meter_loss_rmse.avg
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_iter_%d_keyboard_interrupt' %
            (train_dir, global_iteration))
        writer.flush()  # flush SummaryWriter events to disk

    return model_dy, global_iteration
예제 #17
0
def train_dynamics(
        config,
        train_dir,  # str: directory to save output
):
    """Train a DynaNetMLP dynamics model.

    Runs a train/valid loop for config['train']['n_epoch'] epochs. Each
    training step rolls the model forward n_rollout steps from n_history
    normalized states and minimizes the MSE against the ground-truth
    states, averaged over the rollout. Logs, checkpoints, the config and
    tensorboard events are all written under ``train_dir``.

    :param config: experiment config dict. Keys read from config['train']:
        random_seed, resume_epoch, resume_iter, batch_size, num_workers,
        n_epoch, n_history, n_rollout, lr, adam_beta1, log_per_iter,
        ckp_per_iter. Also reads config['env']['type'].
    :param train_dir: str, directory to save output (log file, config.yaml,
        tensorboard events, model checkpoints).
    """

    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    st_epoch = config['train'][
        'resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    # Tee mirrors stdout into a per-run log file
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config so the run is reproducible from train_dir alone
    save_yaml(config, os.path.join(train_dir, "config.yaml"))

    print(config)

    # load the data
    episodes = load_episodes_from_config(config)

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=episodes,
            phase=phase)

        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=True if phase == 'train' else False,
            num_workers=config['train']['num_workers'])

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    '''
    define model for dynamics prediction
    '''
    model_dy = None

    if config['train']['resume_epoch'] >= 0:
        # if resume from a pretrained checkpoint
        state_dict_path = os.path.join(
            train_dir, 'net_dy_epoch_%d_iter_%d_state_dict.pth' %
            (config['train']['resume_epoch'], config['train']['resume_iter']))
        print("Loading saved ckp from %s" % state_dict_path)

        # BUG FIX: the network must be constructed before weights can be
        # loaded into it; previously model_dy was still None here, so
        # load_state_dict raised AttributeError on any resume.
        model_dy = DynaNetMLP(config)
        model_dy.load_state_dict(torch.load(state_dict_path))

        # NOTE(review): optimizer state is not restored on resume, and the
        # normalizers are assumed to be contained in the saved state_dict —
        # confirm both against the checkpoint-saving code.
    else:
        # not starting from pre-trained create the network and compute the
        # normalization parameters
        model_dy = DynaNetMLP(config)

        # compute normalization params from the training-set statistics
        stats = datasets["train"].compute_dataset_statistics()

        obs_mean = stats['observations']['mean']
        obs_std = stats['observations']['std']
        observations_normalizer = DataNormalizer(obs_mean, obs_std)

        action_mean = stats['actions']['mean']
        action_std = stats['actions']['std']
        actions_normalizer = DataNormalizer(action_mean, action_std)

        model_dy.action_normalizer = actions_normalizer
        model_dy.state_normalizer = observations_normalizer

    print("model_dy #params: %d" % count_trainable_parameters(model_dy))

    # criterion
    criterionMSE = nn.MSELoss()

    # optimizer
    params = model_dy.parameters()
    optimizer = optim.Adam(params,
                           lr=config['train']['lr'],
                           betas=(config['train']['adam_beta1'], 0.999))
    # shrink LR by 0.9 whenever the valid RMSE plateaus for 10 epochs
    scheduler = ReduceLROnPlateau(optimizer,
                                  'min',
                                  factor=0.9,
                                  patience=10,
                                  verbose=True)

    if use_gpu:
        model_dy = model_dy.cuda()

    best_valid_loss = np.inf
    global_iteration = 0

    # tracks the last epoch reached, for the KeyboardInterrupt checkpoint name
    epoch_counter_external = 0

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)
            for phase in phases:
                # train(True) / train(False) toggles train vs eval mode
                model_dy.train(phase == 'train')

                meter_loss_rmse = AverageMeter()

                loader = dataloaders[phase]

                for i, data in enumerate(loader):

                    global_iteration += 1

                    # gradients only needed during the train phase
                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        if config['env']['type'] in ['PusherSlider']:
                            states = data['observations']
                            actions = data['actions']

                            if use_gpu:
                                states = states.cuda()
                                actions = actions.cuda()

                            assert states.size(1) == n_samples

                            # normalize states and actions once for entire rollout
                            states = model_dy.state_normalizer.normalize(
                                states)
                            actions = model_dy.action_normalizer.normalize(
                                actions)

                            B = states.size(0)
                            loss_mse = 0.

                            # state_cur: B x n_his x state_dim
                            state_cur = states[:, :n_his]

                            for j in range(n_roll):
                                # ground-truth state after rollout step j
                                state_des = states[:, n_his + j]

                                # action_cur: B x n_his x action_dim
                                action_cur = actions[:, j:j +
                                                     n_his] if actions is not None else None

                                # state_pred: B x state_dim
                                state_pred = model_dy(state_cur, action_cur)

                                # average the per-step MSE over the rollout
                                loss_mse_cur = criterionMSE(
                                    state_pred, state_des)
                                loss_mse += loss_mse_cur / n_roll

                                # advance the history window: drop the oldest
                                # state, append the prediction
                                # state_pred.unsqueeze(1): B x 1 x state_dim
                                state_cur = torch.cat([
                                    state_cur[:, 1:],
                                    state_pred.unsqueeze(1)
                                ], 1)

                            meter_loss_rmse.update(np.sqrt(loss_mse.item()), B)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss_mse.backward()
                        optimizer.step()

                    if i % config['train']['log_per_iter'] == 0:
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))
                        log += ', rmse: %.6f (%.6f)' % (np.sqrt(
                            loss_mse.item()), meter_loss_rmse.avg)

                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 500 iterations
                        if global_iteration > 500:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss/train", loss_mse.item(),
                                              global_iteration)
                            writer.add_scalar("RMSE average loss/train",
                                              meter_loss_rmse.avg,
                                              global_iteration)

                    if phase == 'train' and i % config['train'][
                            'ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_epoch_%d_iter_%d' %
                            (train_dir, epoch, i))

                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'],
                    meter_loss_rmse.avg, best_valid_loss)
                print(log)

                if phase == 'valid':
                    scheduler.step(meter_loss_rmse.avg)
                    writer.add_scalar("RMSE average loss/valid",
                                      meter_loss_rmse.avg, global_iteration)
                    # keep a separate checkpoint of the best model so far
                    if meter_loss_rmse.avg < best_valid_loss:
                        best_valid_loss = meter_loss_rmse.avg
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' %
            (train_dir, epoch_counter_external))
        writer.flush()  # flush SummaryWriter events to disk
예제 #18
0
def eval_dynamics(
    config,
    eval_dir,  # str: directory to save output
    multi_episode_dict=None,
    n_rollout_list=None,
    model_dy=None,  # should already be in eval mode
    phase_list=None,  # typically it's
    num_epochs=10,
):
    """Evaluate a trained dynamics model at several rollout horizons.

    For each horizon in ``n_rollout_list`` and each phase in ``phase_list``,
    makes ``num_epochs`` passes over the dataset, rolls ``model_dy`` out
    ``n_rollout`` steps with ``rollout_model``, logs per-batch l2 losses to
    tensorboard, and accumulates per-sample losses so mean/median/std summary
    statistics can be reported at the end.

    :param config: experiment config dict. Keys read from config['train']:
        random_seed, n_history, batch_size, num_workers, log_per_iter.
    :param eval_dir: str, directory to save output (tensorboard, config.yaml)
    :param multi_episode_dict: episodes to build the evaluation dataset from
    :param n_rollout_list: list of rollout horizons to evaluate (required)
    :param model_dy: dynamics network; should already be in eval mode
    :param phase_list: dataset phases to evaluate, defaults to ["valid"]
    :param num_epochs: number of passes over each dataloader
    :return: stats dict with
        stats[n_rollout][phase][loss_type] = {'mean': t, 'median': t, 'std': t}
        where each value is a torch scalar tensor and loss_type is
        "l2_avg" or "l2_final_step".
    """

    assert n_rollout_list is not None
    assert model_dy is not None
    assert multi_episode_dict is not None

    if phase_list is None:
        phase_list = ["valid"]

    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    tensorboard_dir = os.path.join(eval_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(eval_dir, "config.yaml"))

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    use_gpu = torch.cuda.is_available()

    stats = dict()

    for n_rollout in n_rollout_list:
        stats[n_rollout] = dict()

        # BUG FIX: copy.copy makes a shallow copy, so writing to
        # config_tmp['train'] below used to mutate the caller's config as
        # well; deepcopy leaves the caller's config untouched.
        config_tmp = copy.deepcopy(config)
        config_tmp['train']['n_rollout'] = n_rollout

        for phase in phase_list:
            print("Loading data for %s" % phase)
            dataset = MultiEpisodeDataset(
                config_tmp,
                action_function=action_function,
                observation_function=observation_function,
                episodes=multi_episode_dict,
                phase=phase)

            dataloader = DataLoader(dataset,
                                    batch_size=config['train']['batch_size'],
                                    shuffle=True,
                                    num_workers=config['train']['num_workers'],
                                    drop_last=True)

            # accumulates per-sample loss tensors across the whole phase
            loss_tensor_container = {"l2_avg": [], "l2_final_step": []}

            step_duration_meter = AverageMeter()
            global_iteration = 0

            for epoch in range(num_epochs):
                for i, data in enumerate(dataloader):

                    loss_container = dict()  # store the losses for this step
                    # types of losses ["l2_avg", "l2_final_step"]

                    step_start_time = time.time()
                    global_iteration += 1

                    # evaluation only: no gradients needed
                    with torch.no_grad():
                        n_his = config['train']['n_history']
                        n_roll = n_rollout
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" % (global_iteration))
                            print("n_samples", n_samples)

                        # [B, n_samples, obs_dim]
                        observations = data['observations']

                        # [B, n_samples, action_dim]
                        actions = data['actions']
                        B = actions.shape[0]

                        if use_gpu:
                            observations = observations.cuda()
                            actions = actions.cuda()

                        assert actions.shape[1] == n_samples

                        # we don't have any visual observations, so states are observations
                        states = observations

                        # [B, n_his, state_dim]
                        state_init = states[:, :n_his]

                        # We want to rollout n_roll steps
                        # actions = [B, n_his + n_roll, -1]
                        # so we want action_seq.shape = [B, n_roll, -1]
                        action_start_idx = 0
                        action_end_idx = n_his + n_roll - 1
                        action_seq = actions[:, action_start_idx:
                                             action_end_idx, :]

                        if DEBUG:
                            print("states.shape", states.shape)
                            print("state_init.shape", state_init.shape)
                            print("actions.shape", actions.shape)
                            print("action_seq.shape", action_seq.shape)

                        rollout_data = rollout_model(state_init=state_init,
                                                     action_seq=action_seq,
                                                     dynamics_net=model_dy,
                                                     compute_debug_data=False)

                        # [B, n_roll, state_dim]
                        state_rollout_pred = rollout_data['state_pred']

                        # [B, n_roll, state_dim]
                        state_rollout_gt = states[:, n_his:]

                        if DEBUG:
                            print("state_rollout_gt.shape",
                                  state_rollout_gt.shape)
                            print("state_rollout_pred.shape",
                                  state_rollout_pred.shape)

                        # prediction error, [B, n_roll, state_dim]
                        state_pred_err = state_rollout_pred - state_rollout_gt

                        # [B], l2 error averaged over the rollout
                        l2_avg_tensor = torch.mean(torch.norm(state_pred_err,
                                                              dim=-1),
                                                   dim=1).detach().cpu()
                        l2_avg = l2_avg_tensor.mean()

                        # [B], l2 error at the final rollout step
                        l2_final_step_tensor = torch.norm(
                            state_pred_err[:, -1], dim=-1).detach().cpu()
                        l2_final_step = l2_final_step_tensor.mean()

                        loss_tensor_container["l2_avg"].append(l2_avg_tensor)
                        loss_container["l2_avg"] = l2_avg

                        loss_tensor_container["l2_final_step"].append(
                            l2_final_step_tensor)
                        loss_container["l2_final_step"] = l2_final_step

                    step_duration_meter.update(time.time() - step_start_time)

                    if (i % config['train']['log_per_iter']
                            == 0) or (global_iteration %
                                      config['train']['log_per_iter'] == 0):
                        # print some logging information
                        log = ""
                        log += ', step time %.6f' % (step_duration_meter.avg)

                        # log data to tensorboard
                        for loss_type, loss_obj in loss_container.items():
                            plot_name = "%s/n_roll_%s/%s" % (loss_type, n_roll,
                                                             phase)
                            writer.add_scalar(plot_name, loss_obj.item(),
                                              global_iteration)

                            log += " %s: %.6f," % (plot_name, loss_obj.item())

                        print(log)

                    writer.flush()  # flush SummaryWriter events to disk

            # summary statistics over every sample seen in this phase
            stats[n_rollout][phase] = dict()
            for loss_type in loss_tensor_container:
                t = torch.cat(loss_tensor_container[loss_type])
                mean = t.mean()
                median = t.median()
                std = t.std()

                stats[n_rollout][phase][loss_type] = {
                    'mean': mean,
                    'median': median,
                    'std': std
                }

                for stat_type, val in stats[n_rollout][phase][loss_type].items(
                ):
                    plot_name = "stats/%s/n_roll_%d/%s/%s" % (
                        loss_type, n_roll, phase, stat_type)

                    # write at a few fixed x-positions so the constant shows
                    # up as a flat line in tensorboard
                    for idx_tmp in [0, 10, 100]:
                        writer.add_scalar(plot_name, val, idx_tmp)

    # previously the computed statistics were only logged and then discarded;
    # returning them is backward-compatible and lets callers use them
    return stats
예제 #19
0
def main():
    # load dynamics model
    model_dict = load_model_state_dict()
    model = model_dict['model_dy']
    model_dd = model_dict['model_dd']
    config = model.config

    env_config = load_yaml(os.path.join(get_project_root(), 'experiments/exp_20_mugs/config.yaml'))
    env_config['env']['observation']['depth_int16'] = True
    n_history = config['train']['n_history']

    initial_cond = generate_initial_condition(env_config, push_length=PUSH_LENGTH)
    env_config = initial_cond['config']

    # enable the right observations

    camera_name = model_dict['metadata']['camera_name']
    spatial_descriptor_data = model_dict['spatial_descriptor_data']
    ref_descriptors = spatial_descriptor_data['spatial_descriptors']
    K = ref_descriptors.shape[0]

    ref_descriptors = torch.Tensor(ref_descriptors).cuda()  # put them on the GPU

    print("ref_descriptors\n", ref_descriptors)
    print("ref_descriptors.shape", ref_descriptors.shape)

    # create the environment
    # create the environment
    env = DrakeMugsEnv(env_config)
    env.reset()

    T_world_camera = env.camera_pose(camera_name)
    camera_K_matrix = env.camera_K_matrix(camera_name)

    # create another environment for doing rollouts
    env2 = DrakeMugsEnv(env_config, visualize=False)
    env2.reset()

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.drake_pusher_position_3D(config)
    visual_observation_function = \
        VisualObservationFunctionFactory.descriptor_keypoints_3D(config=config,
                                                                 camera_name=camera_name,
                                                                 model_dd=model_dd,
                                                                 ref_descriptors=ref_descriptors,
                                                                 K_matrix=camera_K_matrix,
                                                                 T_world_camera=T_world_camera,
                                                                 )

    episode = OnlineEpisodeReader()
    mpc_input_builder = DynamicsModelInputBuilder(observation_function=observation_function,
                                                  visual_observation_function=visual_observation_function,
                                                  action_function=action_function,
                                                  episode=episode)

    vis = meshcat_utils.make_default_visualizer_object()
    vis.delete()

    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    obs_init = env.get_observation()

    #### ROLLOUT USING LEARNED MODEL + GROUND TRUTH ACTIONS ############
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    # add just some large number of these
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    def goal_func(obs_tmp):
        state_tmp = mpc_input_builder.get_state_input_single_timestep({'observation': obs_tmp})['state']
        return model.compute_z_state(state_tmp.unsqueeze(0))['z_object'].flatten()


    #
    idx = episode.get_latest_idx()
    obs_raw = episode.get_observation(idx)
    z_object_goal = goal_func(obs_raw)
    z_keypoints_init_W = keypoints_3D_from_dynamics_model_output(z_object_goal, K)
    z_keypoints_init_W = torch_utils.cast_to_numpy(z_keypoints_init_W)

    z_keypoints_obj = keypoints_world_frame_to_object_frame(z_keypoints_init_W,
                                                          T_W_obj=slider_pose_from_observation(obs_init))

    color = [1, 0, 0]
    meshcat_utils.visualize_points(vis=vis,
                                   name="keypoints_W",
                                   pts=z_keypoints_init_W,
                                   color=color,
                                   size=0.02,
                                   )

    # input("press Enter to continue")

    # rollout single action sequence using the simulator
    action_sequence_np = torch_utils.cast_to_numpy(initial_cond['action_sequence'])
    N = action_sequence_np.shape[0]
    obs_rollout_gt = env_utils.rollout_action_sequence(env, action_sequence_np)[
        'observations']

    # using the vision model to get "goal" keypoints
    z_object_goal = goal_func(obs_rollout_gt[-1])
    z_object_goal_np = torch_utils.cast_to_numpy(z_object_goal)
    z_keypoints_goal = keypoints_3D_from_dynamics_model_output(z_object_goal, K)
    z_keypoints_goal = torch_utils.cast_to_numpy(z_keypoints_goal)

    # visualize goal keypoints
    color = [0, 1, 0]
    meshcat_utils.visualize_points(vis=vis,
                                   name="goal_keypoints",
                                   pts=z_keypoints_goal,
                                   color=color,
                                   size=0.02,
                                   )

    # input("press Enter to continue")

    #### ROLLOUT USING LEARNED MODEL + GROUND TRUTH ACTIONS ############
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    # add just some large number of these
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    # [n_history, state_dim]
    idx = episode.get_latest_idx()

    dyna_net_input = mpc_input_builder.get_dynamics_model_input(idx, n_history=n_history)
    state_init = dyna_net_input['states'].cuda() # [n_history, state_dim]
    action_init = dyna_net_input['actions'] # [n_history, action_dim]


    print("state_init.shape", state_init.shape)
    print("action_init.shape", action_init.shape)


    action_seq_gt_torch = torch_utils.cast_to_torch(initial_cond['action_sequence'])
    action_input = torch.cat((action_init[:(n_history-1)], action_seq_gt_torch), dim=0).cuda()
    print("action_input.shape", action_input.shape)


    # rollout using the ground truth actions and learned model
    # need to add the batch dim to do that
    z_init = model.compute_z_state(state_init)['z']
    rollout_pred = rollout_model(state_init=z_init.unsqueeze(0),
                                 action_seq=action_input.unsqueeze(0),
                                 dynamics_net=model,
                                 compute_debug_data=True)

    state_pred_rollout = rollout_pred['state_pred']

    print("state_pred_rollout.shape", state_pred_rollout.shape)

    for i in range(N):
        # vis GT for now
        name = "GT_3D/%d" % (i)
        T_W_obj = slider_pose_from_observation(obs_rollout_gt[i])
        # print("T_W_obj", T_W_obj)

        # green
        color = np.array([0, 1, 0]) * get_color_intensity(i, N)
        meshcat_utils.visualize_points(vis=vis,
                                       name=name,
                                       pts=z_keypoints_obj,
                                       color=color,
                                       size=0.01,
                                       T=T_W_obj)

        # red
        color = np.array([0, 0, 1]) * get_color_intensity(i, N)
        state_pred = state_pred_rollout[:, i, :]
        pts_pred = keypoints_3D_from_dynamics_model_output(state_pred, K).squeeze()
        pts_pred = pts_pred.detach().cpu().numpy()
        name = "pred_3D/%d" % (i)
        meshcat_utils.visualize_points(vis=vis,
                                       name=name,
                                       pts=pts_pred,
                                       color=color,
                                       size=0.01,
                                       )

    # input("finished visualizing GT rollout\npress Enter to continue")
    index_dict = get_object_and_robot_state_indices(config)
    object_indices = index_dict['object_indices']

    # reset the environment and use the MPC controller to stabilize this
    # now setup the MPC to try to stabilize this . . . .
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    episode.clear()

    # add just some large number of these
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    # input("press Enter to continue")

    # make a planner config
    planner_config = copy.copy(config)
    config_tmp = load_yaml(os.path.join(get_project_root(), 'experiments/drake_pusher_slider/eval_config.yaml'))
    planner_config['mpc'] = config_tmp['mpc']
    planner = None
    if PLANNER_TYPE == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif PLANNER_TYPE == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (PLANNER_TYPE))

    mpc_out = None
    action_seq_mpc = None
    state_pred_mpc = None
    counter = -1
    while True:
        counter += 1
        print("\n\n-----Running MPC Optimization: Counter (%d)-------" % (counter))

        obs_cur = env.get_observation()
        episode.add_observation_only(obs_cur)

        if counter == 0 or REPLAN:
            print("replanning")
            ####### Run the MPC ##########

            # [1, state_dim]

            n_look_ahead = N - counter
            if USE_FIXED_MPC_HORIZON:
                n_look_ahead = MPC_HORIZON
            if n_look_ahead == 0:
                break

            # start_time = time.time()
            # idx of current observation
            idx = episode.get_latest_idx()
            mpc_start_time = time.time()
            mpc_input_data = mpc_input_builder.get_dynamics_model_input(idx, n_history=n_history)
            state_cur = mpc_input_data['states']
            action_his = mpc_input_data['actions']

            if mpc_out is not None:
                action_seq_rollout_init = mpc_out['action_seq'][1:]
            else:
                action_seq_rollout_init = None

            # run MPPI
            z_cur = None
            with torch.no_grad():
                z_cur = model.compute_z_state(state_cur.unsqueeze(0).cuda())['z'].squeeze(0)



            mpc_out = planner.trajectory_optimization(state_cur=z_cur,
                                                      action_his=action_his,
                                                      obs_goal=z_object_goal_np,
                                                      model_dy=model,
                                                      action_seq_rollout_init=action_seq_rollout_init,
                                                      n_look_ahead=n_look_ahead,
                                                      eval_indices=object_indices,
                                                      rollout_best_action_sequence=True,
                                                      verbose=True,
                                                      )

            print("MPC step took %.4f seconds" %(time.time() - mpc_start_time))
            action_seq_mpc = mpc_out['action_seq'].cpu().numpy()


        # Rollout with ground truth simulator dynamics
        action_seq_mpc = torch_utils.cast_to_numpy(mpc_out['action_seq'])
        env2.set_simulator_state_from_observation_dict(env2.get_mutable_context(), obs_cur)
        obs_mpc_gt = env_utils.rollout_action_sequence(env2, action_seq_mpc)['observations']
        state_pred_mpc = torch_utils.cast_to_numpy(mpc_out['state_pred'])

        vis['mpc_3D'].delete()
        vis['mpc_GT_3D'].delete()

        L = len(obs_mpc_gt)
        print("L", L)
        if L == 0:
            break
        for i in range(L):
            # red
            color = np.array([1, 0, 0]) * get_color_intensity(i, L)
            state_pred = state_pred_mpc[i, :]
            state_pred = np.expand_dims(state_pred, 0)  # may need to expand dims here
            pts_pred = keypoints_3D_from_dynamics_model_output(state_pred, K).squeeze()
            name = "mpc_3D/%d" % (i)
            meshcat_utils.visualize_points(vis=vis,
                                           name=name,
                                           pts=pts_pred,
                                           color=color,
                                           size=0.01,
                                           )

            # ground truth rollout of the MPC action_seq
            name = "mpc_GT_3D/%d" % (i)
            T_W_obj = slider_pose_from_observation(obs_mpc_gt[i])

            # green
            color = np.array([1, 1, 0]) * get_color_intensity(i, L)
            meshcat_utils.visualize_points(vis=vis,
                                           name=name,
                                           pts=z_keypoints_obj,
                                           color=color,
                                           size=0.01,
                                           T=T_W_obj)

        action_cur = action_seq_mpc[0]

        print("action_cur", action_cur)
        # print("action_GT", initial_cond['action'])
        input("press Enter to continue")

        # add observation actions to the episode
        obs_cur = env.get_observation()
        episode.replace_observation_action(obs_cur, action_cur)

        # step the simulator
        env.step(action_cur)

        # visualize current keypoint positions
        obs_cur = env.get_observation()
        T_W_obj = slider_pose_from_observation(obs_cur)

        # yellow
        color = np.array([1, 1, 0])
        meshcat_utils.visualize_points(vis=vis,
                                       name="keypoint_cur",
                                       pts=z_keypoints_obj,
                                       color=color,
                                       size=0.02,
                                       T=T_W_obj)

        action_seq_mpc = action_seq_mpc[1:]
        state_pred_mpc = state_pred_mpc[1:]

    obs_final = env.get_observation()

    pose_error = compute_pose_error(obs_rollout_gt[-1],
                                    obs_final)

    print("position_error: %.3f"  %(pose_error['position_error']))
    print("angle error degrees: %.3f" %(pose_error['angle_error_degrees']))
# must import pydrake BEFORE torch
# NOTE(review): ordering constraint from the original author — presumably a
# native shared-library symbol clash when torch loads first; confirm before
# reordering these imports.
import pydrake

# key_dynam (project-local imports)
from key_dynam.utils import dev_utils
from key_dynam.dataset.episode_dataset import MultiEpisodeDataset
from key_dynam.dataset.function_factory import ObservationFunctionFactory, ActionFunctionFactory

# Smoke-test script: build a MultiEpisodeDataset from pre-recorded drake
# pusher-slider episodes, then fetch a single training sample directly via
# the dataset's internal _getitem helper and print its keys.

# load pre-recorded episodes and a simple config via the dev utilities
multi_episode_dict = dev_utils.load_drake_pusher_slider_episodes()
config = dev_utils.load_simple_config()

# action/observation extraction functions are built from the config,
# mirroring the dataset-construction pattern used elsewhere in this file
action_function = ActionFunctionFactory.function_from_config(config)
observation_function = ObservationFunctionFactory.function_from_config(config)
dataset = MultiEpisodeDataset(config,
                              action_function=action_function,
                              observation_function=observation_function,
                              episodes=multi_episode_dict,
                              phase="train")

# pick the first episode by name and an arbitrary index into it
episode_name = dataset.get_episode_names()[0]
episode = dataset.episode_dict[episode_name]
idx = 5

# NOTE(review): _getitem is a private method of MultiEpisodeDataset; calling
# it directly (instead of dataset[i]) lets the rollout/history lengths be
# specified explicitly here. Verify the signature stays stable if the
# dataset class changes.
data = dataset._getitem(
    episode,
    idx,
    rollout_length=5,
    n_history=2,
    visual_observation=False,
)
print("\n\ndata.keys()", data.keys())