def build_dynamics_model(config):
    """Construct the dynamics network named in the config.

    The model type is read from ``config['dynamics_net']['model_type']``;
    the selected class is instantiated with the full ``config``.

    Supported types: ``"mlp"``, ``"mlp_weighted"``, ``"mlp_weight_matrix"``.

    Raises:
        ValueError: if the configured model type is none of the above.
    """
    model_type = config['dynamics_net']["model_type"]

    if model_type == "mlp":
        return DynaNetMLP(config)

    if model_type == "mlp_weighted":
        return DynaNetMLPWeighted(config)

    if model_type == "mlp_weight_matrix":
        return DynaNetMLPWeightMatrix(config)

    raise ValueError("unsupported dynamics net type")
def build_visual_dynamics_model(config):
    """Construct a ``VisualDynamicsNet`` from a vision net and a dynamics net.

    The vision network is selected by ``config['vision_net']['model_type']``
    (only ``"PrecomputedDescriptorNet"`` is handled) and has
    ``initialize_weights()`` called on it. The dynamics network is selected
    by ``config['dynamics_net']['model_type']`` (only ``"mlp"`` is handled).

    Raises:
        ValueError: if either model type is not supported.
    """
    # The vision net is validated and built before the dynamics type is
    # even looked at.
    if config['vision_net']['model_type'] != "PrecomputedDescriptorNet":
        raise ValueError("unsupported vision net type")

    descriptor_net = PrecomputedDescriptorNet(config)
    descriptor_net.initialize_weights()

    if config['dynamics_net']["model_type"] != "mlp":
        raise ValueError("unsupported dynamics net type")

    dynamics_net = DynaNetMLP(config)

    return VisualDynamicsNet(config, descriptor_net, dynamics_net)
# Example #3
# 0
def train_dynamics(
        config,
        train_dir,  # str: directory to save output
):
    """Train a ``DynaNetMLP`` dynamics model with a multi-step rollout loss.

    Every epoch runs a 'train' pass followed by a 'valid' pass. For each
    batch the model is rolled out for ``n_rollout`` steps, feeding its own
    predictions back into the history window, and the per-step MSE against
    the ground-truth states is averaged into the loss. The config copy, a
    log file, TensorBoard events, periodic checkpoints and the model with
    the best validation RMSE are all written under ``train_dir``.

    Args:
        config: nested dict. This function reads ``config['train']``
            (``random_seed``, ``resume_epoch``, ``resume_iter``,
            ``batch_size``, ``num_workers``, ``lr``, ``adam_beta1``,
            ``n_epoch``, ``n_history``, ``n_rollout``, ``log_per_iter``,
            ``ckp_per_iter``) and ``config['env']['type']``; the whole
            dict is also forwarded to the dataset/model constructors.
        train_dir: str, directory to save output.

    Raises:
        ValueError: if ``config['env']['type']`` is not ``'PusherSlider'``.

    A ``KeyboardInterrupt`` during training is caught: the current model is
    saved and the function returns normally.
    """

    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    st_epoch = config['train'][
        'resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    # The Tee object is kept referenced for the duration of training.
    # presumably it mirrors stdout into the log file -- confirm in Tee.
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, "config.yaml"))

    print(config)

    # load the data
    episodes = load_episodes_from_config(config)

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=episodes,
            phase=phase)

        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=True if phase == 'train' else False,
            num_workers=config['train']['num_workers'])

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    # define model for dynamics prediction
    #
    # The network is built up front so that BOTH branches below operate on a
    # real module. Previously the resume branch called load_state_dict() on
    # None and always crashed.
    model_dy = DynaNetMLP(config)

    if config['train']['resume_epoch'] >= 0:
        # resume from a previously saved checkpoint
        state_dict_path = os.path.join(
            train_dir, 'net_dy_epoch_%d_iter_%d_state_dict.pth' %
            (config['train']['resume_epoch'], config['train']['resume_iter']))
        print("Loading saved ckp from %s" % state_dict_path)

        # The model still lives on the CPU here (it is moved to the GPU
        # further down), so map the checkpoint tensors to the CPU as well.
        model_dy.load_state_dict(
            torch.load(state_dict_path, map_location='cpu'))

        # NOTE(review): the training loop below relies on
        # model_dy.state_normalizer / model_dy.action_normalizer. This
        # branch does not attach them explicitly -- confirm that they are
        # restored from the state dict (or created by DynaNetMLP).
        # NOTE(review): optimizer / scheduler state is not restored when
        # resuming; both start fresh.
    else:
        # not starting from a checkpoint: compute the normalization
        # parameters from the training set and attach them to the model
        stats = datasets["train"].compute_dataset_statistics()

        obs_mean = stats['observations']['mean']
        obs_std = stats['observations']['std']
        observations_normalizer = DataNormalizer(obs_mean, obs_std)

        action_mean = stats['actions']['mean']
        action_std = stats['actions']['std']
        actions_normalizer = DataNormalizer(action_mean, action_std)

        model_dy.action_normalizer = actions_normalizer
        model_dy.state_normalizer = observations_normalizer

    print("model_dy #params: %d" % count_trainable_parameters(model_dy))

    # criterion
    criterionMSE = nn.MSELoss()

    # optimizer
    params = model_dy.parameters()
    optimizer = optim.Adam(params,
                           lr=config['train']['lr'],
                           betas=(config['train']['adam_beta1'], 0.999))
    scheduler = ReduceLROnPlateau(optimizer,
                                  'min',
                                  factor=0.9,
                                  patience=10,
                                  verbose=True)

    if use_gpu:
        model_dy = model_dy.cuda()

    best_valid_loss = np.inf
    global_iteration = 0

    # last epoch that was started; used to name the interrupt checkpoint
    epoch_counter_external = 0

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)
            for phase in phases:
                model_dy.train(phase == 'train')

                meter_loss_rmse = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):

                    # counts batches of both phases
                    global_iteration += 1

                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        if config['env']['type'] in ['PusherSlider']:
                            states = data['observations']
                            actions = data['actions']

                            if use_gpu:
                                states = states.cuda()
                                actions = actions.cuda()

                            # states, actions = data
                            assert states.size(1) == n_samples

                            # normalize states and actions once for entire rollout
                            states = model_dy.state_normalizer.normalize(
                                states)
                            actions = model_dy.action_normalizer.normalize(
                                actions)

                            B = states.size(0)
                            loss_mse = 0.

                            # state_cur: B x n_his x state_dim
                            state_cur = states[:, :n_his]

                            for j in range(n_roll):

                                # ground-truth state for rollout step j
                                state_des = states[:, n_his + j]

                                # action_cur: B x n_his x action_dim
                                action_cur = actions[:, j:j +
                                                     n_his] if actions is not None else None

                                # state_cur: B x n_his x state_dim
                                # state_pred: B x state_dim
                                state_pred = model_dy(state_cur, action_cur)

                                loss_mse_cur = criterionMSE(
                                    state_pred, state_des)
                                loss_mse += loss_mse_cur / n_roll

                                # slide the history window: drop the oldest
                                # state and append the prediction
                                # state_pred.unsqueeze(1): B x 1 x state_dim
                                state_cur = torch.cat([
                                    state_cur[:, 1:],
                                    state_pred.unsqueeze(1)
                                ], 1)

                            meter_loss_rmse.update(np.sqrt(loss_mse.item()), B)
                        else:
                            # Without this, loss_mse would be unbound below
                            # and fail with an opaque UnboundLocalError.
                            raise ValueError(
                                "unsupported env type: %s" %
                                config['env']['type'])

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss_mse.backward()
                        optimizer.step()

                    if i % config['train']['log_per_iter'] == 0:
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))
                        log += ', rmse: %.6f (%.6f)' % (np.sqrt(
                            loss_mse.item()), meter_loss_rmse.avg)

                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 500 iterations
                        # NOTE(review): this runs in the 'valid' phase too,
                        # so validation values are written under the
                        # "*/train" tags -- confirm whether that is intended.
                        if global_iteration > 500:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss/train", loss_mse.item(),
                                              global_iteration)
                            writer.add_scalar("RMSE average loss/train",
                                              meter_loss_rmse.avg,
                                              global_iteration)

                    if phase == 'train' and i % config['train'][
                            'ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_epoch_%d_iter_%d' %
                            (train_dir, epoch, i))

                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'],
                    meter_loss_rmse.avg, best_valid_loss)
                print(log)

                if phase == 'valid':
                    # the plateau scheduler is driven by the validation RMSE
                    scheduler.step(meter_loss_rmse.avg)
                    writer.add_scalar("RMSE average loss/valid",
                                      meter_loss_rmse.avg, global_iteration)
                    if meter_loss_rmse.avg < best_valid_loss:
                        best_valid_loss = meter_loss_rmse.avg
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' %
            (train_dir, epoch_counter_external))
        writer.flush()  # flush SummaryWriter events to disk
def train_dynamics(config, data_path, train_dir):
    """Train a ``DynaNetMLP`` dynamics model with a multi-step rollout loss.

    Every epoch runs a 'train' pass followed by a 'valid' pass. For each
    batch the model is rolled out for ``n_rollout`` steps, feeding its own
    predictions back into the history window; the per-step MSE against the
    ground-truth states is averaged into the loss. State-dict checkpoints
    and the best-validation model are written to ``train_dir``.

    Args:
        config: nested mapping; wrapped in ``edict`` for attribute access.
            Reads ``config.train`` (``random_seed``, ``resume_epoch``,
            ``resume_iter``, ``batch_size``, ``num_workers``, ``lr``,
            ``adam_beta1``, ``n_epoch``, ``n_history``, ``n_rollout``,
            ``log_per_iter``, ``ckp_per_iter``) and ``config.env.type``.
        data_path: forwarded to ``MultiEpisodeDataset``.
        train_dir: directory for the log file and checkpoints; also where
            the resume checkpoint is looked up.

    Raises:
        ValueError: if ``config.env.type`` is not ``'PusherSlider'``.
    """

    # access dict values as attributes
    config = edict(config)

    # set random seed for reproduction
    set_seed(config.train.random_seed)

    st_epoch = config.train.resume_epoch if config.train.resume_epoch > 0 else 0
    # The Tee object is kept referenced for the duration of training.
    # presumably it mirrors stdout into the log file -- confirm in Tee.
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    print(config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(config, data_path, phase=phase)

        dataloaders[phase] = DataLoader(
            datasets[phase], batch_size=config.train.batch_size,
            shuffle=True if phase == 'train' else False,
            num_workers=config.train.num_workers)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    # define model for dynamics prediction
    model_dy = DynaNetMLP(config)
    print("model_dy #params: %d" % count_trainable_parameters(model_dy))

    if config.train.resume_epoch >= 0:
        # if resume from a pretrained checkpoint
        model_dy_path = os.path.join(
            train_dir, 'net_dy_epoch_%d_iter_%d.pth' % (
                config.train.resume_epoch, config.train.resume_iter))
        print("Loading saved ckp from %s" % model_dy_path)
        # The model is still on the CPU here (moved to the GPU below), so
        # map the checkpoint there; this also lets a GPU-saved checkpoint be
        # resumed on a CPU-only machine.
        model_dy.load_state_dict(
            torch.load(model_dy_path, map_location='cpu'))

    # criterion
    criterionMSE = nn.MSELoss()

    # optimizer
    params = model_dy.parameters()
    optimizer = optim.Adam(params, lr=config.train.lr, betas=(config.train.adam_beta1, 0.999))
    scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=10, verbose=True)

    if use_gpu:
        model_dy = model_dy.cuda()

    best_valid_loss = np.inf

    for epoch in range(st_epoch, config.train.n_epoch):
        phases = ['train', 'valid']

        for phase in phases:
            model_dy.train(phase == 'train')

            meter_loss_rmse = AverageMeter()

            bar = ProgressBar(max_value=data_n_batches[phase])
            loader = dataloaders[phase]

            for i, data in bar(enumerate(loader)):

                if use_gpu:
                    if isinstance(data, list):
                        data = [d.cuda() for d in data]
                    else:
                        data = data.cuda()

                with torch.set_grad_enabled(phase == 'train'):
                    n_his, n_roll = config.train.n_history, config.train.n_rollout
                    n_samples = n_his + n_roll

                    if config.env.type in ['PusherSlider']:
                        states, actions = data
                        assert states.size(1) == n_samples

                        B = states.size(0)
                        loss_mse = 0.

                        # state_cur: B x n_his x state_dim
                        state_cur = states[:, :n_his]

                        for j in range(n_roll):

                            # ground-truth state for rollout step j
                            state_des = states[:, n_his + j]

                            # action_cur: B x n_his x action_dim
                            action_cur = actions[:, j : j + n_his] if actions is not None else None

                            # state_pred: B x state_dim
                            state_pred = model_dy(state_cur, action_cur)

                            loss_mse_cur = criterionMSE(state_pred, state_des)
                            loss_mse += loss_mse_cur / config.train.n_rollout

                            # slide the history window: drop the oldest state
                            # and append the prediction
                            state_cur = torch.cat([state_cur[:, 1:], state_pred.unsqueeze(1)], 1)

                        meter_loss_rmse.update(np.sqrt(loss_mse.item()), B)
                    else:
                        # Without this, loss_mse would be unbound below and
                        # fail with an opaque UnboundLocalError.
                        raise ValueError(
                            "unsupported env type: %s" % config.env.type)

                if phase == 'train':
                    optimizer.zero_grad()
                    loss_mse.backward()
                    optimizer.step()

                if i % config.train.log_per_iter == 0:
                    log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                        phase, epoch, config.train.n_epoch, i, data_n_batches[phase],
                        get_lr(optimizer))
                    log += ', rmse: %.6f (%.6f)' % (
                        np.sqrt(loss_mse.item()), meter_loss_rmse.avg)

                    print(log)

                if phase == 'train' and i % config.train.ckp_per_iter == 0:
                    torch.save(model_dy.state_dict(), '%s/net_dy_epoch_%d_iter_%d.pth' % (train_dir, epoch, i))

            log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                phase, epoch, config.train.n_epoch, meter_loss_rmse.avg, best_valid_loss)
            print(log)

            if phase == 'valid':
                # the plateau scheduler is driven by the validation RMSE
                scheduler.step(meter_loss_rmse.avg)
                if meter_loss_rmse.avg < best_valid_loss:
                    best_valid_loss = meter_loss_rmse.avg
                    torch.save(model_dy.state_dict(), '%s/net_best_dy.pth' % (train_dir))
def train_dynamics(config, train_dir, data_dir, model_dy, global_iteration,
                   writer):
    """Train (or continue training) a dynamics model on a Drake sim dataset.

    Episodes are loaded from ``data_dir`` without image data, wrapped in
    train/valid ``MultiEpisodeDataset`` loaders, and the model is optimised
    with Adam on the MSE between the ``rollout_model`` prediction and the
    ground-truth states. Additional L1 / final-step losses are computed for
    logging only. Checkpoints are written under ``train_dir``.

    Args:
        config: nested dict; this function reads ``config['train']``
            (``batch_size``, ``num_workers``, ``lr``, ``adam_beta1``,
            ``lr_scheduler``, ``n_epoch``, ``n_history``, ``n_rollout``,
            ``log_per_iter``, ``ckp_per_iter``) and forwards the whole dict
            to the dataset / model / function factories.
        train_dir: directory prefix used for saved models.
        data_dir: passed to ``DrakeSimEpisodeReader.load_dataset``.
        model_dy: model to train; if ``None`` a new ``DynaNetMLP(config)``
            is created.
        global_iteration: running step counter; incremented once per batch
            in both phases and used as the x-axis for logged scalars.
        writer: object providing ``add_scalar`` and ``flush``
            (presumably a TensorBoard ``SummaryWriter`` -- confirm at caller).

    Returns:
        Tuple ``(model_dy, global_iteration)`` with the trained model and
        the updated step counter. A ``KeyboardInterrupt`` is caught (the
        model is saved first) and the same tuple is still returned.

    Raises:
        ValueError: if the scheduler is enabled with an unknown type.
    """

    # load the data
    multi_episode_dict = DrakeSimEpisodeReader.load_dataset(
        data_dir, load_image_data=False)
    '''
    for episode_name in list(multi_episode_dict.keys()):
        print("episode name", episode_name)
        episode = multi_episode_dict[episode_name]
        obs = episode.get_observation(34)
        print(obs)
    '''

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=multi_episode_dict,
            phase=phase)

        # print(config['train'])

        # drop_last=True: incomplete final batches are skipped in both phases
        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=True if phase == 'train' else False,
            num_workers=config['train']['num_workers'],
            drop_last=True)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()
    '''
    define model for dynamics prediction
    '''
    if model_dy is None:
        model_dy = DynaNetMLP(config)

    # criterion
    MSELoss = nn.MSELoss()
    L1Loss = nn.L1Loss()

    # optimizer
    params = model_dy.parameters()
    # float() tolerates a learning rate given as a string in the config
    lr = float(config['train']['lr'])
    optimizer = optim.Adam(params,
                           lr=lr,
                           betas=(config['train']['adam_beta1'], 0.999))

    # setup scheduler
    sc = config['train']['lr_scheduler']
    scheduler = None

    if config['train']['lr_scheduler']['enabled']:
        if config['train']['lr_scheduler']['type'] == "ReduceLROnPlateau":
            scheduler = ReduceLROnPlateau(optimizer,
                                          mode='min',
                                          factor=sc['factor'],
                                          patience=sc['patience'],
                                          threshold_mode=sc['threshold_mode'],
                                          cooldown=sc['cooldown'],
                                          verbose=True)
        elif config['train']['lr_scheduler']['type'] == "StepLR":
            step_size = config['train']['lr_scheduler']['step_size']
            gamma = config['train']['lr_scheduler']['gamma']
            scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma)
        else:
            raise ValueError("unknown scheduler type: %s" %
                             (config['train']['lr_scheduler']['type']))

    if use_gpu:
        print("using gpu")
        model_dy = model_dy.cuda()

    best_valid_loss = np.inf
    # per-phase batch counters (incremented below, not otherwise read here)
    counters = {'train': 0, 'valid': 0}

    try:
        for epoch in range(config['train']['n_epoch']):
            phases = ['train', 'valid']

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)
            for phase in phases:
                model_dy.train(phase == 'train')

                meter_loss_rmse = AverageMeter()
                step_duration_meter = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):

                    loss_container = dict()  # store the losses for this step

                    step_start_time = time.time()

                    global_iteration += 1
                    counters[phase] += 1

                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" % global_iteration)
                            print("n_samples", n_samples)

                        # [B, n_samples, obs_dim]
                        observations = data['observations']

                        # [B, n_samples, action_dim]
                        actions = data['actions']
                        B = actions.shape[0]

                        if use_gpu:
                            observations = observations.cuda()
                            actions = actions.cuda()

                        # states, actions = data
                        assert actions.shape[1] == n_samples
                        loss_mse = 0.

                        # we don't have any visual observations, so states are observations
                        states = observations

                        # [B, n_his, state_dim]
                        state_init = states[:, :n_his]

                        # We want to rollout n_roll steps
                        # actions = [B, n_his + n_roll, -1]
                        # The slice below keeps every action except the last
                        # one, i.e. action_seq.shape = [B, n_his + n_roll - 1, -1]
                        action_start_idx = 0
                        action_end_idx = n_his + n_roll - 1
                        action_seq = actions[:, action_start_idx:
                                             action_end_idx, :]

                        if DEBUG:
                            print("states.shape", states.shape)
                            print("state_init.shape", state_init.shape)
                            print("actions.shape", actions.shape)
                            print("action_seq.shape", action_seq.shape)

                        # try using models_dy.rollout_model instead of doing this manually
                        rollout_data = rollout_model(state_init=state_init,
                                                     action_seq=action_seq,
                                                     dynamics_net=model_dy,
                                                     compute_debug_data=False)

                        # [B, n_roll, state_dim]
                        state_rollout_pred = rollout_data['state_pred']

                        # [B, n_roll, state_dim]
                        state_rollout_gt = states[:, n_his:]

                        if DEBUG:
                            print("state_rollout_gt.shape",
                                  state_rollout_gt.shape)
                            print("state_rollout_pred.shape",
                                  state_rollout_pred.shape)

                        # the loss function is between
                        # [B, n_roll, state_dim]
                        state_pred_err = state_rollout_pred - state_rollout_gt

                        # everything is in 3D space now so no need to do any scaling
                        # all the losses would be in meters . . . .
                        loss_mse = MSELoss(state_rollout_pred,
                                           state_rollout_gt)
                        loss_l1 = L1Loss(state_rollout_pred, state_rollout_gt)
                        meter_loss_rmse.update(np.sqrt(loss_mse.item()), B)

                        # compute losses at final step of the rollout
                        mse_final_step = MSELoss(state_rollout_pred[:, -1, :],
                                                 state_rollout_gt[:, -1, :])
                        l2_final_step = torch.norm(state_pred_err[:, -1],
                                                   dim=-1).mean()
                        l1_final_step = L1Loss(state_rollout_pred[:, -1, :],
                                               state_rollout_gt[:, -1, :])

                        # only loss_mse is back-propagated; the rest are
                        # collected for logging
                        loss_container['mse'] = loss_mse
                        loss_container['l1'] = loss_l1
                        loss_container['mse_final_step'] = mse_final_step
                        loss_container['l1_final_step'] = l1_final_step
                        loss_container['l2_final_step'] = l2_final_step

                    # the timing is taken before the backward pass, so it
                    # covers data transfer, rollout and loss computation only
                    step_duration_meter.update(time.time() - step_start_time)
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss_mse.backward()
                        optimizer.step()

                    if i % config['train']['log_per_iter'] == 0:
                        log = '%s %d [%d/%d][%d/%d] LR: %.6f' % (
                            phase, global_iteration, epoch,
                            config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))
                        log += ', rmse: %.6f (%.6f)' % (np.sqrt(
                            loss_mse.item()), meter_loss_rmse.avg)

                        log += ', step time %.6f' % (step_duration_meter.avg)
                        # restart the average for the next logging window
                        step_duration_meter.reset()

                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 100 iterations
                        if global_iteration > 100:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss_MSE/%s" % (phase),
                                              loss_mse.item(),
                                              global_iteration)
                            writer.add_scalar("L1/%s" % (phase),
                                              loss_l1.item(), global_iteration)
                            writer.add_scalar("RMSE average loss/%s" % (phase),
                                              meter_loss_rmse.avg,
                                              global_iteration)

                            # number of loaded episodes
                            writer.add_scalar("n_taj", len(multi_episode_dict),
                                              global_iteration)

                            for loss_type, loss_obj in loss_container.items():
                                plot_name = "Loss/%s/%s" % (loss_type, phase)
                                writer.add_scalar(plot_name, loss_obj.item(),
                                                  global_iteration)

                    # checkpoint cadence is keyed on the global step, which
                    # also advances during validation batches
                    if phase == 'train' and global_iteration % config['train'][
                            'ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_iter_%d' %
                            (train_dir, global_iteration))

                log = '%s %d [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, global_iteration, epoch, config['train']['n_epoch'],
                    meter_loss_rmse.avg, best_valid_loss)
                print(log)

                # StepLR is stepped once per epoch, after the train phase
                if phase == "train":
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type']
                            == "StepLR"):
                        scheduler.step()

                if phase == 'valid':
                    # ReduceLROnPlateau is driven by the validation RMSE
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type']
                            == "ReduceLROnPlateau"):
                        scheduler.step(meter_loss_rmse.avg)

                    if meter_loss_rmse.avg < best_valid_loss:
                        best_valid_loss = meter_loss_rmse.avg
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_iter_%d_keyboard_interrupt' %
            (train_dir, global_iteration))
        writer.flush()  # flush SummaryWriter events to disk

    return model_dy, global_iteration
def test_dynanet_mlp():
    """Smoke-test ``DynaNetMLP``: forward pass and two save/load round trips.

    Steps:
      1. Build normalizers from the pusher-slider dataset statistics and
         attach them to a fresh model, printing parameter counts before
         and after.
      2. Run a single forward pass on the first dataset item.
      3. Save and reload the whole model object with ``torch.save`` /
         ``torch.load``.
      4. Save the ``state_dict`` and load it into a freshly constructed
         model, then print the action-normalizer mean of both models.

    Files are written to ``<project_root>/sandbox``, which is created if it
    does not exist. The function only prints; it makes no assertions.
    """

    def _print_param_summary(header, net):
        # Print a header followed by the trainable / non-trainable counts.
        print(header)
        print("num trainable parameters", count_trainable_parameters(net))
        print("num non-trainable parameters ",
              count_non_trainable_parameters(net))
        print("\n\n")

    # just try doing a single forward pass
    dataset, config = create_pusher_slider_dataset()
    stats = dataset.compute_dataset_statistics()

    n_history = config["train"]["n_history"]

    # the statistics are used as-is (not repeated n_history times)
    observations_normalizer = DataNormalizer(stats['observations']['mean'],
                                             stats['observations']['std'])
    actions_normalizer = DataNormalizer(stats['actions']['mean'],
                                        stats['actions']['std'])

    config["dataset"]["state_dim"] = 5
    config["dataset"]["action_dim"] = 2
    model = DynaNetMLP(config)

    # print summary of model before adding new modules
    _print_param_summary(
        "\n\n -----summary of model BEFORE adding normalization modules",
        model)

    # summary of model after adding new params
    model.set_action_normalizer(actions_normalizer)
    model.set_state_normalizer(observations_normalizer)

    _print_param_summary(
        "\n\n -----summary of model AFTER adding normalization modules",
        model)

    # unsqueeze to mimic dataloader with batch size of 1
    data = dataset[0]  # test the getitem
    observations = data['observations'].unsqueeze(0)
    actions = data['actions'].unsqueeze(0)

    obs_slice = observations[:, :n_history, :]
    action_slice = actions[:, :n_history, :]

    print("action_slice.shape", action_slice.shape)
    print("obs_slice.shape", obs_slice.shape)

    # run the model forwards one timestep
    output = model.forward(obs_slice, action_slice)

    print("output.shape", output.shape)

    # save the model with torch.save and torch.load
    save_dir = os.path.join(get_project_root(), 'sandbox')
    # torch.save does not create missing parent directories
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    model_save_file = os.path.join(save_dir, "model.pth")
    torch.save(model, model_save_file)

    # load the model
    model_load = torch.load(model_save_file)
    _print_param_summary("\n\n -----summary of model LOADED from disk",
                         model_load)

    # now try doing the same but with the state dict
    # my hunch is that this won't work . . .
    params_save_file = os.path.join(save_dir, "model_params.pth")
    torch.save(model.state_dict(), params_save_file)

    # load the model
    model_load = DynaNetMLP(config)
    state_dict = torch.load(params_save_file)
    for param_tensor in state_dict:
        print(param_tensor, "\t", state_dict[param_tensor].size())

    # NOTE(review): the fresh model has no normalizers attached before the
    # state dict is loaded. An earlier experiment attached dummy ones first:
    #   model_load.set_state_normalizer(DataNormalizer(0.0,1.0))
    #   model_load.set_action_normalizer(DataNormalizer(0.0,1.0))
    model_load.load_state_dict(state_dict)
    _print_param_summary(
        "\n\n -----summary of model LOADED from disk with state_dict method",
        model_load)

    print("model_load._action_normalizer._mean",
          model_load.action_normalizer._mean)
    print("model._action_normalizer._mean", model.action_normalizer._mean)
def mpc_w_learned_dynamics(config,
                           train_dir,
                           mpc_dir,
                           state_dict_path=None,
                           keypoint_observation=False):
    """Run MPC episodes with a learned dynamics model restored from disk.

    Args:
        config: nested configuration mapping. This function reads
            ``config['train']['random_seed']``,
            ``config['dynamics']['model_type']``,
            ``config['mpc']['num_episodes']`` and, when ``state_dict_path``
            is None, ``config['mpc']['mpc_dy_epoch']`` /
            ``config['mpc']['mpc_dy_iter']``. It is also forwarded to the
            model, planner, action/observation factories and the episode
            routine.
        train_dir: directory containing the saved checkpoints.
        mpc_dir: directory in which ``mpc.log`` is created; also forwarded
            to the per-episode routine.
        state_dict_path: optional explicit path to a saved state dict. When
            None the path is derived from ``train_dir`` and the config:
            ``net_best_dy.pth`` if ``mpc_dy_epoch == -1``, otherwise
            ``net_dy_epoch_<epoch>_iter_<iter>.pth``.
        keypoint_observation: only ``True`` is supported.

    Raises:
        AssertionError: if the configured model type is not ``'mlp'``, or if
            ``keypoint_observation`` is False and at least one episode is
            requested.
    """
    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    # Hold a reference to the Tee for the whole run.
    # NOTE(review): presumably Tee mirrors stdout into mpc.log -- confirm.
    tee = Tee(os.path.join(mpc_dir, 'mpc.log'), 'w')

    print(config)

    use_gpu = torch.cuda.is_available()

    # ---- model ----
    # NOTE(review): the build_*_model helpers in this file read
    # config['dynamics_net']['model_type']; confirm that 'dynamics' is the
    # intended key here.
    model_type = config['dynamics']['model_type']
    if model_type != 'mlp':
        raise AssertionError("Unknown model type %s" % model_type)
    model_dy = DynaNetMLP(config)

    # print model #params
    print("model #params: %d" % count_trainable_parameters(model_dy))

    if state_dict_path is None:
        mpc_cfg = config['mpc']
        if mpc_cfg['mpc_dy_epoch'] == -1:
            ckpt_name = 'net_best_dy.pth'
        else:
            ckpt_name = 'net_dy_epoch_%d_iter_%d.pth' % (
                mpc_cfg['mpc_dy_epoch'], mpc_cfg['mpc_dy_iter'])
        state_dict_path = os.path.join(train_dir, ckpt_name)

    # Log the checkpoint whether it was derived or passed in by the caller.
    print("Loading saved ckp from %s" % state_dict_path)

    # Deserialize onto the CPU so that a checkpoint written from a CUDA model
    # can still be restored on a machine without a GPU; the model is moved to
    # the GPU afterwards when one is available.
    state_dict = torch.load(state_dict_path, map_location='cpu')
    model_dy.load_state_dict(state_dict)
    model_dy.eval()

    if use_gpu:
        model_dy.cuda()

    # generate action/observation functions
    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    # planner
    planner = planner_from_config(config)

    # ---- env ----
    # Hard-coded goal observations, one row per MPC episode.
    # NOTE(review): each row has 10 values -- presumably 5 keypoints as
    # (x, y) pairs; confirm against the observation function.
    # Only 10 goals are defined, so config['mpc']['num_episodes'] > 10 will
    # raise IndexError when the 11th goal is looked up.
    obs_goals = np.array([
        [262.9843, 267.3102, 318.9369, 351.1229, 360.2048,
         323.5128, 305.6385, 240.4460, 515.4230, 347.8708],
        [381.8694, 273.6327, 299.6685, 331.0925, 328.7724,
         372.0096, 411.0972, 314.7053, 517.7299, 268.4953],
        [284.8728, 275.7985, 374.0677, 320.4990, 395.4019,
         275.4633, 306.2896, 231.4310, 507.0849, 312.4057],
        [313.1638, 271.4258, 405.0255, 312.2325, 424.7874,
         266.3525, 333.6973, 225.7708, 510.1232, 305.3802],
        [308.6859, 270.9629, 394.2789, 323.2781, 419.7905,
         280.1602, 333.8901, 228.1624, 519.1964, 321.5318],
        [386.8067, 284.8947, 294.2467, 323.2223, 313.3221,
         368.9970, 405.9415, 330.9298, 495.9970, 268.9920],
        [432.0219, 299.6021, 340.8581, 339.4676, 360.2354,
         384.5515, 451.4394, 345.2190, 514.6357, 291.2043],
        [351.3389, 264.5325, 267.5279, 318.2321, 293.7460,
         360.0423, 378.4428, 306.9586, 516.4390, 259.7810],
        [521.1902, 254.0693, 492.7884, 349.7861, 539.6320,
         364.5190, 569.2258, 268.8824, 506.9431, 286.9752],
        [264.8554, 275.9547, 338.1317, 345.3435, 372.7012,
         308.4648, 299.3454, 239.9245, 506.2117, 373.8413],
    ])

    for mpc_idx in range(config['mpc']['num_episodes']):
        if not keypoint_observation:
            # not supported for now
            raise AssertionError("currently only support keypoint observation")

        mpc_episode_keypoint_observation(config,
                                         mpc_idx,
                                         model_dy,
                                         mpc_dir,
                                         planner,
                                         obs_goals[mpc_idx],
                                         action_function,
                                         observation_function,
                                         use_gpu=use_gpu)
# Example #8
# 0
def eval_dynamics(config,
                  train_dir,
                  eval_dir,
                  state_dict_path=None,
                  keypoint_observation=False,
                  debug=False,
                  render_human=False):
    """Evaluate a saved ``DynaNetMLP`` on episodes from the validation split.

    Args:
        config: nested configuration mapping. This function reads
            ``config['train']['random_seed']``, the optional
            ``config['eval']['num_episodes']`` (default 10) and, when
            ``state_dict_path`` is None, ``config['eval']['eval_dy_epoch']``
            / ``config['eval']['eval_dy_iter']``. It is also forwarded to the
            model, the dataset, the action/observation factories and the
            per-episode routines.
        train_dir: directory containing the saved checkpoints.
        eval_dir: directory in which ``eval.log`` is created; also forwarded
            to the per-episode routine.
        state_dict_path: optional explicit path to a saved state dict. When
            None the path is derived from ``train_dir`` and the config:
            ``net_best_dy.pth`` if ``eval_dy_epoch == -1``, otherwise
            ``net_dy_epoch_<epoch>_iter_<iter>.pth``.
        keypoint_observation: if True, episodes are evaluated with
            ``eval_episode_keypoint_observations``; otherwise with
            ``eval_episode``.
        debug: if True, only the first episode (in sorted name order) is
            evaluated.
        render_human: forwarded unchanged to the per-episode routine.
    """
    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    # Hold a reference to the Tee for the whole run.
    # NOTE(review): presumably Tee mirrors stdout into eval.log -- confirm.
    tee = Tee(os.path.join(eval_dir, 'eval.log'), 'w')

    print(config)

    use_gpu = torch.cuda.is_available()

    # ---- model ----
    model_dy = DynaNetMLP(config)

    # print model #params
    print("model #params: %d" % count_trainable_parameters(model_dy))

    if state_dict_path is None:
        eval_cfg = config['eval']
        if eval_cfg['eval_dy_epoch'] == -1:
            ckpt_name = 'net_best_dy.pth'
        else:
            ckpt_name = 'net_dy_epoch_%d_iter_%d.pth' % (
                eval_cfg['eval_dy_epoch'], eval_cfg['eval_dy_iter'])
        state_dict_path = os.path.join(train_dir, ckpt_name)

    # Log the checkpoint whether it was derived or passed in by the caller.
    print("Loading saved ckp from %s" % state_dict_path)

    # Deserialize onto the CPU so that a checkpoint written from a CUDA model
    # can still be restored on a machine without a GPU; the model is moved to
    # the GPU afterwards when one is available.
    state_dict = torch.load(state_dict_path, map_location='cpu')
    model_dy.load_state_dict(state_dict)
    model_dy.eval()

    if use_gpu:
        model_dy.cuda()

    # load the data
    episodes = load_episodes_from_config(config)

    # generate action/observation functions
    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    dataset = MultiEpisodeDataset(config,
                                  action_function=action_function,
                                  observation_function=observation_function,
                                  episodes=episodes,
                                  phase="valid")

    # Sort so the evaluated episodes (and their roll_idx) do not depend on
    # the order in which the dataset reports them.
    episode_names = dataset.get_episode_names()
    episode_names.sort()

    # for backwards compatibility: older configs have no "num_episodes"
    if "num_episodes" in config["eval"]:
        num_episodes = config["eval"]["num_episodes"]
    else:
        num_episodes = 10

    # Slicing (rather than indexing [0]) keeps debug mode from raising on an
    # empty dataset, matching the behaviour of the non-debug path.
    episode_list = episode_names[:1] if debug else episode_names[:num_episodes]

    # Both routines are invoked with the same argument list.
    if keypoint_observation:
        rollout_fn = eval_episode_keypoint_observations
    else:
        rollout_fn = eval_episode

    for roll_idx, episode_name in enumerate(episode_list):
        print("episode_name", episode_name)
        # start_idx and n_prediction are hard-coded for every rollout.
        rollout_fn(config,
                   dataset,
                   episode_name,
                   roll_idx,
                   model_dy,
                   eval_dir,
                   start_idx=9,
                   n_prediction=30,
                   render_human=render_human)