Ejemplo n.º 1
0
    def __init__(self,
                 batch_size=100,
                 channels=1,
                 g_dim=128,
                 z_dim=10,
                 rnn_size=256,
                 prior_rnn_layers=1,
                 posterior_rnn_layers=1,
                 predictor_rnn_layers=2,
                 opt=None):
        """Build the LSTMs, the encoder/decoder pair and the loss criteria.

        Args:
            batch_size: Forwarded as the last argument of every LSTM
                constructor.
            channels: Forwarded to the backbone encoder/decoder for every
                dataset other than ``'h36m'``.
            g_dim: Feature size used for the encoder/decoder and for sizing
                the LSTM inputs/outputs.
            z_dim: Second constructor argument of the posterior and prior
                Gaussian LSTMs; also part of the frame predictor input size.
            rnn_size: Third constructor argument of every LSTM (presumably
                the hidden size -- confirm against ``lstm_models``).
            prior_rnn_layers: Layer count passed to the prior LSTM.
            posterior_rnn_layers: Layer count passed to the posterior LSTM.
            predictor_rnn_layers: Layer count passed to the frame predictor.
            opt: Options object. Required despite the ``None`` default: it
                must expose ``dataset`` and ``backbone_net``, and its
                ``optimizer`` attribute is overwritten with ``optim.Adam``.

        Raises:
            ValueError: If ``opt`` is ``None``.
        """
        # `opt` is dereferenced unconditionally further down; fail early with
        # a clear message instead of an AttributeError halfway through
        # construction.
        if opt is None:
            raise ValueError(
                "opt is required: it must provide 'dataset' and 'backbone_net'")

        super().__init__()
        self.batch_size = batch_size
        self.channels = channels
        self.g_dim = g_dim
        self.z_dim = z_dim
        self.rnn_size = rnn_size
        self.prior_rnn_layers = prior_rnn_layers
        self.posterior_rnn_layers = posterior_rnn_layers
        self.predictor_rnn_layers = predictor_rnn_layers
        self.opt = opt

        # LSTMs
        # NOTE(review): each input size carries two extra features
        # ("+ 1 + 1"); their meaning is not visible in this method -- confirm
        # against the forward pass.
        predictor_in = self.g_dim + self.z_dim + 1 + 1
        latent_in = self.g_dim + self.g_dim + 1 + 1
        self.frame_predictor = lstm_models.lstm(
            predictor_in, self.g_dim, self.rnn_size,
            self.predictor_rnn_layers, self.batch_size)
        self.posterior = lstm_models.gaussian_lstm(
            latent_in, self.z_dim, self.rnn_size,
            self.posterior_rnn_layers, self.batch_size)
        self.prior = lstm_models.gaussian_lstm(
            latent_in, self.z_dim, self.rnn_size,
            self.prior_rnn_layers, self.batch_size)

        # encoder & decoder: the 'h36m' backbone takes keyword dimensions,
        # every other dataset takes (g_dim, channels) positionally.
        if opt.dataset == 'h36m':
            self.encoder = opt.backbone_net.encoder(out_dim=self.g_dim,
                                                    h_dim=self.g_dim)
            self.decoder = opt.backbone_net.decoder(in_dim=self.g_dim,
                                                    h_dim=self.g_dim)
        else:
            self.encoder = opt.backbone_net.encoder(self.g_dim, self.channels)
            self.decoder = opt.backbone_net.decoder(self.g_dim, self.channels)

        # optimizer
        # This mutates the caller's options object (also stored as self.opt).
        opt.optimizer = optim.Adam

        # criterions
        self.mse_criterion = nn.MSELoss()  # recon and cpc
        self.kl_criterion = criterion.KLCriterion(opt=opt)
        self.align_criterion = nn.MSELoss()

        self.init_weight()
        self.init_optimizer()
Ejemplo n.º 2
0
    def __init__(self, params):
        """Set up networks, optimizers, optional Visdom plotters and loaders.

        Args:
            params: Mapping of settings. Keys read here: ``cuda``, ``seed``,
                ``noreload``, ``learning_rate``, ``beta1``, ``plot_visdom``,
                ``path_data``, ``seq_len``, ``batch_size``,
                ``train_buffer_size`` and ``test_buffer_size``; ``env`` when
                ``plot_visdom`` is truthy; and, when ``noreload`` is truthy,
                ``g_dim``, ``z_dim``, ``action_size``, ``rnn_size``,
                ``predictor_rnn_layers``, ``posterior_rnn_layers`` and
                ``n_channels``.
        """
        self.params = params
        # NOTE(review): the loss and the networks below are moved with
        # .cuda() unconditionally, so self.device is not honoured here and a
        # CPU-only host will fail. Left as-is because other methods of the
        # class may rely on CUDA placement -- confirm before switching to
        # .to(self.device).
        self.loss_function = nn.MSELoss().cuda()
        # choose device
        self.cuda = params["cuda"] and torch.cuda.is_available()
        torch.manual_seed(params["seed"])
        # Fix numeric divergence due to bug in Cudnn
        # NOTE(review): benchmark=True enables the cuDNN autotuner; confirm
        # this is the setting the workaround above intends.
        torch.backends.cudnn.benchmark = True
        self.device = torch.device("cuda" if self.cuda else "cpu")

        # Initialize model
        if params["noreload"]:
            self.frame_predictor = lstm_models.lstm(
                params["g_dim"] + params["z_dim"] + params["action_size"],
                params["g_dim"], params["rnn_size"],
                params["predictor_rnn_layers"], params["batch_size"]).cuda()
            self.posterior = lstm_models.gaussian_lstm(
                params["g_dim"], params["z_dim"], params["rnn_size"],
                params["posterior_rnn_layers"], params["batch_size"]).cuda()

            self.encoder = model.encoder(params["g_dim"], params["n_channels"]).cuda()
            self.decoder = model.decoder(params["g_dim"], params["n_channels"]).cuda()

            # Weight initialisation belongs to freshly built networks only.
            # Previously it also ran after load_checkpoint(), which would
            # re-randomise the restored networks.
            for net in (self.frame_predictor, self.posterior,
                        self.encoder, self.decoder):
                net.apply(svp_utils.init_weights)
        else:
            # Presumably populates the four networks read below -- the
            # optimizers need them to exist; confirm in load_checkpoint().
            self.load_checkpoint()

        # Init optimizers: one Adam per network, identical hyper-parameters.
        def make_adam(module):
            return optim.Adam(module.parameters(),
                              lr=params["learning_rate"],
                              betas=(params["beta1"], 0.999))

        self.frame_predictor_optimizer = make_adam(self.frame_predictor)
        self.posterior_optimizer = make_adam(self.posterior)
        self.encoder_optimizer = make_adam(self.encoder)
        self.decoder_optimizer = make_adam(self.decoder)

        if params["plot_visdom"]:
            self.plotter = VisdomLinePlotter(env_name=params['env'])
            self.img_plotter = VisdomImagePlotter(env_name=params['env'])

        # Select transformations: reorder axes (0, 3, 1, 2) and divide by 255
        # (presumably channel-last 8-bit frames to channel-first values in
        # [0, 1] -- confirm against RolloutSequenceDataset).
        transform = transforms.Lambda(
            lambda x: np.transpose(x, (0, 3, 1, 2)) / 255)
        self.train_loader = DataLoader(
            RolloutSequenceDataset(params["path_data"], params["seq_len"],
                                   transform,
                                   buffer_size=params["train_buffer_size"]),
            batch_size=params['batch_size'], num_workers=2, shuffle=True,
            drop_last=True)
        self.test_loader = DataLoader(
            RolloutSequenceDataset(params["path_data"], params["seq_len"],
                                   transform, train=False,
                                   buffer_size=params["test_buffer_size"]),
            batch_size=params['batch_size'], num_workers=2, shuffle=False,
            drop_last=True)
Ejemplo n.º 3
0
elif opt.optimizer == 'sgd':
    opt.optimizer = optim.SGD
else:
    raise ValueError('Unknown optimizer: %s' % opt.optimizer)

import models.lstm as lstm_models
if opt.model_dir == '':
    # No model directory given: build the three recurrent networks from
    # scratch, then run the weight initialiser over each of them in turn.
    frame_predictor = lstm_models.lstm(
        opt.g_dim + opt.z_dim,
        opt.g_dim,
        opt.rnn_size,
        opt.predictor_rnn_layers,
        opt.batch_size,
    )
    posterior = lstm_models.gaussian_lstm(
        opt.g_dim,
        opt.z_dim,
        opt.rnn_size,
        opt.posterior_rnn_layers,
        opt.batch_size,
    )
    prior = lstm_models.gaussian_lstm(
        opt.g_dim,
        opt.z_dim,
        opt.rnn_size,
        opt.prior_rnn_layers,
        opt.batch_size,
    )
    frame_predictor.apply(utils.init_weights)
    posterior.apply(utils.init_weights)
    prior.apply(utils.init_weights)
else:
    # A model directory was given: reuse the networks stored in the
    # previously loaded `saved_model` mapping.
    frame_predictor, posterior, prior = (
        saved_model['frame_predictor'],
        saved_model['posterior'],
        saved_model['prior'],
    )

# Pick the backbone module (bound to the name `model`) from opt.model and
# opt.image_width, then build and initialise the networks.
# NOTE(review): as written, everything from the nested `if` through the
# init_net calls lives inside the outer 'vgg' branch, so for 'dcgan' only the
# import runs and no networks are built here. This looks like two snippets
# spliced together -- confirm the intended nesting before relying on it.
# There is also no outer `else`, so an unrecognised opt.model (or an
# image_width other than 64/128) leaves `model` unbound at this point.
if opt.model == 'dcgan':
    if opt.image_width == 64:
        import models.dcgan_64 as model
    elif opt.image_width == 128:
        import models.dcgan_128 as model
elif opt.model == 'vgg':
    if opt.image_width == 64:
        import models.vgg_64 as model
    # NOTE(review): the enclosing `elif` already established
    # opt.model == 'vgg', so the 'dcgan' arm and the final `else` below
    # cannot be taken; only the nested 'vgg' arm is effective (it repeats the
    # 64-pixel import above and adds the 128-pixel one).
    if opt.model == 'dcgan':
        if opt.image_width == 64:
            import models.dcgan_64 as model 
        elif opt.image_width == 128:
            import models.dcgan_128 as model  
    elif opt.model == 'vgg':
        if opt.image_width == 64:
            import models.vgg_64 as model
        elif opt.image_width == 128:
            import models.vgg_128 as model
    else:
        raise ValueError('Unknown model: %s' % opt.model)

    # define
    # The last argument of each LSTM is opt.batch_size divided by the number
    # of entries in opt.gpu_ids, truncated to an int.
    frame_predictor = lstm_models.lstm((opt.factor+1)*opt.z_dim, opt.g_dim, opt.rnn_size, opt.predictor_rnn_layers, int(opt.batch_size/len(opt.gpu_ids)))
    posterior_pose = lstm_models.gaussian_lstm(opt.g_dim+opt.factor*opt.z_dim, opt.z_dim, opt.rnn_size, opt.posterior_rnn_layers, int(opt.batch_size/len(opt.gpu_ids)))
    prior = lstm_models.gaussian_lstm(opt.g_dim+opt.factor*opt.z_dim, opt.z_dim, opt.rnn_size, opt.prior_rnn_layers, int(opt.batch_size/len(opt.gpu_ids)))

    # Content encoder is sized with opt.z_dim*opt.factor and
    # opt.channels*opt.n_past; pose encoder and decoder take
    # (opt.g_dim, opt.channels).
    cont_encoder = model.cont_encoder(opt.z_dim*opt.factor, opt.channels*opt.n_past)  #g_dim = 64 or 128
    pose_encoder = model.pose_encoder(opt.g_dim, opt.channels)
    decoder = model.decoder(opt.g_dim, opt.channels)

    # init
    # Every network is passed through utils.init_net with the same settings
    # and rebound to its return value (presumably weight initialisation plus
    # placement on opt.gpu_ids -- confirm in utils.init_net).
    frame_predictor = utils.init_net(frame_predictor, init_type='normal', init_gain=0.02, gpu_ids=opt.gpu_ids)
    posterior_pose = utils.init_net(posterior_pose, init_type='normal', init_gain=0.02, gpu_ids=opt.gpu_ids)
    prior = utils.init_net(prior, init_type='normal', init_gain=0.02, gpu_ids=opt.gpu_ids)

    cont_encoder = utils.init_net(cont_encoder, init_type='normal', init_gain=0.02, gpu_ids=opt.gpu_ids)
    pose_encoder = utils.init_net(pose_encoder, init_type='normal', init_gain=0.02, gpu_ids=opt.gpu_ids)
    decoder = utils.init_net(decoder, init_type='normal', init_gain=0.02, gpu_ids=opt.gpu_ids)