Example #1
    def __init__(self, network_args):
        super(DirectQEstimator, self).__init__()
        # twin Q-value networks and their target copies
        self.q_value_models = nn.ModuleList([get_model(copy.deepcopy(network_args)) for _ in range(2)])
        self.target_q_value_models = nn.ModuleList([get_model(copy.deepcopy(network_args)) for _ in range(2)])
        q_model_output = self.q_value_models[0].n_out
        # value variables built on top of the Q-network outputs
        self.q_value_variables = nn.ModuleList([get_variable(type='value', args={'n_input': q_model_output}) for _ in range(2)])
        self.target_q_value_variables = nn.ModuleList([get_variable(type='value', args={'n_input': q_model_output}) for _ in range(2)])
        self.q_std = None
Example #2
    def __init__(self, network_args, lr, n_inf_iters):
        super(DirectGradientInference, self).__init__()
        self.inference_model = get_model(network_args)
        # store the optimizer class and learning rate (rather than an optimizer instance)
        self.optimizer = optim.SGD
        self.lr = lr
        self.n_inf_iters = n_inf_iters
        # keep track of estimated objectives for reporting
        self.estimated_objectives = []
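
Note that `self.optimizer = optim.SGD` stores the optimizer class, not an instance. A minimal sketch of how a stored class like this is typically instantiated with the stored learning rate; the parameters and objective below are placeholders, not taken from this code:

import torch
import torch.optim as optim

# placeholder parameters standing in for the distribution parameters that
# gradient-based inference would refine over n_inf_iters steps
params = [torch.zeros(8, requires_grad=True)]
optimizer = optim.SGD(params, lr=1e-3)  # the stored class, instantiated with the stored lr
for _ in range(4):
    optimizer.zero_grad()
    loss = (params[0] ** 2).sum()  # placeholder objective
    loss.backward()
    optimizer.step()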
Example #3
    def __init__(self, network_args, n_inf_iters):
        super(IterativeInferenceModel, self).__init__()
        self.inference_model = get_model(network_args)
        self.n_inf_iters = n_inf_iters
        # keep track of estimated objectives for reporting
        self.estimated_objectives = []
        # keep track of parameters for analysis
        self.dist_params = []
Example #4
    def __init__(self,
                 network_args,
                 model_args,
                 horizon,
                 learn_reward=True,
                 value_estimate='retrace'):
        super(ModelBasedQEstimator, self).__init__()
        # direct Q-value model
        self.q_value_models = nn.ModuleList(
            [get_model(copy.deepcopy(network_args)) for _ in range(2)])
        self.target_q_value_models = nn.ModuleList(
            [get_model(copy.deepcopy(network_args)) for _ in range(2)])
        q_model_output = self.q_value_models[0].n_out
        self.q_value_variables = nn.ModuleList([
            get_variable(type='value', args={'n_input': q_model_output})
            for _ in range(2)
        ])
        self.target_q_value_variables = nn.ModuleList([
            get_variable(type='value', args={'n_input': q_model_output})
            for _ in range(2)
        ])

        # model
        self.state_likelihood_model = get_model(
            model_args['state_likelihood_args'])
        model_args['state_variable_args'][
            'n_input'] = self.state_likelihood_model.n_out
        self.state_variable = get_variable(
            type='observed', args=model_args['state_variable_args'])

        self.reward_likelihood_model = None
        if learn_reward:
            self.reward_likelihood_model = get_model(
                model_args['reward_likelihood_args'])
            model_args['reward_variable_args'][
                'n_input'] = self.reward_likelihood_model.n_out
            self.reward_variable = get_variable(
                type='observed', args=model_args['reward_variable_args'])
        else:
            raise NotImplementedError

        # hyper-parameters and internal attributes
        self.horizon = horizon
        self.value_estimate = value_estimate
        self.q_std = None
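
The twin Q-value networks and their target copies above follow the double-critic pattern common in actor-critic methods. Below is a minimal sketch of the clipped double-Q combination such twin critics are typically used for (as in TD3/SAC); it is illustrative only and not this estimator's actual update:

import torch

def clipped_double_q(q1_pred, q2_pred):
    # element-wise minimum of the two critics reduces overestimation bias
    return torch.min(q1_pred, q2_pred)

q1 = torch.tensor([1.2, 0.7])
q2 = torch.tensor([0.9, 1.1])
target_q = clipped_double_q(q1, q2)  # tensor([0.9, 0.7])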
Example #5
    def __init__(self, network_args, n_inf_iters, encoding_type='grads'):
        super(IterativeInferenceModel, self).__init__()
        self.inference_model = get_model(network_args)
        self.n_inf_iters = n_inf_iters
        assert encoding_type in ['grads', 'errors']
        self.encoding_type = encoding_type
        # keep track of estimated objectives for reporting
        self.estimated_objectives = []
        # keep track of parameters for analysis
        self.dist_params = []
Example #6
def sample_realnvp_images(hparams):
    model = nvp_model.get_model(
        model_dir=os.path.dirname(hparams.checkpoint_path))
    model = model.eval()
    model = model.cuda()
    z = model.sample_z(n=hparams.batch_size)
    x = model.postprocess(model.inverse(z))
    x = x.detach().cpu().numpy()

    images = {i: image.reshape(1, -1) for (i, image) in enumerate(x)}
    return images
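
A hypothetical usage sketch for the function above: it only reads `checkpoint_path` and `batch_size` from `hparams`, and it requires the `nvp_model` module plus a CUDA device. The checkpoint path below is a placeholder:

from argparse import Namespace

hparams = Namespace(checkpoint_path='/path/to/checkpoints/model.ckpt', batch_size=16)
images = sample_realnvp_images(hparams)
# images is a dict mapping sample index -> flattened image as a (1, D) array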
Example #7
    def __init__(self, model_args=None):
        super(GoalBasedQEstimator, self).__init__()
        # model
        if model_args:
            self.state_likelihood_model = get_model(
                model_args['state_likelihood_args'])
            model_args['state_variable_args'][
                'n_input'] = self.state_likelihood_model.n_out
            self.state_variable = get_variable(
                type='observed', args=model_args['state_variable_args'])
        else:
            self.state_likelihood_model = None
            self.state_variable = None
        self.reward_likelihood_model = None
        self.reward_variable = None

        # hyper-parameters and internal attributes
        self.goal_state = None
        self.goal_std = 1.
        self.horizon = 1

        self.errors = {}
Example #8
    def __init__(self, prior_args, approx_post_args, prior_model_args,
                 q_value_estimator_args, inference_optimizer_args, misc_args,
                 direct_inference_optimizer_args, direct_approx_post_args,
                 state_value_estimator_args):
        super(Agent, self).__init__()

        misc_args = postprocess_misc_args(misc_args)

        # prior
        self.prior_model = get_model(prior_model_args)
        self.target_prior_model = copy.deepcopy(self.prior_model)
        if self.prior_model is not None:
            prior_args['n_input'] = self.prior_model.n_out
        else:
            prior_args['n_input'] = None
        self.prior = Distribution(**prior_args)
        self.target_prior = Distribution(**prior_args)

        # approximate posterior
        self.inference_optimizer = get_inference_optimizer(
            inference_optimizer_args)
        if 'inference_model' in dir(self.inference_optimizer):
            approx_post_args[
                'n_input'] = self.inference_optimizer.inference_model.n_out
        else:
            approx_post_args['n_input'] = None
        self.approx_post = Distribution(**approx_post_args)
        self.target_inference_optimizer = self.target_approx_post = None
        if misc_args['inf_target_kl'] or misc_args['target_inf_value_targets']:
            self.target_inference_optimizer = copy.deepcopy(
                self.inference_optimizer)
            self.target_approx_post = Distribution(**approx_post_args)

        # optional direct inference optimizer for model-based value estimation
        self.direct_inference_optimizer = self.direct_approx_post = None
        if direct_inference_optimizer_args is not None:
            self.direct_inference_optimizer = get_inference_optimizer(
                direct_inference_optimizer_args)
            direct_approx_post_args[
                'n_input'] = self.direct_inference_optimizer.inference_model.n_out
            self.direct_approx_post = Distribution(**direct_approx_post_args)

        # Q-value estimator
        self.q_value_estimator = get_value_estimator('action',
                                                     q_value_estimator_args)

        # state value estimator
        self.state_value_estimator = None
        if state_value_estimator_args is not None:
            self.state_value_estimator = get_value_estimator(
                'state', state_value_estimator_args)

        # Lagrange multipliers for KL, location KL, and scale KL
        self.log_alphas = nn.ParameterDict({
            'pi': nn.Parameter(torch.zeros(1)),
            'loc': nn.Parameter(torch.zeros(1)),
            'scale': nn.Parameter(torch.zeros(1))
        })

        if misc_args['inf_target_kl']:
            self.log_alphas['target_inf'] = nn.Parameter(torch.zeros(1))

        # miscellaneous
        self.epsilons = misc_args['epsilons']
        self.n_action_samples = misc_args['n_action_samples']
        self.n_q_action_samples = misc_args['n_q_action_samples']
        self.postprocess_action = misc_args['postprocess_action']
        self.reward_discount = misc_args['reward_discount']
        self.retrace_lambda = misc_args['retrace_lambda']
        self.model_value_targets = misc_args['model_value_targets']
        self.optimize_targets = misc_args['optimize_targets']
        self.direct_targets = misc_args['direct_targets']
        self.off_policy_targets = misc_args['off_policy_targets']
        self.inf_target_kl = misc_args['inf_target_kl']
        self.target_inf_value_targets = misc_args['target_inf_value_targets']
        self.critic_grad_penalty = misc_args['critic_grad_penalty']
        self.pessimism = misc_args['pessimism']
        self.optimism = misc_args['optimism']

        # mode (either 'train' or 'eval')
        self.mode = 'train'

        # collects relevant quantities
        self.collector = Collector(self)

        # internal variables
        self.batch_size = 1
        self._prev_action = self._prev_state = None
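
The `log_alphas` entries above parameterize Lagrange multipliers in log space, which keeps the multipliers positive under unconstrained optimization. A minimal sketch of this pattern with a placeholder constraint; the KL and epsilon values are illustrative and not this agent's actual update:

import torch
import torch.nn as nn

log_alpha = nn.Parameter(torch.zeros(1))
alpha = log_alpha.exp()              # alpha > 0 by construction
kl = torch.tensor(0.15)              # placeholder constraint value
epsilon = 0.1                        # placeholder constraint threshold
alpha_loss = alpha * (epsilon - kl)  # minimizing this raises alpha when kl > epsilon
alpha_loss.backward()                # gradient flows to log_alpha through exp()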
Example #9
log_path, log_dir = init_log(log_root, train_config)
print('Experiment: ' + log_dir)

global vis
vis, handle_dict = init_plot(train_config, arch, env=log_dir)

# load data, labels
data_path = train_config['data_path']
train_loader, val_loader, label_names = load_data(
    train_config['dataset'],
    data_path,
    train_config['batch_size'],
    cuda_device=train_config['cuda_device'])

# construct model
model = get_model(train_config, arch, train_loader)

# get optimizers
(enc_opt, enc_scheduler), (dec_opt, dec_scheduler), start_epoch = get_optimizers(
    train_config, arch, model)

for epoch in range(start_epoch + 1, 2000):
    print('Epoch: ' + str(epoch + 1))
    # train
    tic = time.time()
    model.train()
    train(model, train_config, arch, train_loader, epoch + 1, handle_dict,
          (enc_opt, dec_opt))
    toc = time.time()
    print('Training Time: ' + str(toc - tic))
Example #10
    def __init__(self, network_args):
        super(DirectInferenceModel, self).__init__()
        self.inference_model = get_model(network_args)
        self.n_inf_iters = 1