def __init__(self, network_args):
    super(DirectQEstimator, self).__init__()
    # two Q-networks and their target copies
    self.q_value_models = nn.ModuleList(
        [get_model(copy.deepcopy(network_args)) for _ in range(2)])
    self.target_q_value_models = nn.ModuleList(
        [get_model(copy.deepcopy(network_args)) for _ in range(2)])
    q_model_output = self.q_value_models[0].n_out
    self.q_value_variables = nn.ModuleList(
        [get_variable(type='value', args={'n_input': q_model_output})
         for _ in range(2)])
    self.target_q_value_variables = nn.ModuleList(
        [get_variable(type='value', args={'n_input': q_model_output})
         for _ in range(2)])
    self.q_std = None
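# Hedged usage sketch (not from the source): with the two Q-estimates produced
# by the twin models above, the standard clipped double-Q reduction takes the
# element-wise minimum to counteract overestimation bias. `q1` and `q2` are
# hypothetical tensors.
import torch

def clipped_double_q(q1, q2):
    # pessimistic estimate: min over the two Q-value predictions
    return torch.min(q1, q2)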
def __init__(self, network_args, lr, n_inf_iters):
    super(DirectGradientInference, self).__init__()
    self.inference_model = get_model(network_args)
    self.optimizer = optim.SGD
    self.lr = lr
    self.n_inf_iters = n_inf_iters
    # keep track of estimated objectives for reporting
    self.estimated_objectives = []
def __init__(self, network_args, n_inf_iters):
    super(IterativeInferenceModel, self).__init__()
    self.inference_model = get_model(network_args)
    self.n_inf_iters = n_inf_iters
    # keep track of estimated objectives for reporting
    self.estimated_objectives = []
    # keep track of parameters for analysis
    self.dist_params = []
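# Hedged sketch of the iterative inference loop this class presumably drives
# (the actual update rule is not shown in the source). `get_grads` and the
# additive update are assumptions for illustration.
def iterative_inference(inference_model, init_params, get_grads, n_inf_iters):
    params = init_params
    for _ in range(n_inf_iters):
        # the inference model maps objective gradients to a parameter update
        params = params + inference_model(get_grads(params))
    return params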
def __init__(self, network_args, model_args, horizon, learn_reward=True,
             value_estimate='retrace'):
    super(ModelBasedQEstimator, self).__init__()
    # direct Q-value model
    self.q_value_models = nn.ModuleList(
        [get_model(copy.deepcopy(network_args)) for _ in range(2)])
    self.target_q_value_models = nn.ModuleList(
        [get_model(copy.deepcopy(network_args)) for _ in range(2)])
    q_model_output = self.q_value_models[0].n_out
    self.q_value_variables = nn.ModuleList(
        [get_variable(type='value', args={'n_input': q_model_output})
         for _ in range(2)])
    self.target_q_value_variables = nn.ModuleList(
        [get_variable(type='value', args={'n_input': q_model_output})
         for _ in range(2)])
    # model
    self.state_likelihood_model = get_model(model_args['state_likelihood_args'])
    model_args['state_variable_args']['n_input'] = self.state_likelihood_model.n_out
    self.state_variable = get_variable(type='observed',
                                       args=model_args['state_variable_args'])
    self.reward_likelihood_model = None
    if learn_reward:
        self.reward_likelihood_model = get_model(model_args['reward_likelihood_args'])
        model_args['reward_variable_args']['n_input'] = self.reward_likelihood_model.n_out
        self.reward_variable = get_variable(type='observed',
                                            args=model_args['reward_variable_args'])
    else:
        raise NotImplementedError
    # hyper-parameters and internal attributes
    self.horizon = horizon
    self.value_estimate = value_estimate
    self.q_std = None
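# Hedged illustration of how a model-based Q-estimate is typically assembled
# from a `horizon`-step rollout: discounted model rewards plus a bootstrapped
# terminal value. This helper is an assumption, not the source implementation.
def discounted_rollout_value(rewards, terminal_value, discount=0.99):
    # rewards: per-step rewards from the learned reward model, in time order
    value = terminal_value
    for r in reversed(rewards):
        value = r + discount * value
    return value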
def __init__(self, network_args, n_inf_iters, encoding_type='grads'):
    super(IterativeInferenceModel, self).__init__()
    self.inference_model = get_model(network_args)
    self.n_inf_iters = n_inf_iters
    assert encoding_type in ['grads', 'errors']
    self.encoding_type = encoding_type
    # keep track of estimated objectives for reporting
    self.estimated_objectives = []
    # keep track of parameters for analysis
    self.dist_params = []
def sample_realnvp_images(hparams):
    # load the pre-trained RealNVP model from the checkpoint directory
    model = nvp_model.get_model(
        model_dir=os.path.dirname(hparams.checkpoint_path))
    model = model.eval()
    model = model.cuda()
    # sample latents, invert the flow, and post-process into image space
    z = model.sample_z(n=hparams.batch_size)
    x = model.postprocess(model.inverse(z))
    x = x.detach().cpu().numpy()
    images = {i: image.reshape(1, -1) for (i, image) in enumerate(x)}
    return images
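# Hedged usage sketch: `sample_realnvp_images` only reads `checkpoint_path`
# and `batch_size` from `hparams`, so a simple namespace suffices. The
# checkpoint path below is hypothetical.
from types import SimpleNamespace

hparams = SimpleNamespace(checkpoint_path='checkpoints/realnvp/model.ckpt',
                          batch_size=16)
images = sample_realnvp_images(hparams)  # dict mapping index -> (1, D) array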
def __init__(self, model_args=None):
    super(GoalBasedQEstimator, self).__init__()
    # model
    if model_args:
        self.state_likelihood_model = get_model(model_args['state_likelihood_args'])
        model_args['state_variable_args']['n_input'] = self.state_likelihood_model.n_out
        self.state_variable = get_variable(type='observed',
                                           args=model_args['state_variable_args'])
    else:
        self.state_likelihood_model = None
        self.state_variable = None
    self.reward_likelihood_model = None
    self.reward_variable = None
    # hyper-parameters and internal attributes
    self.goal_state = None
    self.goal_std = 1.
    self.horizon = 1
    self.errors = {}
def __init__(self, prior_args, approx_post_args, prior_model_args,
             q_value_estimator_args, inference_optimizer_args, misc_args,
             direct_inference_optimizer_args, direct_approx_post_args,
             state_value_estimator_args):
    super(Agent, self).__init__()
    misc_args = postprocess_misc_args(misc_args)

    # prior
    self.prior_model = get_model(prior_model_args)
    self.target_prior_model = copy.deepcopy(self.prior_model)
    if self.prior_model is not None:
        prior_args['n_input'] = self.prior_model.n_out
    else:
        prior_args['n_input'] = None
    self.prior = Distribution(**prior_args)
    self.target_prior = Distribution(**prior_args)

    # approximate posterior
    self.inference_optimizer = get_inference_optimizer(inference_optimizer_args)
    if 'inference_model' in dir(self.inference_optimizer):
        approx_post_args['n_input'] = self.inference_optimizer.inference_model.n_out
    else:
        approx_post_args['n_input'] = None
    self.approx_post = Distribution(**approx_post_args)
    self.target_inference_optimizer = self.target_approx_post = None
    if misc_args['inf_target_kl'] or misc_args['target_inf_value_targets']:
        self.target_inference_optimizer = copy.deepcopy(self.inference_optimizer)
        self.target_approx_post = Distribution(**approx_post_args)

    # optional direct inference optimizer for model-based value estimation
    self.direct_inference_optimizer = self.direct_approx_post = None
    if direct_inference_optimizer_args is not None:
        self.direct_inference_optimizer = get_inference_optimizer(
            direct_inference_optimizer_args)
        direct_approx_post_args['n_input'] = \
            self.direct_inference_optimizer.inference_model.n_out
        self.direct_approx_post = Distribution(**direct_approx_post_args)

    # Q-value estimator
    self.q_value_estimator = get_value_estimator('action', q_value_estimator_args)

    # state value estimator
    self.state_value_estimator = None
    if state_value_estimator_args is not None:
        self.state_value_estimator = get_value_estimator(
            'state', state_value_estimator_args)

    # Lagrange multipliers for KL, location KL, and scale KL
    self.log_alphas = nn.ParameterDict({
        'pi': nn.Parameter(torch.zeros(1)),
        'loc': nn.Parameter(torch.zeros(1)),
        'scale': nn.Parameter(torch.zeros(1))
    })
    if misc_args['inf_target_kl']:
        self.log_alphas['target_inf'] = nn.Parameter(torch.zeros(1))

    # miscellaneous
    self.epsilons = misc_args['epsilons']
    self.n_action_samples = misc_args['n_action_samples']
    self.n_q_action_samples = misc_args['n_q_action_samples']
    self.postprocess_action = misc_args['postprocess_action']
    self.reward_discount = misc_args['reward_discount']
    self.retrace_lambda = misc_args['retrace_lambda']
    self.model_value_targets = misc_args['model_value_targets']
    self.optimize_targets = misc_args['optimize_targets']
    self.direct_targets = misc_args['direct_targets']
    self.off_policy_targets = misc_args['off_policy_targets']
    self.inf_target_kl = misc_args['inf_target_kl']
    self.target_inf_value_targets = misc_args['target_inf_value_targets']
    self.critic_grad_penalty = misc_args['critic_grad_penalty']
    self.pessimism = misc_args['pessimism']
    self.optimism = misc_args['optimism']

    # mode (either 'train' or 'eval')
    self.mode = 'train'

    # collects relevant quantities
    self.collector = Collector(self)

    # internal variables
    self.batch_size = 1
    self._prev_action = self._prev_state = None
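# Hedged sketch of how log-parameterized Lagrange multipliers like `log_alphas`
# are commonly updated (the agent's actual dual update is not shown here):
# exponentiating keeps alpha positive, and the dual loss drives the constrained
# quantity (e.g. a KL divergence) toward its epsilon threshold.
import torch

def alpha_dual_loss(log_alpha, constraint_value, epsilon):
    # detach the constraint so this loss only updates the multiplier
    return -(log_alpha.exp() * (constraint_value.detach() - epsilon))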
log_path, log_dir = init_log(log_root, train_config)
print('Experiment: ' + log_dir)
global vis
vis, handle_dict = init_plot(train_config, arch, env=log_dir)

# load data, labels
data_path = train_config['data_path']
train_loader, val_loader, label_names = load_data(
    train_config['dataset'], data_path, train_config['batch_size'],
    cuda_device=train_config['cuda_device'])

# construct model
model = get_model(train_config, arch, train_loader)

# get optimizers
(enc_opt, enc_scheduler), (dec_opt, dec_scheduler), start_epoch = get_optimizers(
    train_config, arch, model)

for epoch in range(start_epoch + 1, 2000):
    print('Epoch: ' + str(epoch + 1))
    # train
    tic = time.time()
    model.train()
    train(model, train_config, arch, train_loader, epoch + 1, handle_dict,
          (enc_opt, dec_opt))
    toc = time.time()
    print('Training Time: ' + str(toc - tic))
def __init__(self, network_args):
    super(DirectInferenceModel, self).__init__()
    self.inference_model = get_model(network_args)
    # direct (amortized) inference uses a single pass
    self.n_inf_iters = 1