def __init__(self, network_args):
    """Create the Q-value models, their target copies, and value variables.

    Two Q-value models and two target Q-value models are built with
    ``get_model``; every call receives its own deep copy of
    ``network_args``. Two 'value' variables are then created for each
    group, with ``n_input`` taken from ``n_out`` of the first Q-value model.

    Args:
        network_args: configuration handed (as a deep copy) to ``get_model``.
    """
    super(DirectQEstimator, self).__init__()

    def build_networks():
        # One fresh deep copy of the configuration per network.
        return nn.ModuleList(
            [get_model(copy.deepcopy(network_args)) for _ in range(2)])

    def build_value_variables(n_input):
        # A new args dict is created for every variable.
        return nn.ModuleList(
            [get_variable(type='value', args={'n_input': n_input})
             for _ in range(2)])

    # Construction order (models, target models, variables, target
    # variables) is kept so parameter initialisation happens in the same
    # sequence.
    self.q_value_models = build_networks()
    self.target_q_value_models = build_networks()

    value_input_size = self.q_value_models[0].n_out
    self.q_value_variables = build_value_variables(value_input_size)
    self.target_q_value_variables = build_value_variables(value_input_size)

    self.q_std = None
def __init__(self, network_args, model_args, horizon, learn_reward=True,
             value_estimate='retrace'):
    """Build the direct Q-value networks and the state / reward models.

    Args:
        network_args: configuration for the Q-value networks; each of the
            two Q-value models and two target models is built by
            ``get_model`` from its own deep copy.
        model_args: mapping with the keys 'state_likelihood_args',
            'state_variable_args', 'reward_likelihood_args' and
            'reward_variable_args'. It is deep-copied before use, so the
            'n_input' entries filled in here never appear in the caller's
            mapping.
        horizon: stored unchanged as ``self.horizon``.
        learn_reward: must be truthy; the non-learned reward case is not
            implemented.
        value_estimate: stored unchanged as ``self.value_estimate``.

    Raises:
        NotImplementedError: if ``learn_reward`` is falsy.
    """
    # Fail fast: this configuration can never succeed, so reject it before
    # any network is constructed.
    if not learn_reward:
        raise NotImplementedError(
            'ModelBasedQEstimator requires learn_reward=True; '
            'a non-learned reward model is not implemented.')

    super(ModelBasedQEstimator, self).__init__()

    # Work on a private copy: 'n_input' is written into the variable args
    # below, and that must not leak into the caller's configuration
    # (network_args is likewise only ever used through deep copies).
    model_args = copy.deepcopy(model_args)

    # direct Q-value model
    self.q_value_models = nn.ModuleList(
        [get_model(copy.deepcopy(network_args)) for _ in range(2)])
    self.target_q_value_models = nn.ModuleList(
        [get_model(copy.deepcopy(network_args)) for _ in range(2)])
    q_model_output = self.q_value_models[0].n_out
    self.q_value_variables = nn.ModuleList(
        [get_variable(type='value', args={'n_input': q_model_output})
         for _ in range(2)])
    self.target_q_value_variables = nn.ModuleList(
        [get_variable(type='value', args={'n_input': q_model_output})
         for _ in range(2)])

    # model
    self.state_likelihood_model = get_model(
        model_args['state_likelihood_args'])
    state_variable_args = model_args['state_variable_args']
    # The observed state variable is fed by the state likelihood model.
    state_variable_args['n_input'] = self.state_likelihood_model.n_out
    self.state_variable = get_variable(
        type='observed', args=state_variable_args)

    self.reward_likelihood_model = get_model(
        model_args['reward_likelihood_args'])
    reward_variable_args = model_args['reward_variable_args']
    # Likewise, the reward variable is fed by the reward likelihood model.
    reward_variable_args['n_input'] = self.reward_likelihood_model.n_out
    self.reward_variable = get_variable(
        type='observed', args=reward_variable_args)

    # hyper-parameters and internal attributes
    self.horizon = horizon
    self.value_estimate = value_estimate
    self.q_std = None
def __init__(self, model_args=None):
    """Set up the optional state model and the goal-related attributes.

    Args:
        model_args: optional mapping with the keys 'state_likelihood_args'
            and 'state_variable_args'. When falsy (``None`` or empty), no
            state model is built and both state attributes are ``None``.
            When given, it is deep-copied before use, so the 'n_input'
            entry filled in here never appears in the caller's mapping.
    """
    super(GoalBasedQEstimator, self).__init__()

    # model
    if model_args:
        # Work on a private copy so the 'n_input' write below does not
        # modify the configuration object owned by the caller.
        model_args = copy.deepcopy(model_args)
        self.state_likelihood_model = get_model(
            model_args['state_likelihood_args'])
        state_variable_args = model_args['state_variable_args']
        # The observed state variable is fed by the state likelihood model.
        state_variable_args['n_input'] = self.state_likelihood_model.n_out
        self.state_variable = get_variable(
            type='observed', args=state_variable_args)
    else:
        self.state_likelihood_model = None
        self.state_variable = None

    # No reward model is created by this constructor.
    self.reward_likelihood_model = None
    self.reward_variable = None

    # hyper-parameters and internal attributes
    self.goal_state = None
    self.goal_std = 1.
    self.horizon = 1
    self.errors = {}