Example #1
    def __init__(self, network_args):
        super(DirectQEstimator, self).__init__()
        # two Q-value networks, each paired with a target copy
        self.q_value_models = nn.ModuleList(
            [get_model(copy.deepcopy(network_args)) for _ in range(2)])
        self.target_q_value_models = nn.ModuleList(
            [get_model(copy.deepcopy(network_args)) for _ in range(2)])
        q_model_output = self.q_value_models[0].n_out
        self.q_value_variables = nn.ModuleList(
            [get_variable(type='value', args={'n_input': q_model_output})
             for _ in range(2)])
        self.target_q_value_variables = nn.ModuleList(
            [get_variable(type='value', args={'n_input': q_model_output})
             for _ in range(2)])
        # internal attribute for the Q-value standard deviation
        self.q_std = None
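
The q_value_models / target_q_value_models pair above follows the common twin-Q pattern: two independent Q estimates plus frozen target copies. The sketch below shows that pattern in plain PyTorch; make_q_net, obs_dim, and act_dim are illustrative placeholders, and the original get_model / get_variable builders are assumed to return comparable modules.

import copy

import torch
import torch.nn as nn

# Stand-in for whatever architecture get_model(network_args) builds.
def make_q_net(obs_dim, act_dim, hidden=64):
    return nn.Sequential(
        nn.Linear(obs_dim + act_dim, hidden), nn.ReLU(),
        nn.Linear(hidden, 1))

obs_dim, act_dim = 8, 2
q_nets = nn.ModuleList([make_q_net(obs_dim, act_dim) for _ in range(2)])
target_q_nets = nn.ModuleList([copy.deepcopy(net) for net in q_nets])

# Clipped double-Q style target: take the element-wise minimum of the two
# target networks' outputs to reduce over-estimation bias.
obs_act = torch.randn(4, obs_dim + act_dim)
with torch.no_grad():
    target_q = torch.min(target_q_nets[0](obs_act), target_q_nets[1](obs_act))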
Example #2
    def __init__(self,
                 network_args,
                 model_args,
                 horizon,
                 learn_reward=True,
                 value_estimate='retrace'):
        super(ModelBasedQEstimator, self).__init__()
        # direct Q-value model
        self.q_value_models = nn.ModuleList(
            [get_model(copy.deepcopy(network_args)) for _ in range(2)])
        self.target_q_value_models = nn.ModuleList(
            [get_model(copy.deepcopy(network_args)) for _ in range(2)])
        q_model_output = self.q_value_models[0].n_out
        self.q_value_variables = nn.ModuleList([
            get_variable(type='value', args={'n_input': q_model_output})
            for _ in range(2)
        ])
        self.target_q_value_variables = nn.ModuleList([
            get_variable(type='value', args={'n_input': q_model_output})
            for _ in range(2)
        ])

        # model
        self.state_likelihood_model = get_model(
            model_args['state_likelihood_args'])
        model_args['state_variable_args'][
            'n_input'] = self.state_likelihood_model.n_out
        self.state_variable = get_variable(
            type='observed', args=model_args['state_variable_args'])

        self.reward_likelihood_model = None
        if learn_reward:
            self.reward_likelihood_model = get_model(
                model_args['reward_likelihood_args'])
            model_args['reward_variable_args'][
                'n_input'] = self.reward_likelihood_model.n_out
            self.reward_variable = get_variable(
                type='observed', args=model_args['reward_variable_args'])
        else:
            # learn_reward=False (using a fixed reward function) is not supported
            raise NotImplementedError

        # hyper-parameters and internal attributes
        self.horizon = horizon
        self.value_estimate = value_estimate
        self.q_std = None
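
The horizon and value_estimate attributes suggest this estimator rolls the learned state/reward model forward before bootstrapping with the Q networks. As a rough illustration only (not the repository's retrace estimator), the sketch below shows a generic discounted model rollout; dynamics, reward_fn, policy, and q_fn are hypothetical stand-ins for the modules built in the constructor above.

import torch

# Hypothetical n-step model-based estimate: accumulate predicted rewards
# along a model rollout, then bootstrap with a Q estimate at the horizon.
def model_based_q(state, action, dynamics, reward_fn, policy, q_fn,
                  horizon=2, discount=0.99):
    total = torch.zeros(state.shape[0], 1)
    factor = 1.0
    s, a = state, action
    for _ in range(horizon):
        total = total + factor * reward_fn(s, a)
        s = dynamics(s, a)   # predicted next state from the learned model
        a = policy(s)        # action chosen by the current policy
        factor *= discount
    return total + factor * q_fn(s, a)  # bootstrapped value at the horizon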
Example #3
    def __init__(self, model_args=None):
        super(GoalBasedQEstimator, self).__init__()
        # model
        if model_args:
            self.state_likelihood_model = get_model(
                model_args['state_likelihood_args'])
            model_args['state_variable_args'][
                'n_input'] = self.state_likelihood_model.n_out
            self.state_variable = get_variable(
                type='observed', args=model_args['state_variable_args'])
        else:
            self.state_likelihood_model = None
            self.state_variable = None
        self.reward_likelihood_model = None
        self.reward_variable = None

        # hyper-parameters and internal attributes
        self.goal_state = None
        self.goal_std = 1.
        self.horizon = 1

        self.errors = {}
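
Given the goal_state and goal_std attributes, a natural reading is that this estimator scores states by how close they are to a fixed goal under a Gaussian with standard deviation goal_std. The snippet below is only a hypothetical illustration of that idea, not code taken from the original class.

import torch

# Unnormalised Gaussian log-likelihood of the state under the goal:
# values closer to zero mean the state is nearer the goal.
def goal_log_likelihood(state, goal_state, goal_std=1.0):
    err = (state - goal_state) / goal_std
    return -0.5 * (err ** 2).sum(dim=-1, keepdim=True)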