Exemple #1
0
 def predict_std(self, state, deterministic_=True):
     """Return the policy's action standard deviation for ``state``.

     The state is normalised against ``self._state_bounds``, cast to the
     configured ``float_type`` and pushed into the model with
     ``setStates`` before ``self._q_action_std()`` is evaluated.

     Unless the ``disable_parameter_scaling`` setting is present and
     truthy, the result is multiplied by
     ``action_bound_std(self._action_bounds)``.

     ``deterministic_`` is accepted for interface compatibility but is
     not used by this implementation.
     """
     normalized = norm_state(state, self._state_bounds)
     normalized = np.array(normalized, dtype=self._settings['float_type'])
     self._model.setStates(normalized)

     scaling_disabled = ('disable_parameter_scaling' in self._settings
                         and self._settings['disable_parameter_scaling'])
     raw_std = self._q_action_std()
     if scaling_disabled:
         # Caller asked for the network output in its native scale.
         return raw_std
     return raw_std * action_bound_std(self._action_bounds)
Exemple #2
0
    def predict_std(self, state, deterministic_=True):
        """Return the policy's action standard deviation for ``state``.

        The state is normalised against ``self._state_bounds`` and cast to
        the configured ``float_type``. It is then wrapped in a list and
        passed to ``self._q_action_std``; the first element of the result
        is multiplied by ``action_bound_std(self._action_bounds)``.

        ``deterministic_`` is accepted for interface compatibility but is
        not used by this implementation.
        """
        normalized = norm_state(state, self._state_bounds)
        normalized = np.array(normalized, dtype=self._settings['float_type'])

        raw_std = self._q_action_std([normalized])[0]
        bound_scale = action_bound_std(self._action_bounds)
        return raw_std * bound_scale
 def predict_std(self, state, deterministic_=True):
     """Return the policy's action standard deviation for ``state``.

     The state is normalised against ``self._state_bounds``, cast to the
     configured ``float_type`` and stored on the model via ``setStates``.
     The first element of ``self._q_action_std()`` is then returned,
     multiplied by ``action_bound_std(self._action_bounds)`` unless the
     ``disable_parameter_scaling`` setting is present and truthy.

     ``deterministic_`` is accepted for interface compatibility but is
     not used by this implementation.
     """
     model_input = np.array(
         norm_state(state, self._state_bounds),
         dtype=self._settings['float_type'])
     self._model.setStates(model_input)

     skip_scaling = ('disable_parameter_scaling' in self._settings
                     and self._settings['disable_parameter_scaling'])
     unscaled = self._q_action_std()[0]
     return (unscaled if skip_scaling
             else unscaled * action_bound_std(self._action_bounds))
Exemple #4
0
 def predict_std(self, state, deterministic_=True):
     """Return the policy's action standard deviation for ``state``.

     The state is normalised against ``self._state_bounds``, cast to the
     configured ``float_type`` and stored on the model via ``setStates``.
     The actor network's ``predict`` output (``batch_size=1``) is then
     sliced from column ``self._action_length`` onwards.
     NOTE(review): presumably the leading columns hold the action means
     and the trailing ones the std values -- confirm against the network
     definition.

     Unless the ``disable_parameter_scaling`` setting is present and
     truthy, the slice is multiplied by
     ``action_bound_std(self._action_bounds)``.

     ``deterministic_`` is accepted for interface compatibility but is
     not used by this implementation.
     """
     normalized = norm_state(state, self._state_bounds)
     normalized = np.array(normalized, dtype=self._settings['float_type'])
     self._model.setStates(normalized)

     scaling_disabled = ('disable_parameter_scaling' in self._settings
                         and self._settings['disable_parameter_scaling'])
     actor = self._model.getActorNetwork()
     network_out = actor.predict(normalized, batch_size=1)
     raw_std = network_out[:, self._action_length:]
     if scaling_disabled:
         return raw_std
     return raw_std * action_bound_std(self._action_bounds)