def predict(self, state, deterministic_=True, evaluation_=False, p=None, sim_index=None, bootstrapping=False):
    """
    Compute the policy action for a single raw state.

    The state is normalized into ``self._state_bounds``, cast to the
    configured float type, and run through the actor network; the first
    ``self._action_length`` network outputs are then scaled back into
    ``self._action_bounds``.

    Parameters
    ----------
    state : array-like
        Raw (unnormalized) environment state.
    deterministic_, evaluation_, p, sim_index, bootstrapping :
        Accepted for interface compatibility with other agents; unused
        in this implementation.

    Returns
    -------
    Action array scaled into ``self._action_bounds``.
    """
    state = norm_state(state, self._state_bounds)
    state = np.array(state, dtype=self._settings['float_type'])
    self._model.setStates(state)
    # The actor network may emit more than _action_length outputs
    # (presumably extra distribution parameters — TODO confirm); only the
    # leading _action_length columns are treated as the action.
    action_ = scale_action(
        self._model.getActorNetwork().predict(
            state, batch_size=1)[:, :self._action_length],
        self._action_bounds)
    return action_
def predict(self, state, deterministic_=True, evaluation_=False, p=None, sim_index=None, bootstrapping=False):
    """
    Produce the policy's action for one raw state.

    The state is normalized into ``self._state_bounds``, cast to the
    configured float type, and pushed into the model; the action is
    obtained from ``self._q_action()`` and — unless parameter scaling is
    disabled in the settings — scaled back into ``self._action_bounds``.

    The extra keyword arguments exist for interface compatibility with
    other agents and are not used here.
    """
    normalized = norm_state(state, self._state_bounds)
    normalized = np.array(normalized, dtype=self._settings['float_type'])
    self._model.setStates(normalized)
    scaling_disabled = (('disable_parameter_scaling' in self._settings)
                        and (self._settings['disable_parameter_scaling']))
    if scaling_disabled:
        # Network output is taken as-is, already in environment units.
        action_ = self._q_action()
    else:
        action_ = scale_action(self._q_action(), self._action_bounds)
    return action_
def predictWithDropout(self, state, deterministic_=True):
    """
    Compute an action via the actor network for a single raw state.

    Mirrors ``predict``: the state is cast to the configured float type,
    normalized into ``self._state_bounds``, run through the actor
    network, and the first ``self._action_length`` outputs are scaled
    back into ``self._action_bounds``.

    Parameters
    ----------
    state : array-like
        Raw (unnormalized) environment state.
    deterministic_ : bool
        Accepted for interface compatibility; unused here.

    Returns
    -------
    Action array scaled into ``self._action_bounds``.
    """
    state = np.array(state, dtype=self._settings['float_type'])
    state = norm_state(state, self._state_bounds)
    # BUG FIX: the original passed the undefined name `states` to
    # predict(), which raises NameError at runtime; the prepared `state`
    # is the intended network input.
    action_ = scale_action(
        self._model.getActorNetwork().predict(
            state, batch_size=1)[:, :self._action_length],
        self._action_bounds)
    return action_
def predict(self, state, deterministic_=True, evaluation_=False, p=None, sim_index=None, bootstrapping=False):
    """
    Return the actor network's action for ``state``.

    The state is normalized into ``self._state_bounds`` and cast to the
    configured float type before the forward pass; the first
    ``self._action_length`` network outputs are scaled back into
    ``self._action_bounds``. The extra keyword arguments exist only for
    interface compatibility and are unused.
    """
    net_input = np.array(norm_state(state, self._state_bounds),
                         dtype=self._settings['float_type'])
    raw_output = self._model.getActorNetwork().predict(net_input, batch_size=1)
    # Keep only the leading _action_length columns and map them back
    # into the environment's action range.
    return scale_action(raw_output[:, :self._action_length],
                        self._action_bounds)
def predict(self, state, deterministic_=True, evaluation_=False, p=None, sim_index=None, bootstrapping=False):
    """
    Compute the policy action for one raw state.

    Normalizes the state into ``self._state_bounds``, loads it into the
    model, and takes the first row of ``self._q_action()``. Unless
    parameter scaling is disabled in the settings, the result is scaled
    back into ``self._action_bounds``. Extra keyword arguments exist
    only for interface compatibility and are unused.
    """
    net_input = norm_state(state, self._state_bounds)
    net_input = np.array(net_input, dtype=self._settings['float_type'])
    self._model.setStates(net_input)
    raw_action = self._q_action()[0]
    if (('disable_parameter_scaling' in self._settings)
            and (self._settings['disable_parameter_scaling'])):
        return raw_action
    # Transform the action value back into the environment's range.
    return scale_action(raw_action, self._action_bounds)
def predictWithDropout(self, state, deterministic_=True):
    """
    Produce an action using the dropout-enabled actor forward pass.

    The state is cast to the configured float type, normalized into
    ``self._state_bounds``, loaded into the model, and the output of
    ``self._q_action_drop()`` is scaled back into
    ``self._action_bounds``. ``deterministic_`` exists only for
    interface compatibility and is unused.
    """
    net_input = np.array(state, dtype=self._settings['float_type'])
    net_input = norm_state(net_input, self._state_bounds)
    self._model.setStates(net_input)
    # _q_action_drop is the dropout variant of the action function —
    # presumably a stochastic forward pass; verify against its definition.
    return scale_action(self._q_action_drop(), self._action_bounds)
def predictWithDropout(self, state, deterministic_=True):
    """
    Produce an action using the dropout-enabled actor forward pass.

    The state is cast to the configured float type and normalized into
    ``self._state_bounds``; the first row of ``self._q_action_drop()``
    is scaled back into ``self._action_bounds``. ``deterministic_``
    exists only for interface compatibility and is unused.
    """
    net_input = norm_state(
        np.array(state, dtype=self._settings['float_type']),
        self._state_bounds)
    self._model.setStates(net_input)
    return scale_action(self._q_action_drop()[0], self._action_bounds)