import numpy as np

def get_x_y(data_list):
    # Build training inputs (states) and targets (updated Q values)
    # from a list of transition dicts
    interpolator = Interpolator()
    interpolator.set_u(ACTIONS)
    x = []
    y = []
    for data_row in data_list:
        # Bellman target: reward, plus discounted future value if not terminal
        new_q = data_row["reward"]
        if not data_row["done"]:
            new_q += DISCOUNT * np.max(data_row["next_qualities"])
        # Write the target into the interpolated Q function at the taken action
        interpolator.set_q(data_row["qualities"])
        interpolator.update_function(data_row["action"], new_q)
        x.append(data_row["state"])
        y.append(interpolator.get_q())
    return x, y
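Both functions here rely on an `Interpolator` class that is not shown. The stub below is a minimal sketch of the interface the code appears to assume, not the real implementation: scalar actions in `ACTIONS`, one Q value per support point, and a nearest-point overwrite in `update_function` are all assumptions; the actual class may interpolate between neighbouring support points instead.

import numpy as np

class Interpolator:
    """Sketch of the assumed interface; the real class is not shown."""

    def set_u(self, u):
        # Support points: the discrete actions the Q values live on
        self.u = np.asarray(u, dtype=float)

    def set_q(self, q):
        # Current Q values, one per support point
        self.q = np.array(q, dtype=float)

    def update_function(self, action, new_q):
        # Overwrite the Q value at the support point nearest to `action`
        # (assumption: the real class may spread the update over neighbours)
        nearest = int(np.argmin(np.abs(self.u - action)))
        self.q[nearest] = new_q

    def get_q(self):
        # Return a copy so later updates don't mutate already-appended targets
        return self.q.copy()

The copy in `get_q()` matters for `get_x_y()`: the returned array is appended on every loop iteration, so if the real class hands back its internal buffer, every appended target would alias the same array.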
def train(self, terminal_state):
    # Start training only if a certain number of samples is already saved
    if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
        return

    # Calculate Prioritized Experience Replay weights: sampling probability
    # of each transition is its absolute TD error, rescaled to [0, 1]
    # and normalised to sum to 1
    current_states = np.array([transition[0] for transition in self.replay_memory])
    future_states = np.array([transition[3] for transition in self.replay_memory])
    current_qs = self.model.predict(current_states)
    future_qs = self.target_model.predict(future_states)
    p = np.array([abs((reward + DISCOUNT * np.amax(future_qs[index]) if not done else reward)
                      - current_qs[index][ACTIONS.index(action)])
                  for index, (_, action, reward, _, done) in enumerate(self.replay_memory)])
    p = np.interp(p, (p.min(), p.max()), (0, 1))
    p /= np.sum(p)

    # Draw a minibatch weighted by priority (replaces a plain uniform
    # random.sample(self.replay_memory, MINIBATCH_SIZE))
    minibatch = np.array(self.replay_memory)[
        np.random.choice(len(self.replay_memory), size=MINIBATCH_SIZE, replace=False, p=p)]

    # Get current states from minibatch, then query main model for Q values
    current_states = np.array([transition[0] for transition in minibatch])
    current_qs_list = self.model.predict(current_states)

    # Get future states from minibatch and query both networks:
    # the main network selects the greedy action (Double DQN),
    # the target network evaluates it
    new_current_states = np.array([transition[3] for transition in minibatch])
    future_target_qs_list = self.target_model.predict(new_current_states)
    future_model_qs_list = self.model.predict(new_current_states)

    x = []
    y = []
    interpolator = Interpolator()
    interpolator.set_u(ACTIONS)

    # Now we need to enumerate our batch
    for index, (current_state, action, reward, new_current_state, done) in enumerate(minibatch):
        # If not a terminal state, bootstrap the new Q from the future state
        # (Double DQN target); otherwise the target is just the reward
        if not done:
            max_future_q = future_target_qs_list[index][np.argmax(future_model_qs_list[index])]
            new_q = reward + DISCOUNT * max_future_q
        else:
            new_q = reward

        # Update the Q function for the taken action via the interpolator
        current_qs = np.reshape(current_qs_list[index], OUTPUT_2D_SHAPE)
        interpolator.set_q(current_qs)
        interpolator.update_function(action, new_q)
        current_qs = interpolator.get_q()

        # And append to our training data
        x.append(current_state)
        y.append(np.reshape(current_qs, OUTPUT_1D_SHAPE))

    # Fit on all samples as one batch, log only on terminal state
    self.model.fit(np.array(x), np.array(y), batch_size=MINIBATCH_SIZE, verbose=0,
                   shuffle=False, callbacks=[self.tensorboard] if terminal_state else None)

    # Update target network counter every episode
    if terminal_state:
        self.target_update_counter += 1

    # If counter reaches set value, update target network with weights of main network
    if self.target_update_counter > UPDATE_TARGET_EVERY:
        self.target_model.set_weights(self.model.get_weights())
        self.target_update_counter = 0
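Note that the target computed inside the loop is the Double DQN target, not the vanilla DQN one: the online network picks the greedy next action and the target network scores it. With $\theta$ the online weights and $\theta^{-}$ the target weights:

$$y = r + \gamma \, Q_{\theta^{-}}\!\left(s',\ \arg\max_{a'} Q_{\theta}(s', a')\right)$$

This decoupling of action selection from action evaluation is what reduces the overestimation bias of taking a single max over the target network's outputs.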
self.save_replay_memory()
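For reference, the priority computation at the top of train() can be exercised on its own. The sketch below reproduces that arithmetic on made-up TD errors: absolute errors are rescaled to [0, 1] with np.interp and then normalised into a probability distribution. One consequence, inherited from the code above, is that the transition with the smallest error gets probability 0 and can never be sampled, which differs from the proportional-prioritisation scheme in the original PER paper.

import numpy as np

# Toy absolute TD errors for five stored transitions (made-up values)
td_errors = np.array([0.05, 1.2, 0.3, 2.4, 0.9])

# Rescale to [0, 1] exactly as train() does...
p = np.interp(td_errors, (td_errors.min(), td_errors.max()), (0, 1))
# ...then normalise into sampling probabilities
p /= np.sum(p)

print(p)        # -> approx [0.    0.25  0.054 0.511 0.185]
print(p.sum())  # -> 1.0; the smallest-error transition has probability 0

# Sample a "minibatch" of indices without replacement, as in train()
idx = np.random.choice(len(td_errors), size=3, replace=False, p=p)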