# Trains the Q-matrices for two cooperating robots.
# NOTE: the import paths below are assumptions reconstructed from the topic and
# message names used in this file; adjust them to match the actual package layout.
import random

import numpy as np
import rospy

from q_learning import QLearning
from robot_final_project.msg import (QLearningReward, QMatrix, QMatrixRow,
                                     RobotTasksDoors)


class QLearningTraining(object):
    # Initialize publishers and subscribers
    def __init__(self):
        # Publish Q-matrix updates to the Q-matrix topic
        self.q_matrix_pub = rospy.Publisher(
            "/RobotFinalProject/QMatrix", QMatrix, queue_size=10)

        # Publish to /robot_action for training the Q-matrices
        self.q_learning = QLearning()
        self.robot_action_pub = rospy.Publisher(
            "/RobotFinalProject/robot_action", RobotTasksDoors, queue_size=10)

        # Subscribe to the environment to receive reward updates
        self.reward = rospy.Subscriber(
            "/RobotFinalProject/reward", QLearningReward, self.update_q_matrix)

        rospy.sleep(2)

        # Initialize the Q-matrix messages, one for each robot
        self.q_matrix_msg_a = QMatrix()
        self.q_matrix_msg_b = QMatrix()

        # For ease of use in training, store the Q-matrices as numpy arrays
        # (128 states x 7 actions)
        self.q_matrix_arr_a = np.zeros((128, 7))
        self.q_matrix_arr_b = np.zeros((128, 7))

        # Track the history of Q-value changes per Q-matrix; the most recent
        # 100 entries are checked for a plateau in has_converged()
        self.q_history_a = []
        self.q_history_b = []

        # Current (starting) state, initialized to the 0th state.
        # This state accounts for both robots.
        self.current_state = 0

        # Get a new state and action from the starting state
        new_state, action = self.get_random_action(self.current_state)

        # Current action is the action from the current state to the new state
        self.current_action = action

        # Keep track of the new state
        self.new_state = new_state

        # Move the robot according to the current action;
        # update_q_matrix() runs as the reward callback
        self.complete_task_or_door(self.current_action)

    # Selects a random valid action from the current state
    def get_random_action(self, state):
        # Stores all valid (next state, action) pairs at the current state
        valid_actions = []

        # Search each next possible state from the current state
        for (s, a) in enumerate(self.q_learning.action_matrix[state]):
            # If valid, append to valid_actions
            if int(a) != -1:
                valid_actions.append((s, int(a)))

        # Select a random action among the valid actions
        if len(valid_actions) > 0:
            (new_state, action) = random.choice(valid_actions)
            return (new_state, action)
        else:
            # Otherwise, reset to the initial state
            self.current_state = 0
            return self.get_random_action(0)

    # Send a message to complete a task / close a door
    def complete_task_or_door(self, action):
        # Initialize the robot movement message
        self.movement = RobotTasksDoors()

        # Look up the current action
        current_action = self.q_learning.actions[action]

        # Set the message fields to the appropriate values
        self.movement.task_or_door = current_action["task"]
        self.movement.is_complete = current_action["complete"]

        # Publish the movement
        self.robot_action_pub.publish(self.movement)

    def update_q_matrix(self, data):
        # data.reward carries the reward; both robots share the same reward
        # function because the task is fully cooperative

        # Discount factor
        gamma = 0.8
        # Learning rate
        alpha = 1.0

        # Update Q(s, a) for each robot. There are two Q-functions, one per
        # learning agent (robot), each dependent on the actions of both robots.
        # Full equation: Q_x = (1 - alpha) * Q_x + alpha * (reward + gamma * Nash(s, a, b))
        # The (1 - alpha) * Q_x term vanishes when alpha = 1, so it is omitted
        # below. The Nash term is approximated by each robot's own max over the
        # next state's Q-values (a joint term combining both robots' values was
        # tried and left disabled).
        Q_a = alpha * (data.reward + gamma * max(self.q_matrix_arr_a[self.new_state]))
        Q_b = alpha * (data.reward + gamma * max(self.q_matrix_arr_b[self.new_state]))

        old_q_a = self.q_matrix_arr_a[self.current_state, self.current_action]
        old_q_b = self.q_matrix_arr_b[self.current_state, self.current_action]

        # Append the change in Q-value to the history so we can check whether
        # Q-value changes are plateauing
        self.q_history_a.append(Q_a - old_q_a)
        self.q_history_b.append(Q_b - old_q_b)

        # Update each Q-matrix in the current spot with the new Q-value
        self.q_matrix_arr_a[self.current_state, self.current_action] = Q_a
        self.q_matrix_arr_b[self.current_state, self.current_action] = Q_b

        # Convert the numpy Q-matrices used for training back into QMatrix()
        # messages and publish them
        self.convert_send_qmatrix_msg()

        # If not converged, take another step:
        if not self.has_converged():
            # Update the current state
            self.current_state = self.new_state
            # Select the next action
            new_state, action = self.get_random_action(self.current_state)
            # Update the new state and current action
            self.new_state = new_state
            self.current_action = action
            # Perform the next action
            self.complete_task_or_door(self.current_action)

    # Determines when the Q-matrix has converged
    def has_converged(self):
        # Plateau threshold: a constant below which the Q-value differences
        # must fall for the changes to qualify as plateauing
        max_diff = 0.01

        # The algorithm has converged once:
        # 1) we have iterated at least 10000 times, and
        # 2) the last 100 Q-value changes are all below the plateau threshold
        #    (absolute value, so large negative swings don't count as a plateau)
        if len(self.q_history_a) > 10000 and max(
                abs(d) for d in self.q_history_a[-100:]) < max_diff:
            self.q_learning.save_q_matrix(self.q_matrix_arr_a, "a")
            print("A Converged!")
        if len(self.q_history_b) > 10000 and max(
                abs(d) for d in self.q_history_b[-100:]) < max_diff:
            self.q_learning.save_q_matrix(self.q_matrix_arr_b, "b")
            print("B Converged! You may Ctrl+C")
            return True
        return False

    # Converts the numpy arrays to QMatrix msgs and publishes them
    def convert_send_qmatrix_msg(self):
        self.q_matrix_msg_a = QMatrix()
        self.q_matrix_msg_b = QMatrix()

        # Convert both Q-matrices into message format before publishing
        for i in range(len(self.q_matrix_arr_a)):
            row = QMatrixRow()
            row.q_matrix_row = self.q_matrix_arr_a[i].astype(int)
            self.q_matrix_msg_a.q_matrix.append(row)
        for i in range(len(self.q_matrix_arr_b)):
            row = QMatrixRow()
            row.q_matrix_row = self.q_matrix_arr_b[i].astype(int)
            self.q_matrix_msg_b.q_matrix.append(row)

        # Publish the Q-matrix messages to the Q-matrix topic
        self.q_matrix_pub.publish(self.q_matrix_msg_a)
        self.q_matrix_pub.publish(self.q_matrix_msg_b)

    # Runs until shutdown
    def run(self):
        rospy.spin()
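# Hypothetical entry point (not in the original listing): a minimal sketch of
# how this trainer would be launched as a ROS node. The node name
# "q_learning_training" is an assumption.
if __name__ == "__main__":
    rospy.init_node("q_learning_training")
    node = QLearningTraining()
    node.run()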
# Single-robot variant: trains one Q-matrix for moving dumbbells to blocks,
# driving a phantom robot. Kept as a separate listing from the two-robot
# trainer above.
# NOTE: as above, the import paths are assumptions reconstructed from the
# topic and message names used in this file.
import random

import numpy as np
import rospy

from q_learning import QLearning
from q_learning_project.msg import (QLearningReward, QMatrix, QMatrixRow,
                                    RobotMoveDBToBlock)


class QLearningTraining(object):
    # Initialize publishers and subscribers
    def __init__(self):
        # Publish Q-matrix updates to the Q-matrix topic
        self.q_matrix_pub = rospy.Publisher(
            "/q_learning/QMatrix", QMatrix, queue_size=10)

        # Publish to /robot_action for phantom robot movement
        self.q_learning = QLearning()
        self.action_pub = rospy.Publisher(
            "/q_learning/robot_action", RobotMoveDBToBlock, queue_size=10)

        # Subscribe to the environment to receive reward updates
        self.reward = rospy.Subscriber(
            "/q_learning/reward", QLearningReward, self.update_q_matrix)

        rospy.sleep(2)

        # Initialize the Q-matrix message
        self.q_matrix_msg = QMatrix()

        # For ease of use in training, store the Q-matrix as a numpy array
        # (64 states x 9 actions)
        self.q_matrix_arr = np.zeros((64, 9))

        # Track the history of Q-value changes; the most recent 100 entries
        # are checked for a plateau in has_converged()
        self.q_history = []

        # Current (starting) state, initialized to the 0th state
        self.current_state = 0

        # Get a new state and action from the starting state
        new_state, action = self.get_random_action(self.current_state)

        # Current action is the action from the current state to the new state
        self.current_action = action

        # Keep track of the new state
        self.new_state = new_state

        # Move the robot according to the current action;
        # update_q_matrix() will be called as the reward callback
        self.move_DB(self.current_action)

    # Selects a random valid action from the current state
    def get_random_action(self, state):
        # Stores all valid (next state, action) pairs at the current state
        valid_actions = []

        # Search each next possible state from the current state
        for (s, a) in enumerate(self.q_learning.action_matrix[state]):
            # If valid, append to valid_actions
            if int(a) != -1:
                valid_actions.append((s, int(a)))

        # Select a random action among the valid actions
        if len(valid_actions) > 0:
            (new_state, action) = random.choice(valid_actions)
            return (new_state, action)
        else:
            # Otherwise, reset to the initial state
            self.current_state = 0
            return self.get_random_action(0)

    # Publishes actions in order to train the (phantom) robot
    def move_DB(self, action):
        self.test_movement = RobotMoveDBToBlock()

        # Look up the current action
        current_action = self.q_learning.actions[action]

        # From the action, store the dumbbell being moved and the block to move it to
        self.test_movement.robot_db = current_action["dumbbell"]
        self.test_movement.block_id = current_action["block"]

        # Publish the action to the phantom robot
        self.action_pub.publish(self.test_movement)

    def update_q_matrix(self, data):
        # data.reward carries the reward

        # Discount factor
        gamma = 0.9

        # Update Q(s, a) with a learning rate of 1:
        # Q(s, a) = reward + gamma * max_a' Q(s', a')
        Q_s_a = data.reward + gamma * max(self.q_matrix_arr[self.new_state])
        old_q_value = self.q_matrix_arr[self.current_state, self.current_action]

        # Append the change in Q-value to the history so we can check whether
        # Q-value changes are plateauing
        self.q_history.append(Q_s_a - old_q_value)

        # Update the Q-matrix in the current spot with the new Q-value
        self.q_matrix_arr[self.current_state, self.current_action] = Q_s_a

        # Convert the numpy Q-matrix used for training back into a QMatrix()
        # message and publish it
        self.convert_send_qmatrix_msg()

        # If not converged, take another step:
        if not self.has_converged():
            # Update the current state
            self.current_state = self.new_state
            # Select the next action
            new_state, action = self.get_random_action(self.current_state)
            # Update the new state and current action
            self.new_state = new_state
            self.current_action = action
            # Perform the next action
            self.move_DB(self.current_action)

    # Determines when the Q-matrix has converged
    def has_converged(self):
        # Plateau threshold: a constant below which the Q-value differences
        # must fall for the changes to qualify as plateauing
        max_diff = 0.01

        # The algorithm has converged once:
        # 1) we have iterated at least 10000 times, and
        # 2) the last 100 Q-value changes are all below the plateau threshold
        #    (absolute value, so large negative swings don't count as a plateau)
        if len(self.q_history) > 10000 and max(
                abs(d) for d in self.q_history[-100:]) < max_diff:
            self.q_learning.save_q_matrix(self.q_matrix_arr)
            print("Converged! You may Ctrl+C")
            return True
        return False

    # Converts the numpy array to a QMatrix msg and publishes it
    def convert_send_qmatrix_msg(self):
        self.q_matrix_msg = QMatrix()
        for i in range(len(self.q_matrix_arr)):
            row = QMatrixRow()
            row.q_matrix_row = self.q_matrix_arr[i].astype(int)
            self.q_matrix_msg.q_matrix.append(row)

        # Publish the Q-matrix message to the Q-matrix topic
        self.q_matrix_pub.publish(self.q_matrix_msg)

    # Runs until shutdown
    def run(self):
        rospy.spin()
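# Illustrative sketch (not part of either node above): the same update rule,
# Q(s, a) = reward + gamma * max_a' Q(s', a') with a learning rate of 1, plus
# the plateau-based convergence test, run on a tiny hand-made MDP. Everything
# here (states, transitions, reward) is invented purely to sanity-check the
# training loop without ROS.
import random

import numpy as np


def toy_training_run(gamma=0.9, plateau=0.01):
    # 3 states x 2 actions; deterministic transitions, one rewarding move
    next_state = {(0, 0): 1, (0, 1): 2, (1, 0): 2, (1, 1): 0, (2, 0): 0, (2, 1): 0}
    reward = {(2, 0): 1.0}
    q = np.zeros((3, 2))
    history = []
    state = 0
    while True:
        # Random valid action, mirroring get_random_action()
        action = random.choice([0, 1])
        s_next = next_state[(state, action)]
        new_q = reward.get((state, action), 0.0) + gamma * max(q[s_next])
        history.append(abs(new_q - q[state, action]))
        q[state, action] = new_q
        # Same shape as has_converged(), with smaller limits for the toy problem
        if len(history) > 1000 and max(history[-100:]) < plateau:
            return q
        state = s_next


print(toy_training_run())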