def step(self, action):
    """Send an action to the rl_agent interface service and return the outcome.

    Waits (in 1 s polls) until the service is available, issues the request
    asynchronously and spins this node until the call completes.

    Args:
        action: Value assigned to the ``action`` field of the ``Dqn`` request.

    Returns:
        A ``(next_state, reward, done)`` tuple. ``next_state`` is the
        response ``state`` converted to an array and reshaped to
        ``[1, self.state_size]``; ``reward`` and ``done`` are taken from the
        response unchanged.

    Raises:
        RuntimeError: If the call completes without a result.
    """
    req = Dqn.Request()
    req.action = action

    while not self.rl_agent_interface_client.wait_for_service(
            timeout_sec=1.0):
        self.get_logger().info(
            'rl_agent interface service not available, waiting again...')

    future = self.rl_agent_interface_client.call_async(req)
    rclpy.spin_until_future_complete(self, future)

    response = future.result()
    if response is None:
        # There is no state/reward/done to hand back, so fail explicitly
        # instead of reaching the return statement with unbound locals.
        message = 'Exception while calling service: {0}'.format(
            future.exception())
        self.get_logger().error(message)
        raise RuntimeError(message)

    next_state = np.reshape(
        np.asarray(response.state), [1, self.state_size])
    return next_state, response.reward, response.done
def process(self):
    """Run the episode loop, driving the environment through the Dqn service.

    For each episode in ``range(1, 1000)`` the method pauses briefly, then
    repeatedly sends an action over ``self.dqn_com_client`` until a response
    reports ``done``. The first action of an episode is the fixed value 2;
    later actions come from ``self.get_action`` applied to the last received
    state. The request's ``init`` flag stays True until the first successful
    response of the episode.

    Returns early if rclpy is shut down while a call is pending.
    """
    for episode in range(1, 1000):
        local_step = 0

        state = list()
        next_state = list()
        done = False
        init = True
        score = 0

        # Reset DQN environment
        time.sleep(0.05)

        while not done:
            local_step += 1

            # Action based on the current state
            if local_step == 1:
                action = 2  # Move forward
            else:
                state = next_state
                action = int(self.get_action(state))

            # Send action and receive next state and reward
            req = Dqn.Request()
            print(int(action))
            req.action = action
            req.init = init
            while not self.dqn_com_client.wait_for_service(
                    timeout_sec=1.0):
                self.get_logger().info(
                    'service not available, waiting again...')

            future = self.dqn_com_client.call_async(req)

            while rclpy.ok():
                rclpy.spin_once(self)
                if future.done():
                    break
            else:
                # rclpy stopped before the call completed. No response will
                # ever set ``done``, so leave instead of looping forever.
                return

            response = future.result()
            if response is None:
                self.get_logger().error(
                    'Exception while calling service: {0}'.format(
                        future.exception()))
                # Retry the same step: advancing would feed the previous
                # (or still empty) state to get_action as if it were new.
                local_step -= 1
                time.sleep(0.01)
                continue

            # Next state and reward
            next_state = response.state
            reward = response.reward
            done = response.done
            score += reward
            init = False

            # While loop rate
            time.sleep(0.01)
def reset_environment(self):
    """Call the reset-environment service and return the initial state.

    Waits (in 1 s polls) until the service is available, sends an empty
    ``Dqn`` request asynchronously and spins this node until it completes.

    Returns:
        The response ``state`` converted to an array and reshaped to
        ``[1, self.state_size]``.

    Raises:
        RuntimeError: If the call completes without a result.
    """
    while not self.reset_environment_client.wait_for_service(
            timeout_sec=1.0):
        self.get_logger().warn(
            'Reset environment client failed to connect to the server, try again ...'
        )

    future = self.reset_environment_client.call_async(Dqn.Request())
    rclpy.spin_until_future_complete(self, future)

    response = future.result()
    if response is None:
        # There is no state to return, so fail explicitly instead of
        # reaching the return statement with ``state`` unbound.
        message = 'Exception while calling service: {0}'.format(
            future.exception())
        self.get_logger().error(message)
        raise RuntimeError(message)

    return np.reshape(np.asarray(response.state), [1, self.state_size])
def process(self):
    """Train the agent episode by episode through the Dqn service.

    Episodes run from ``self.load_episode + 1`` up to (not including)
    ``self.episode_size``. Within an episode the method repeatedly sends an
    action over ``self.dqn_com_client`` until a response reports ``done``.
    The first action is the fixed value 2; later actions come from
    ``self.get_action`` applied to the last received state.

    From the second step on, each transition is stored with
    ``self.append_sample`` and the model is trained once ``global_step``
    passes ``self.train_start`` / ``self.update_target_model_start``. When
    such a step ends the episode, the target model is updated and a summary
    line is printed.

    Every 10th episode the model is saved as an ``.h5`` file and the current
    epsilon is written to a sibling ``.json`` file in
    ``self.model_dir_path``. Epsilon is multiplied by
    ``self.epsilon_decay`` after each episode while it is above
    ``self.epsilon_min``.

    Returns early if rclpy is shut down while a call is pending.
    """
    global_step = 0

    for episode in range(self.load_episode + 1, self.episode_size):
        # NOTE(review): global_step advances once per episode, yet it gates
        # the train_start / update_target_model_start thresholds below.
        # Confirm that per-episode (not per-step) counting is intended.
        global_step += 1
        local_step = 0

        state = list()
        next_state = list()
        done = False
        init = True
        score = 0

        # Reset DQN environment
        time.sleep(1.0)

        while not done:
            local_step += 1

            # Action based on the current state
            if local_step == 1:
                action = 2  # Move forward
            else:
                state = next_state
                action = int(self.get_action(state))

            # Send action and receive next state and reward
            req = Dqn.Request()
            print(int(action))
            req.action = action
            req.init = init
            while not self.dqn_com_client.wait_for_service(
                    timeout_sec=1.0):
                self.get_logger().info(
                    'service not available, waiting again...')

            future = self.dqn_com_client.call_async(req)

            while rclpy.ok():
                rclpy.spin_once(self)
                if future.done():
                    break
            else:
                # rclpy stopped before the call completed. No response will
                # ever set ``done``, so leave instead of looping forever.
                return

            response = future.result()
            if response is None:
                self.get_logger().error(
                    'Exception while calling service: {0}'.format(
                        future.exception()))
                # Retry the same step: without a response there is no
                # reward or next state to store or to act on.
                local_step -= 1
                time.sleep(0.01)
                continue

            # Next state and reward
            next_state = response.state
            reward = response.reward
            done = response.done
            score += reward
            init = False

            # Save <s, a, r, s'> samples
            if local_step > 1:
                self.append_sample(state, action, reward, next_state, done)

                # Train model
                if global_step > self.update_target_model_start:
                    self.train_model(True)
                elif global_step > self.train_start:
                    self.train_model()

                if done:
                    # Update neural network
                    self.update_target_model()

                    print("Episode:", episode, "score:", score,
                          "memory length:", len(self.memory),
                          "epsilon:", self.epsilon)

            # While loop rate
            time.sleep(0.01)

        # Update result and save model every 10 episodes
        if episode % 10 == 0:
            checkpoint_stem = (
                'stage' + str(self.stage) + '_episode' + str(episode))
            self.model_path = os.path.join(
                self.model_dir_path, checkpoint_stem + '.h5')
            self.model.save(self.model_path)

            # Built here rather than inside the step loop so it always
            # exists and reflects this episode, even when the episode ended
            # on its very first step.
            param_dictionary = {'epsilon': self.epsilon}
            with open(
                    os.path.join(
                        self.model_dir_path, checkpoint_stem + '.json'),
                    'w') as outfile:
                json.dump(param_dictionary, outfile)

        # Epsilon
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay