score_lst = [] discriminator_score_lst = [] score = 0.0 discriminator_score = 0 if agent_args.on_policy == True: state_lst = [] state_ = (env.reset()) state = np.clip((state_ - state_rms.mean) / (state_rms.var**0.5 + 1e-8), -5, 5) for n_epi in range(args.epochs): for t in range(agent_args.traj_length): if args.render: env.render() state_lst.append(state_) action, log_prob = agent.get_action( torch.from_numpy(state).float().unsqueeze(0).to(device)) next_state_, r, done, info = env.step(action.cpu().numpy()) next_state = np.clip( (next_state_ - state_rms.mean) / (state_rms.var**0.5 + 1e-8), -5, 5) if discriminator_args.is_airl: reward = discriminator.get_reward(\ log_prob,\ torch.tensor(state).unsqueeze(0).float().to(device),action,\ torch.tensor(next_state).unsqueeze(0).float().to(device),\ torch.tensor(done).view(1,1)\ ).item() else: reward = discriminator.get_reward( torch.tensor(state).unsqueeze(0).float().to(device),
class DQNBot: """ An operator to train the dqn agent. """ def __init__(self, robot="r", online=False, policy_mode="epsilon", debug=True, save_path=None, load_path=None, pkl_path=None, manual_avoid=False): """ Args: robot (str): robot namespace ("r" or "b") online (bool): training is done on online or not policy_mode (str): policy ("epsilon" or "boltzmann") debug (bool): debug mode save_path (str): model save path load_path (str): model load path pkl_path (str): dump path manual_avoid (bool): manually avoid walls or not """ # attributes self.robot = robot self.enemy = "b" if robot == "r" else "r" self.online = online self.policy_mode = policy_mode self.debug = debug self.my_markers = ROBOT_MARKERS[self.robot] self.op_markers = ROBOT_MARKERS[self.enemy] self.marker_list = FIELD_MARKERS + self.my_markers + self.op_markers self.score = {k: 0 for k in self.marker_list} self.past_score = {k: 0 for k in self.marker_list} if save_path is None: self.save_path = "../catkin_ws/src/burger_war_dev/burger_war_dev/scripts/models/tmp.pth" else: self.save_path = save_path self.pkl_path = pkl_path # state variables self.lidar_ranges = None self.my_pose = None self.image = None self.mask = None self.state = None self.past_state = None self.action = None # other variables self.game_state = "end" self.step = 0 self.episode = 0 self.bridge = CvBridge() # rostopic subscription self.lidar_sub = rospy.Subscriber('scan', LaserScan, self.callback_lidar) self.image_sub = rospy.Subscriber('image_raw', Image, self.callback_image) self.state_sub = rospy.Timer(rospy.Duration(0.5), self.callback_warstate) if self.debug: if self.robot == "r": self.odom_sub = rospy.Subscriber("red_bot/tracker", Odometry, self.callback_odom) if self.robot == "b": self.odom_sub = rospy.Subscriber("enemy_bot/tracker", Odometry, self.callback_odom) else: self.amcl_sub = rospy.Subscriber("amcl_pose", PoseWithCovarianceStamped, self.callback_amcl) # rostopic publication self.twist_pub = rospy.Publisher('cmd_vel', Twist, queue_size=1) # rostopic service self.state_service = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) self.pause_service = rospy.ServiceProxy('/gazebo/pause_physics', Empty) self.unpause_service = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) # agent if self.debug: # connect to agent server from agents.agent_conn import AgentClient self.agent = AgentClient(server_address='127.0.0.1', port=5010, num_actions=len(ACTION_LIST), batch_size=BATCH_SIZE, capacity=MEM_CAPACITY, gamma=GAMMA, prioritized=PRIOTIZED, lr=LR) else: # create agent in this process from agents.agent import Agent self.agent = Agent(num_actions=len(ACTION_LIST), batch_size=BATCH_SIZE, capacity=MEM_CAPACITY, gamma=GAMMA, prioritized=PRIOTIZED, lr=LR) # load model if load_path is not None: self.agent.load_model(load_path) # load pickle file if exists if self.pkl_path is not None and os.path.exists(self.pkl_path): self.agent.load_memory(self.pkl_path) # mode self.punish_if_facing_wall = not manual_avoid self.punish_far_from_center = True def callback_lidar(self, data): """ callback function of lidar subscription Args: data (LaserScan): distance data of lidar """ raw_lidar = np.array(data.ranges) raw_lidar = lidar_transform(raw_lidar, self.debug) self.lidar_ranges = torch.FloatTensor(raw_lidar).view(1, 1, -1) # (1, 1, 360) def callback_image(self, data): """ callback function of image subscription Args: data (Image): image from from camera mounted on the robot """ try: img = self.bridge.imgmsg_to_cv2(data, "bgr8") # 640x480[px] # preprocess image img = img[100:, :] # 640x380[px] img = cv2.resize(img, (160, 95)) deriv_x = cv2.Sobel(img, cv2.CV_32F, 1, 0, ksize=5) deriv_y = cv2.Sobel(img, cv2.CV_32F, 0, 1, ksize=5) grad = np.sqrt(deriv_x ** 2 + deriv_x ** 2) ''' # visualize preprocessed image for debug def min_max(x, axis=None): min = x.min(axis=axis, keepdims=True) max = x.max(axis=axis, keepdims=True) result = (x-min)/(max-min) return result cv2.imshow('grad', min_max(grad)) cv2.waitKey(1) ''' img = torchvision.transforms.ToTensor()(grad) self.image = img.unsqueeze(0) # (1, 3, 95, 160) except CvBridgeError as e: rospy.logerr(e) def callback_odom(self, data): """ callback function of tracker subscription Args: data (Odometry): robot pose """ x = data.pose.pose.position.x y = data.pose.pose.position.y self.my_pose = torch.FloatTensor([x, y]).view(1, 2) def callback_amcl(self, data): """ callback function of amcl subscription Args: data (PoseWithCovarianceStamped): robot pose """ x = data.pose.pose.position.x y = data.pose.pose.position.y self.my_pose = torch.FloatTensor([-y, x]).view(1, 2) def callback_warstate(self, event): """ callback function of warstate subscription Notes: https://github.com/p-robotics-hub/burger_war_kit/blob/main/judge/README.md """ # get the game state from judge server by HTTP request resp = requests.get(JUDGE_URL + "/warState") json_dict = json.loads(resp.text) self.game_state = json_dict['state'] if self.game_state == "running": for tg in json_dict["targets"]: if tg["player"] == self.robot: self.score[tg["name"]] = int(tg["point"]) elif tg["player"] == self.enemy: self.score[tg["name"]] = -int(tg["point"]) msk = [] for k in self.marker_list: if self.score[k] > 0: msk.append(0) elif self.score[k] == 0: msk.append(1) else: msk.append(2) self.mask = torch.FloatTensor(msk).view(1, 18) def get_reward(self, past, current): """ reward function. Args: past (dict): score dictionary at previous step current (dict): score dictionary at current step Return: reward (int) """ diff_my_score = {k: current[k] - past[k] for k in self.score.keys() if k not in self.my_markers} diff_op_score = {k: current[k] - past[k] for k in self.my_markers} # Check LiDAR data to punish for AMCL failure if self.punish_if_facing_wall: bad_position = punish_by_min_dist(self.lidar_ranges, dist_th=0.15) else: if self.punish_far_from_center: pose = self.my_pose.squeeze() bad_position = punish_by_count(self.lidar_ranges, dist_th=0.2, count_th=90) if abs(pose[0].item()) > 1: bad_position -= 0.1 if abs(pose[1].item()) > 1: bad_position -= 0.1 else: bad_position = 0 plus_diff = sum([v for v in diff_my_score.values() if v > 0]) minus_diff = sum([v for v in diff_op_score.values() if v < 0]) return plus_diff + minus_diff + bad_position def strategy(self): # past state self.past_state = self.state # current state self.state = State( self.my_pose, # (1, 2) self.lidar_ranges, # (1, 1, 360) self.image, # (1, 3, 480, 640) self.mask, # (1, 18) ) if self.action is not None: current_score = copy.deepcopy(self.score) reward = self.get_reward(self.past_score, current_score) print("reward: {}".format(reward)) self.past_score = current_score reward = torch.LongTensor([reward]) self.agent.memorize(self.past_state, self.action, self.state, reward) # manual wall avoidance if not self.punish_if_facing_wall: avoid, linear_x, angular_z = manual_avoid_wall_2(self.lidar_ranges, dist_th=0.13, back_vel=0.2) else: avoid = False if avoid: self.action = None else: # get action from agent if self.step % 3 == 0: policy = "boltzmann" else: policy = "epsilon" self.action = self.agent.get_action(self.state, self.episode, policy, self.debug) choice = int(self.action.item()) linear_x = ACTION_LIST[choice][0] angular_z = ACTION_LIST[choice][1] print("step: {}, vel:{}, omega:{}".format(self.step, linear_x, angular_z)) # update twist twist = Twist() twist.linear.x = linear_x twist.linear.y = 0.0 twist.linear.z = 0.0 twist.angular.x = 0.0 twist.angular.y = 0.0 twist.angular.z = angular_z self.twist_pub.publish(twist) self.step += 1 def move_robot(self, model_name, position=None, orientation=None, linear=None, angular=None): state = ModelState() state.model_name = model_name # set pose pose = Pose() if position is not None: pose.position = Point(*position) if orientation is not None: tmpq = tft.quaternion_from_euler(*orientation) pose.orientation = Quaternion(tmpq[0], tmpq[1], tmpq[2], tmpq[3]) state.pose = pose # set twist twist = Twist() if linear is not None: twist.linear = Vector3(*linear) if angular is not None: twist.angular = Vector3(*angular) state.twist = twist try: self.state_service(state) except rospy.ServiceException, e: print("Service call failed: %s".format(e))