def main(args): """ Train and save the SAC model, for the halfcheetah problem :param args: (ArgumentParser) the input arguments """ env = gym.make(args.env) test_env = gym.make(args.env) if args.ent_coef is None: args.ent_coef = 'auto' model = SAC(env=env, test_env=test_env, seed=int(args.seed), ent_coef=args.ent_coef, reward_scale=5.) ep_rewards = model.learn(total_timesteps=int(args.max_timesteps), save_path=args.save_path) model.save(args.save_path + "/%s_model_seed%d_fin_auto.zip" % (args.env, int(args.seed))) np.save( args.save_path + "/%s_rews_seed%d_fin_auto.npy" % (args.env, int(args.seed)), np.array(ep_rewards)) # print("Saving model to halfcheetah_model.zip") # model.learn(total_timesteps=100) # model.load("halfcheetah_model.zip") model.evaluate(10)
env.observation_space = convert_to_gym(env.observation_space) # Create log dir for callback model saving os.makedirs("./temp_models/", exist_ok=True) env = Monitor(env, "./temp_models/", allow_early_resets=True) ##### TRAIN ##### if args.train: check_overwrite(args.model) model = SAC(MlpPolicy, env, verbose=1, tensorboard_log="./tensorboard_log/") model.learn(total_timesteps=int(args.step), log_interval=10, tb_log_name="log", callback=callback.callback) model.save(MODELS_FOLDER_PATH) #### TEST ##### if not args.train: model = SAC.load(MODELS_FOLDER_PATH) obs = env.reset() while True: action, _states = model.predict(obs) obs, rewards, done, info = env.step(scale_range(action, -1, 1, 0, 1)) env.render() if done: obs = env.reset()
class Strategy(object): def __init__(self, team): self.sac_cal = sac_calculate() self.a = [] self.s = [] self.r = 0.0 self.done = False self.avg_arr = np.zeros(64) self.team = team self.RadHead2Ball = 0.0 self.RadHead = 0.0 self.NunbotAPosX = 0.0 self.NunbotAPosY = 0.0 self.NunbotBPosX = 0.0 self.BallPosX = 0.0 self.BallPosY = 0.0 self.GoalX = 900.0 self.GoalY = 0.0 self.StartX = -900.0 self.StartY = 0.0 self.kick_count = 0 self.kick_num = 0 self.score = 0 self.RadTurn = 0.0 self.Steal = False self.dis2start = 0.0 self.dis2goal = 0.0 self.vec = VelCmd() self.A_info = np.array([1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0]) self.game_count = 2 self.A_z = 0.0 self.B_z = 0.0 self.HowEnd = 0 self.B_dis = 0.0 self.ep_rwd = 0 self.is_kick = False self.ready2restart = True self.list_rate = list(np.zeros(128)) self.milestone = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] self.step_milestone = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] self.milestone_idx = 0 self.is_in = False self.is_out = False self.is_steal = False self.is_fly = False self.is_stealorfly = False self.real_resart = True self.step_count = 0 self.LeftAng = 0 self.Dis2Ball = 0 def callback(self, data): # Rostopic 之從外部得到的值 self.RadHead2Ball = data.ballinfo.real_pos.angle self.Dis2Ball = data.ballinfo.real_pos.radius self.RadHead = data.robotinfo[0].heading.theta self.BallPosX = data.ballinfo.pos.x self.BallPosY = data.ballinfo.pos.y self.NunbotAPosX = data.robotinfo[0].pos.x self.NunbotAPosY = data.robotinfo[0].pos.y self.NunbotBPosX = data.obstacleinfo.pos[0].x self.B_dis = data.obstacleinfo.polar_pos[0].radius def steal_callback(self, data): self.Steal = data.data def A_info_callback(self, data): self.A_info = np.array(data.data) self.is_kick = True def GoalInfo(self, data): self.LeftAng = data.left_angle def state_callback(self, data): self.kick_count = 0 def reward_callback(self, data): pass # self.r = data.data def done_callback(self, data): self.done = data.data def fly_callback(self, data): self.A_z = data.pose[5].position.z self.B_z = data.pose[6].position.z def HowEnd_callback(self, data): self.HowEnd = data.data def ready2restart_callback(self, data): self.restart() self.ready2restart = False def ros_init(self): if self.team == 'A': self.agent = SAC(act_dim=2, obs_dim=7, lr_actor=l_rate * (1e-3), lr_value=l_rate * (1e-3), gamma=0.99, tau=0.995) rospy.init_node('strategy_node_A', anonymous=True) # self.A_info_pub = rospy.Publisher('/nubot1/A_info', Float32MultiArray, queue_size=1) # 3in1 self.vel_pub = rospy.Publisher('/nubot1/nubotcontrol/velcmd', VelCmd, queue_size=1) self.reset_pub = rospy.Publisher('/gazebo/set_model_state', ModelState, queue_size=10) # self.ready2restart_pub = rospy.Publisher('nubot1/ready2restart',Bool, queue_size=1) rospy.Subscriber("/nubot1/omnivision/OmniVisionInfo", OminiVisionInfo, self.callback) rospy.Subscriber('gazebo/model_states', ModelStates, self.fly_callback) # rospy.Subscriber('/coach/state', String, self.state_callback) # rospy.Subscriber('/coach/reward', Float32, self.reward_callback) # rospy.Subscriber('/coach/done', Bool, self.done_callback) # rospy.Subscriber('coach/HowEnd', Int16, self.HowEnd_callback) # rospy.Subscriber("/rival1/steal", Bool, self.steal_callback) rospy.wait_for_service('/nubot1/Shoot') self.call_Shoot = rospy.ServiceProxy('/nubot1/Shoot', Shoot) # rospy.wait_for_service('/gazebo/reset_simulation') # self.call_restart = rospy.ServiceProxy('/gazebo/reset_simulation', Empty, persistent=True) # rospy.wait_for_service('/gazebo/set_model_state') # self.call_set_modol = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) rospy.wait_for_service('/nubot1/BallHandle') self.call_Handle = rospy.ServiceProxy('/nubot1/BallHandle', BallHandle) rospy.wait_for_service('/rival1/BallHandle') self.call_B_Handle = rospy.ServiceProxy('/rival1/BallHandle', BallHandle) elif self.team == 'B': rospy.init_node('strategy_node_B', anonymous=True) self.vel_pub = rospy.Publisher('/rival1/nubotcontrol/velcmd', VelCmd, queue_size=1) self.steal_pub = rospy.Publisher('/rival1/steal', Bool, queue_size=1) # steal rospy.Subscriber("/rival1/omnivision/OmniVisionInfo", OminiVisionInfo, self.callback) rospy.Subscriber("/rival1/omnivision/OmniVisionInfo/GoalInfo", PPoint, self.GoalInfo) rospy.wait_for_service('/rival1/BallHandle') self.call_Handle = rospy.ServiceProxy('/rival1/BallHandle', BallHandle) else: rospy.init_node('coach', anonymous=True) self.state_pub = rospy.Publisher('/coach/state', String, queue_size=1) self.reward_pub = rospy.Publisher('/coach/reward', Float32, queue_size=1) self.done_pub = rospy.Publisher('coach/done', Bool, queue_size=1) self.HowEnd_pub = rospy.Publisher('coach/HowEnd', Int16, queue_size=1) rospy.Subscriber("/nubot1/omnivision/OmniVisionInfo", OminiVisionInfo, self.callback) rospy.Subscriber("/rival1/steal", Bool, self.steal_callback) # steal rospy.Subscriber("/nubot1/A_info", Float32MultiArray, self.A_info_callback) # rospy.Subscriber('gazebo/model_states', ModelStates, self.fly_callback) rospy.Subscriber('nubot1/ready2restart', Bool, self.ready2restart_callback) rospy.wait_for_service('/gazebo/reset_simulation') self.call_restart = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) def ball_out(self): if self.BallPosX >= 875 or self.BallPosX <= -875 or self.BallPosY >= 590 or self.BallPosY <= -590: self.show('Out') self.is_out = True def ball_in(self): if self.BallPosX >= 870 and self.BallPosX <= 900 and self.BallPosY >= -100 and self.BallPosY <= 100: self.show('in') self.is_in = True def fly(self): if self.A_z > 0.30 or self.B_z > 0.30: self.is_fly = True def steal(self): rospy.wait_for_service('/nubot1/BallHandle') rospy.wait_for_service('/rival1/BallHandle') if self.call_B_Handle( 1).BallIsHolding and not self.call_Handle(1).BallIsHolding: self.is_steal = True def stealorfly(self): if self.is_fly or self.is_steal: self.is_stealorfly = True def show(self, state): global _state if state != _state: print(state) _state = state def cnt_rwd(self): data = Float32MultiArray() data.data = [ self.kick_count, self.cal_dis2start(), self.cal_dis2goal(), self.B_dis, 0, 0, 0, 0 ] if self.game_is_done(): if self.HowEnd == 1: data.data[4] = 1 data.data[5] = 0 data.data[6] = 0 elif self.HowEnd == -1: data.data[4] = 0 data.data[5] = 0 data.data[6] = 1 elif self.HowEnd == -2: data.data[4] = 0 data.data[5] = 1 data.data[6] = 0 # self.A_info_pub.publish(data) # 2C self.sac_cal = sac_calculate() reward = self.sac_cal.reward(data.data) data.data[7] = reward print('rwd init', ['kck_n g_dis st_dis opp_dis in out steal ttl']) print('rwd unit', np.around((data.data), decimals=1)) print('rwd :', reward) return (reward) def kick(self): global MaxSpd_A self.vec.Vx = MaxSpd_A * math.cos(self.RadHead2Ball) self.vec.Vy = MaxSpd_A * math.sin(self.RadHead2Ball) # self.vec.w = self.RadHead2Ball * RotConst self.vec.w = 0 self.vel_pub.publish(self.vec) global pwr # rospy.wait_for_service('/nubot1/Shoot') self.call_Shoot(pwr, 1) # power from GAFuzzy while 1: self.chase(MaxSpd_A) if self.game_is_done() and self.real_resart: break if not self.call_Handle(1).BallIsHolding: self.kick_count = self.kick_count + 1 # time.sleep(0.2) print("Kick: %d" % self.kick_count) print('-------') break print('in') def chase(self, MaxSpd): self.vec.Vx = MaxSpd * math.cos(self.RadHead2Ball) self.vec.Vy = MaxSpd * math.sin(self.RadHead2Ball) self.vec.w = self.RadHead2Ball * RotConst self.vel_pub.publish(self.vec) def chase_B(self, MaxSpd): self.vec.Vx = MaxSpd * math.cos(self.RadHead2Ball) self.vec.Vy = MaxSpd * math.sin(self.RadHead2Ball) self.vec.w = self.RadHead2Ball * RotConst / 4 ####### self.vel_pub.publish(self.vec) def turn(self, angle): global MaxSpd_A self.vec.Vx = MaxSpd_A * math.cos(self.RadHead2Ball) self.vec.Vy = MaxSpd_A * math.sin(self.RadHead2Ball) # self.vec.Vx = 0 # self.vec.Vy = 0 self.vec.w = turnHead2Kick(self.RadHead, angle) * RotConst self.vel_pub.publish(self.vec) self.show("Turning") def cal_dis2start(self): # last kick dis2start_x = self.NunbotAPosX - self.StartX dis2start_y = self.NunbotAPosY - self.StartY dis2start = math.hypot(dis2start_x, dis2start_y) return dis2start # self.dis2start_pub.publish(dis2start) def cal_dis2goal(self): # last kick dis2goal_x = self.NunbotAPosX - self.GoalX dis2goal_y = self.NunbotAPosY - self.GoalY dis2goal = math.hypot(dis2goal_x, dis2goal_y) return dis2goal # self.dis2goal_pub.publish(dis2goal) def avg(self, n, l): l = np.delete(l, 0) l = np.append(l, n) self.avg_arr = l print(self.avg_arr) print(sum(l) / 64) def reset_ball(self): while self.BallPosX != -680: Ball_msg = ModelState() Ball_msg.model_name = 'football' Ball_msg.pose.position.x = -6.8 #-6 #-6.8 Ball_msg.pose.position.y = random.uniform(-3.3, 3.3) # Ball_msg.pose.position.y = 0 Ball_msg.pose.position.z = 0.12 Ball_msg.pose.orientation.x = 0 Ball_msg.pose.orientation.y = 0 Ball_msg.pose.orientation.z = 0 Ball_msg.pose.orientation.w = 1 self.reset_pub.publish(Ball_msg) def reset_A(self): while self.NunbotAPosX != -830: A_msg = ModelState() A_msg.model_name = 'nubot1' A_msg.pose.position.x = -8.3 #-8 #-8.5 A_msg.pose.position.y = random.uniform(-1.7, 1.7) #0 # A_msg.pose.position.y = 0 A_msg.pose.position.z = 0 A_msg.pose.orientation.x = 0 A_msg.pose.orientation.y = 0 A_msg.pose.orientation.z = 0 A_msg.pose.orientation.w = 1 self.reset_pub.publish(A_msg) def reset_B(self): while self.NunbotBPosX != 0: B_msg = ModelState() B_msg.model_name = 'rival1' # a = [[2,0],[1, -2],[1, 2],[0,4],[0,-4]] # b = a[random.randint(0, 4)] c = random.uniform(-5, 5) B_msg.pose.position.x = 0 # B_msg.pose.position.y = 0 B_msg.pose.position.y = c B_msg.pose.position.z = 0 B_msg.pose.orientation.x = 0 B_msg.pose.orientation.y = 0 B_msg.pose.orientation.z = 0 B_msg.pose.orientation.w = 1 self.reset_pub.publish(B_msg) def restart(self): # game_state_word = "game is over" # self.state_pub.publish(game_state_word) # 2A # self.Steal = False self.reset_ball() print('Game %d over' % (self.game_count - 1)) print('-----------Restart-----------') print('Game %d start' % self.game_count) self.reset_A() self.game_count += 1 self.kick_count = 0 self.reset_B() # self.call_set_modol(SetModelState) # print('after call_restart') self.ready2restart = False self.is_fly = False self.is_steal = False self.is_stealorfly = False self.is_in = False self.is_out = False # def save(self, agent, in_rate): # print('save file') # if os.path.isdir(agent.path+str(in_rate)): # shutil.rmtree(agent.path+str(in_rate)) # os.mkdir(agent.path+str(in_rate)) # ckpt_path = os.path.join(agent.path+str(in_rate), 'SAC.ckpt') # save_path = agent.saver.save(agent.sess, ckpt_path, write_meta_graph=False) # print("\nSave Model %s\n" % save_path) def save(self, agent, in_rate): print('save file') if os.path.isdir(agent.path): shutil.rmtree(agent.path) os.mkdir(agent.path) ckpt_path = os.path.join(agent.path, 'SAC.ckpt') save_path = agent.saver.save(agent.sess, ckpt_path, write_meta_graph=False) print("\nSave Model %s\n" % save_path) def end_rate(self, end): self.list_rate[self.game_count % 128] = end out_count = self.list_rate.count(-2) in_count = self.list_rate.count(1) steal_count = self.list_rate.count(-1) in_rate = in_count / 128 print('in_rate', in_count / 128, 'out_rate', out_count / 128, 'steal_rate', steal_count / 128) if in_count / 128 != 0 and self.milestone_idx == 0: self.milestone[0] = self.game_count self.step_milestone[0] = self.step_count self.milestone_idx = self.milestone_idx + 1 self.save(self.agent, in_rate) if in_count / 128 >= 0.1 and self.milestone_idx == 1: self.milestone[1] = self.game_count self.step_milestone[1] = self.step_count self.milestone_idx = self.milestone_idx + 1 self.save(self.agent, in_rate) if in_count / 128 >= 0.2 and self.milestone_idx == 2: self.milestone[2] = self.game_count self.step_milestone[2] = self.step_count self.milestone_idx = self.milestone_idx + 1 self.save(self.agent, in_rate) if in_count / 128 >= 0.3 and self.milestone_idx == 3: self.milestone[3] = self.game_count self.step_milestone[3] = self.step_count self.milestone_idx = self.milestone_idx + 1 self.save(self.agent, in_rate) if in_count / 128 >= 0.4 and self.milestone_idx == 4: self.milestone[4] = self.game_count self.step_milestone[4] = self.step_count self.milestone_idx = self.milestone_idx + 1 self.save(self.agent, in_rate) if in_count / 128 >= 0.5 and self.milestone_idx == 5: self.milestone[5] = self.game_count self.step_milestone[5] = self.step_count self.milestone_idx = self.milestone_idx + 1 self.save(self.agent, in_rate) if in_count / 128 >= 0.6 and self.milestone_idx == 6: self.milestone[6] = self.game_count self.step_milestone[6] = self.step_count self.milestone_idx = self.milestone_idx + 1 self.save(self.agent, in_rate) if in_count / 128 >= 0.7 and self.milestone_idx == 7: self.milestone[7] = self.game_count self.step_milestone[7] = self.step_count self.milestone_idx = self.milestone_idx + 1 self.save(self.agent, in_rate) if in_count / 128 >= 0.8 and self.milestone_idx == 8: self.milestone[8] = self.game_count self.step_milestone[8] = self.step_count self.milestone_idx = self.milestone_idx + 1 self.save(self.agent, in_rate) if in_count / 128 >= 0.9 and self.milestone_idx == 9: self.milestone[9] = self.game_count self.step_milestone[9] = self.step_count self.milestone_idx = self.milestone_idx + 1 self.save(self.agent, in_rate) if in_count / 128 >= 1.0 and self.milestone_idx == 10: self.milestone[10] = self.game_count self.step_milestone[10] = self.step_count self.milestone_idx = self.milestone_idx + 1 self.save(self.agent, in_rate) print('milestone', self.milestone) print('milestone', self.step_milestone) def game_is_done(self): self.ball_in() self.ball_out() self.steal() self.fly() self.stealorfly() if self.is_in or self.is_out or self.is_stealorfly: if self.is_in: self.HowEnd = 1 elif self.is_out: self.HowEnd = -2 elif self.is_stealorfly: self.HowEnd = -1 else: print('err') return True else: return False def workA(self): np.set_printoptions(suppress=True) i = 0 fisrt_time_hold = False while not rospy.is_shutdown(): # while self.step_count < 30000: while 1: # print(self.ball_in(), self.ball_out(), self.stealorfly()) rospy.wait_for_service('/nubot1/BallHandle') self.call_Handle(1) # open holding device if self.game_is_done() and self.real_resart: # print('self.game_is_done()',self.game_is_done()) self.r = self.cnt_rwd() # print('h',self.HowEnd) s_ = self.sac_cal.input(self.HowEnd) #out state if i > 1: if len(self.s) == 7 and len(s_) == 7: # print('000000000000000000',np.shape(self.s), np.shape(self.a)) self.agent.replay_buffer.store_transition( self.s, self.a, self.r, s_, self.done) # print('d',self.done) self.ep_rwd = self.r print('ep rwd value=', self.r) self.end_rate(self.HowEnd) self.step_count = self.step_count + 1 i += 1 self.s = s_ self.done = False if i > 512: # 64 self.agent.learn(i, self.r, self.ep_rwd) # self.ready2restart_pub.publish(True) # self.ready2restart_pub.publish(False) self.real_resart = False self.HowEnd = 0 # print('i want to go in self.restart()') self.restart() # self.end_rate(self.HowEnd) # print('---') # elif not self.game_is_done(): else: # print('self.game_is_done()',self.game_is_done()) rospy.wait_for_service('/nubot1/BallHandle') if not self.call_Handle( 1).BallIsHolding: # BallIsHolding = 0 self.chase(MaxSpd_A) fisrt_time_hold = True rospy.wait_for_service('/nubot1/BallHandle') # do real reset before holding self.ready2restart = False self.is_fly = False self.is_steal = False self.is_stealorfly = False self.is_in = False self.is_out = False self.real_resart = True # elif self.call_Handle( 1).BallIsHolding: # BallIsHolding = 1 global RadHead self.chase(MaxSpd_A) if fisrt_time_hold == True: self.real_resart = True # self.chase(MaxSpd_A) self.show('Touch') self.r = self.cnt_rwd() s_ = self.sac_cal.input(0) #state_for_sac if i >= 1: if len(self.s) == 7 and len(s_) == 7: self.agent.replay_buffer.store_transition( self.s, self.a, self.r, s_, self.done) print('step rwd value= ', self.r) self.step_count = self.step_count + 1 self.done = False i += 1 self.s = s_ self.a = self.agent.choose_action( self.s) #action_from_sac rel_turn_ang = self.sac_cal.output( self.a) #action_from_sac global pwr, MAX_PWR pwr = (self.a[1] + 1) * MAX_PWR / 2 + 1.4 #normalize # sac] rel_turn_rad = math.radians(rel_turn_ang) self.RadTurn = rel_turn_rad + self.RadHead fisrt_time_hold = False if i > 512: self.agent.learn(i, self.r, self.ep_rwd) elif fisrt_time_hold == False: self.chase(MaxSpd_A) error = math.fabs( turnHead2Kick(self.RadHead, self.RadTurn)) if error > angle_thres: # 還沒轉到 self.turn(self.RadTurn) else: # 轉到 self.kick() def ClassicRounding(self, goal_ang, ball_dis, ball_rad, MaxSpd): # alpha = math.radians(ball_ang - goal_ang) ball_ang = math.degrees(ball_rad) alpha = math.radians(ball_ang - goal_ang) if ball_dis > 120 * 3: beta = 0.7 else: beta = 1 if abs(alpha) > beta: alpha = beta * np.sign(alpha) else: pass v_yaw = goal_ang br_x = MaxSpd * math.cos(math.radians(ball_ang)) br_y = MaxSpd * math.sin(math.radians(ball_ang)) v_x = br_x * math.cos(alpha) - br_y * math.sin(alpha) v_y = br_x * math.sin(alpha) + br_y * math.cos(alpha) self.vec.Vx = v_x self.vec.Vy = v_y # print(goal_ang) self.vec.w = math.radians(v_yaw) * 0.25 # self.vec.w = self.RadHead2Ball * RotConst/4####### self.vel_pub.publish(self.vec) return v_x, v_y, v_yaw def workB(self): # catch = False while not rospy.is_shutdown(): rospy.wait_for_service('/rival1/BallHandle') # self.call_Handle(1) #start holding device if not self.call_Handle(1).BallIsHolding: # BallIsHolding = 0 self.steal_pub.publish(False) self.chase_B(MaxSpd_B) # chase ball strategy # self.ClassicRounding(self.LeftAng, self.Dis2Ball, self.RadHead2Ball,MaxSpd_B) # catch = False else: # BallIsHolding = 1 # self.chase(MaxSpd_B/4) # if not catch: # catch = True # ticks = time.time() # ticks = ticks + 1 # sec # steal time # if time.time() > ticks: self.steal_pub.publish(True) # 2C # self.show('steal') # ticks += 5 def workC(self): print('Game 1 start') # rate = rospy.Rate(10) np.set_printoptions(suppress=True) while not rospy.is_shutdown(): is_stealorfly = self.stealorfly() is_out = self.ball_out() is_in = self.ball_in() # # [] send rwd 2 A # self.sac_cal = sac_calculate() # # self.A_info = list(self.A_info) # # if self.is_kick: # self.A_info[4] = 0 # self.A_info[5] = 0 # self.A_info[6] = 0 # reward = self.sac_cal.reward(self.A_info) # rwd 2 A # self.reward_pub.publish(reward) # print('step rwd unit = ', np.around((self.A_info), decimals=1 )) # 7in1 # 7 rwd unit # print('step rwd value =',reward) # # self.is_kick = False if is_in or is_out or is_stealorfly: # done 2 A # self.ready2restart = False if is_in: HowEnd = 1 # self.A_info[4] = 1 # self.A_info[5] = 0 # self.A_info[6] = 0 if is_stealorfly: HowEnd = -1 # self.A_info[4] = 0 # self.A_info[5] = 0 # self.A_info[6] = 1 if is_out: HowEnd = -2 # self.A_info[4] = 0 # self.A_info[5] = 1 # self.A_info[6] = 0 self.HowEnd_pub.publish(HowEnd) self.done_pub.publish(True) if self.ready2restart: self.restart()
class Strategy(object): def __init__(self, team): self.sac_cal = sac_calculate() self.a = [] self.s = [] self.r = 0.0 self.done = False self.avg_arr = np.zeros(64) self.team = team self.RadHead2Ball = 0.0 self.RadHead = 0.0 self.NunbotAPosX = 0.0 self.NunbotAPosY = 0.0 self.BallPosX = 0.0 self.BallPosY = 0.0 self.GoalX = 900.0 self.GoalY = 0.0 self.StartX = -900.0 self.StartY = 0.0 self.kick_count = 0 self.kick_num = 0 self.score = 0 self.RadTurn = 0.0 self.Steal = False self.dis2start = 0.0 self.dis2goal = 0.0 self.vec = VelCmd() self.A_info = np.array([1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0]) self.game_count = 2 self.A_z = 0.0 self.B_z = 0.0 self.HowEnd = 0 self.B_dis = 0.0 self.ep_rwd = 0 self.is_kick = False self.ready2restart = True self.list_rate = list(np.zeros(128)) self.milestone=[0, 0, 0, 0, 0, 0, 0] self.step_milestone=[0, 0, 0, 0, 0, 0, 0] self.milestone_idx =0 self.is_in = False self.is_out = False self.is_steal = False self.is_fly = False self.is_be_in = False self.is_stealorfly = False self.real_resart = True self.step_count = 0 def callback(self, data): # Rostopic 之從外部得到的值 self.RadHead2Ball = data.ballinfo.real_pos.angle self.RadHead = data.robotinfo[0].heading.theta self.BallPosX = data.ballinfo.pos.x self.BallPosY = data.ballinfo.pos.y self.NunbotAPosX = data.robotinfo[0].pos.x self.NunbotAPosY = data.robotinfo[0].pos.y self.B_dis = data.obstacleinfo.polar_pos[0].radius def steal_callback(self, data): self.Steal = data.data def A_info_callback(self, data): self.A_info = np.array(data.data) self.is_kick = True def state_callback(self,data): self.kick_count = 0 def reward_callback(self, data): pass # self.r = data.data def done_callback(self, data): self.done = data.data def fly_callback(self, data): self.A_z = data.pos[5].position.z self.B_z = data.pos[6].position.z def HowEnd_callback(self,data): self.HowEnd = data.data def ready2restart_callback(self, data): self.restart() self.ready2restart = False def ros_init(self): if self.team == 'AB': self.agent = SAC(act_dim=8, obs_dim=10, lr_actor=l_rate*(1e-3), lr_value=l_rate*(1e-3), gamma=0.99, tau=0.995) rospy.init_node('strategy_node_AB', anonymous=True) # self.A_info_pub = rospy.Publisher('/nubot1/A_info', Float32MultiArray, queue_size=1) # 3in1 self.Avel_pub = rospy.Publisher('/nubot1/nubotcontrol/velcmd', VelCmd, queue_size=10) self.Bvel_pub = rospy.Publisher('/nubot2/nubotcontrol/velcmd', VelCmd, queue_size=10) self.reset_pub = rospy.Publisher('/gazebo/set_model_state', ModelState, queue_size=10) rospy.Subscriber("/nubot1/omnivision/OmniVisionInfo", OminiVisionInfo, self.Acallback) rospy.Subscriber("/nubot2/omnivision/OmniVisionInfo", OminiVisionInfo, self.Bcallback) # rospy.Subscriber('gazebo/model_states', ModelStates, self.fly_callback)33333333333333333333 # rospy.Subscriber('/coach/state', String, self.state_callback) # rospy.Subscriber('/coach/reward', Float32, self.reward_callback) # rospy.Subscriber('/coach/done', Bool, self.done_callback) # rospy.Subscriber('coach/HowEnd', Int16, self.HowEnd_callback) # rospy.Subscriber("/rival1/steal", Bool, self.steal_callback) rospy.wait_for_service('/nubot1/Shoot') rospy.wait_for_service('/nubot2/Shoot') self.Acall_Shoot = rospy.ServiceProxy('/nubot1/Shoot', Shoot) self.Bcall_Shoot = rospy.ServiceProxy('/nubot2/Shoot', Shoot) # rospy.wait_for_service('/gazebo/reset_simulation') # self.call_restart = rospy.ServiceProxy('/gazebo/reset_simulation', Empty, persistent=True) # rospy.wait_for_service('/gazebo/set_model_state') # self.call_set_modol = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) rospy.wait_for_service('/nubot1/BallHandle') rospy.wait_for_service('/nubot2/BallHandle') self.Acall_Handle = rospy.ServiceProxy('/nubot1/BallHandle', BallHandle) self.Bcall_Handle = rospy.ServiceProxy('/nubot2/BallHandle', BallHandle) rospy.wait_for_service('/rival1/BallHandle') rospy.wait_for_service('/rival2/BallHandle') self.Ccall_Handle = rospy.ServiceProxy('/rival1/BallHandle', BallHandle) self.Dcall_Handle = rospy.ServiceProxy('/rival2/BallHandle', BallHandle) elif self.team == 'C': rospy.init_node('strategy_node_C', anonymous=True) self.vel_pub = rospy.Publisher('/rival1/nubotcontrol/velcmd', VelCmd, queue_size=1) self.steal_pub = rospy.Publisher('/rival1/steal', Bool, queue_size=1) # steal rospy.Subscriber("/rival1/omnivision/OmniVisionInfo", OminiVisionInfo, self.callback) rospy.wait_for_service('/rival1/BallHandle') self.call_Handle = rospy.ServiceProxy('/rival1/BallHandle', BallHandle) elif self.team == 'D': rospy.init_node('strategy_node_D', anonymous=True) self.vel_pub = rospy.Publisher('/rival2/nubotcontrol/velcmd', VelCmd, queue_size=1) self.steal_pub = rospy.Publisher('/rival1/steal', Bool, queue_size=1) # steal rospy.Subscriber("/rival1/omnivision/OmniVisionInfo", OminiVisionInfo, self.callback) rospy.wait_for_service('/rival1/BallHandle') self.call_Handle = rospy.ServiceProxy('/rival1/BallHandle', BallHandle) else : rospy.init_node('coach', anonymous=True) self.state_pub = rospy.Publisher('/coach/state', String, queue_size=1) self.reward_pub = rospy.Publisher('/coach/reward', Float32, queue_size=1) self.done_pub = rospy.Publisher('coach/done', Bool, queue_size=1) self.HowEnd_pub = rospy.Publisher('coach/HowEnd', Int16, queue_size=1) rospy.Subscriber("/nubot1/omnivision/OmniVisionInfo", OminiVisionInfo, self.callback) rospy.Subscriber("/rival1/steal", Bool, self.steal_callback) # steal rospy.Subscriber("/nubot1/A_info", Float32MultiArray, self.A_info_callback) # rospy.Subscriber('gazebo/model_states', ModelStates, self.fly_callback) rospy.Subscriber('nubot1/ready2restart',Bool , self.ready2restart_callback) rospy.wait_for_service('/gazebo/reset_simulation') self.call_restart = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) def ball_out(self): if self.BallPosX >= 875 or self.BallPosX <= -875 or self.BallPosY >= 590 or self.BallPosY <= -590 : self.show('Out') self.is_out = True def ball_in(self): if self.BallPosX >= 870 and self.BallPosX <= 900 and self.BallPosY >= -100 and self.BallPosY <= 100 : self.show('in') self.is_in = True def ball_be_in(self): if self.BallPosX <= -870 and self.BallPosX >= -900 and self.BallPosY >= -100 and self.BallPosY <= 100 : self.show('in') self.is_in = True def fly(self): if self.A_z > 0.2 or self.B_z > 0.2 : self.is_fly = True def steal(self): rospy.wait_for_service('/nubot1/BallHandle') rospy.wait_for_service('/rival1/BallHandle') if self.call_B_Handle(1).BallIsHolding and not self.call_Handle(1).BallIsHolding: self.is_steal = True def stealorfly(self): if self.is_fly or self.is_steal: self.is_stealorfly = True def show(self, state): global _state if state != _state : print(state) _state = state def cnt_rwd(self): data = Float32MultiArray() data.data = [self.kick_count, self.cal_dis2start(), self.cal_dis2goal(), self.B_dis, 0, 0, 0, 0] if self.game_is_done(): if self.HowEnd == 1: data.data[4] = 1 data.data[5] = 0 data.data[6] = 0 elif self.HowEnd == -1: data.data[4] = 0 data.data[5] = 0 data.data[6] = 1 elif self.HowEnd == -2: data.data[4] = 0 data.data[5] = 1 data.data[6] = 0 # self.A_info_pub.publish(data) # 2C self.sac_cal = sac_calculate() reward = self.sac_cal.reward(data.data) data.data[7] = reward print('rwd init',['kck_n g_dis st_dis opp_dis in out steal ttl']) print('rwd unit',np.around((data.data), decimals=1 )) print('rwd :',reward) return(reward) def kick(self): global MaxSpd_A self.vec.Vx = MaxSpd_A * math.cos(self.RadHead2Ball) self.vec.Vy = MaxSpd_A * math.sin(self.RadHead2Ball) # self.vec.w = self.RadHead2Ball * RotConst self.vec.w = 0 self.vel_pub.publish(self.vec) global pwr # rospy.wait_for_service('/nubot1/Shoot') self.call_Shoot(pwr, 1) # power from GAFuzzy while 1 : self.chase(MaxSpd_A) if self.game_is_done() and self.real_resart: break if not self.call_Handle(1).BallIsHolding: self.kick_count = self.kick_count + 1 # time.sleep(0.2) print ("Kick: %d" %self.kick_count) print('-------') break print('in') def chase(self, MaxSpd): self.vec.Vx = MaxSpd * math.cos(self.RadHead2Ball) self.vec.Vy = MaxSpd * math.sin(self.RadHead2Ball) self.vec.w = self.RadHead2Ball * RotConst self.vel_pub.publish(self.vec) def chase_B(self, MaxSpd): self.vec.Vx = MaxSpd * math.cos(self.RadHead2Ball) self.vec.Vy = MaxSpd * math.sin(self.RadHead2Ball) self.vec.w = self.RadHead2Ball * RotConst #/4 self.vel_pub.publish(self.vec) # self.show("Chasing") def turn(self, angle): global MaxSpd_A self.vec.Vx = MaxSpd_A * math.cos(self.RadHead2Ball) self.vec.Vy = MaxSpd_A * math.sin(self.RadHead2Ball) # self.vec.Vx = 0 # self.vec.Vy = 0 self.vec.w = turnHead2Kick(self.RadHead, angle) * RotConst self.vel_pub.publish(self.vec) self.show("Turning") def cal_dis2start(self): # last kick dis2start_x = self.NunbotAPosX - self.StartX dis2start_y = self.NunbotAPosY - self.StartY dis2start = math.hypot(dis2start_x, dis2start_y) return dis2start # self.dis2start_pub.publish(dis2start) def cal_dis2goal(self): # last kick dis2goal_x = self.NunbotAPosX - self.GoalX dis2goal_y = self.NunbotAPosY - self.GoalY dis2goal = math.hypot(dis2goal_x, dis2goal_y) return dis2goal # self.dis2goal_pub.publish(dis2goal) def avg(self, n, l): l = np.delete(l, 0) l = np.append(l, n) self.avg_arr = l print (self.avg_arr) print (sum(l)/64) def reset_ball(self): Ball_msg = ModelState() Ball_msg.model_name = 'football' Ball_msg.pose.position.x = -6.8 #-6 #-6.8 Ball_msg.pose.position.y = random.uniform(-3.3,3.3) Ball_msg.pose.position.z = 0.12 Ball_msg.pose.orientation.x = 0 Ball_msg.pose.orientation.y = 0 Ball_msg.pose.orientation.z = 0 Ball_msg.pose.orientation.w = 1 self.reset_pub.publish(Ball_msg) def reset_A(self): A_msg = ModelState() A_msg.model_name = 'nubot1' A_msg.pose.position.x = -8.3 #-8 #-8.5 A_msg.pose.position.y = random.uniform(-1.7,1.7) #0 A_msg.pose.position.z = 0 A_msg.pose.orientation.x = 0 A_msg.pose.orientation.y = 0 A_msg.pose.orientation.z = 0 A_msg.pose.orientation.w = 1 self.reset_pub.publish(A_msg) def reset_B(self): B_msg = ModelState() B_msg.model_name = 'rival1' # a = [[2,0],[1, -2],[1, 2],[0,4],[0,-4]] # b = a[random.randint(0, 4)] c = random.uniform(-5,5) B_msg.pose.position.x = 0 B_msg.pose.position.y = c B_msg.pose.position.z = 0 B_msg.pose.orientation.x = 0 B_msg.pose.orientation.y = 0 B_msg.pose.orientation.z = 0 B_msg.pose.orientation.w = 1 self.reset_pub.publish(B_msg) def restart(self): # game_state_word = "game is over" # self.state_pub.publish(game_state_word) # 2A # self.Steal = False self.reset_ball() self.reset_ball() print('Game %d over' %(self.game_count-1)) print('-----------Restart-----------') print('Game %d start' %self.game_count) self.reset_A() self.reset_A() self.game_count += 1 self.kick_count = 0 self.reset_B() self.reset_B() # self.call_set_modol(SetModelState) # print('after call_restart') self.ready2restart =False self.is_fly = False self.is_steal = False self.is_stealorfly = False self.is_in = False self.is_out = False # print('i finish def restart(self)') def end_rate(self, end): self.list_rate[self.game_count%128] = end out_count = self.list_rate.count(-2) in_count = self.list_rate.count(1) steal_count=self.list_rate.count(-1) print('in_rate',in_count/128,'out_rate',out_count/128,'steal_rate',steal_count/128) if in_count/128 != 0 and self.milestone_idx == 0: self.milestone[0]=self.game_count self.step_milestone[0]=self.step_count self.milestone_idx = self.milestone_idx +1 if in_count/128 >= 0.1 and self.milestone_idx ==1: self.milestone[1]=self.game_count self.step_milestone[1]=self.step_count self.milestone_idx = self.milestone_idx +1 if in_count/128 >= 0.2 and self.milestone_idx ==2: self.milestone[2]=self.game_count self.step_milestone[2]=self.step_count self.milestone_idx = self.milestone_idx +1 if in_count/128 >= 0.5 and self.milestone_idx ==3: self.milestone[3]=self.game_count self.step_milestone[3]=self.step_count self.milestone_idx = self.milestone_idx +1 if in_count/128 >= 0.8 and self.milestone_idx ==4: self.milestone[4]=self.game_count self.step_milestone[4]=self.step_count self.milestone_idx = self.milestone_idx +1 if in_count/128 >= 0.9 and self.milestone_idx ==5: self.milestone[5]=self.game_count self.step_milestone[5]=self.step_count self.milestone_idx = self.milestone_idx +1 if in_count/128 == 1 and self.milestone_idx ==6: self.milestone[6]=self.game_count self.step_milestone[6]=self.step_count self.milestone_idx = self.milestone_idx +1 print('milestone',self.milestone) print('milestone',self.step_milestone) def game_is_done(self): self.ball_in() self.ball_out() self.ball_be_in() self.fly() # self.stealorfly() if self.is_in or self.is_out or self.is_fly or self.is_be_in: if self.is_in: self.HowEnd = 1 elif self.is_out: self.HowEnd = -2 elif self.is_stealorfly: self.HowEnd = -1 else: print('err') return True else: return False def a2Action(self): global pwr self.vec.Vx = MaxSpd * self.a[0] self.vec.Vy = MaxSpd * self.a[1] self.vec.w = RotConst* self.a[2] self.Avel_pub.publish(self.vec) if self.a[3] > 0: pwr = self.a[3] * MAX_PWR else: pwr = 0 self.a[4] self.a[5] self.a[6] self.a[7] def workA(self): np.set_printoptions(suppress=True) i = 0 while not rospy.is_shutdown(): rospy.wait_for_service('/nubot1/BallHandle') self.Acall_Handle(1) # open holding device rospy.wait_for_service('/nubot2/BallHandle') self.Bcall_Handle(1) # open holding device if self.game_is_done() and self.real_resart: self.r = self.cnt_rwd() s_ = self.sac_cal.input(self.HowEnd) #out state if i > 1: if len(self.s) == 10 and len(s_) == 10: self.agent.replay_buffer.store_transition(self.s , self.a, self.r, s_, self.done) self.ep_rwd = self.r print('ep rwd value=',self.r) self.end_rate(self.HowEnd) self.step_count = self.step_count + 1 i += 1 self.s = s_ self.done = False if i > 512: # 64 self.agent.learn(i, self.r, self.ep_rwd) self.real_resart = False self.HowEnd = 0 self.restart() else: rospy.wait_for_service('/nubot1/BallHandle') rospy.wait_for_service('/nubot2/BallHandle') self.Acall_Handle(1) self.Bcall_Handle(1) self.real_resart = True self.r = self.cnt_rwd() s_ = self.sac_cal.input(0) #state_for_sac if i >= 1: if len(self.s) == 10 and len(s_) == 10: self.agent.replay_buffer.store_transition(self.s, self.a, self.r, s_, self.done) print('step rwd value= ',self.r) self.step_count = self.step_count + 1 self.done = False i += 1 self.s = s_ self.a = self.agent.choose_action(self.s) #action_from_sac self.a2Action global pwr, MAX_PWR pwr = (self.a[1]+1) * MAX_PWR/2 + 1.4 #normalize # sac] rel_turn_rad = math.radians(rel_turn_ang) self.RadTurn = rel_turn_rad + self.RadHead fisrt_time_hold = False if i > 512: self.agent.learn(i, self.r, self.ep_rwd) elif fisrt_time_hold == False: self.chase(MaxSpd_A) error = math.fabs (turnHead2Kick(self.RadHead, self.RadTurn)) if error > angle_thres : # 還沒轉到 self.turn(self.RadTurn) else : # 轉到 self.kick()
import gym from sac import SAC import sys if sys.argv[1] == "1": env = gym.make('BreakoutNoFrameskip-v4') model = SAC(env, discrete=True, feature_extraction="cnn") elif sys.argv[1] == "2": env = gym.make('CartPole-v0') model = SAC(env, discrete=True, feature_extraction="mlp", regularized=False) else: env = gym.make('Pendulum-v0') model = SAC(env, discrete=False, feature_extraction="mlp") model.learn(total_timesteps=500000)
total_timesteps = 5000000 train_freq = 1 irm_replay = False irm_pol_coef = 0.1 log_path = 'mtr_test' log_interval = 10 logger.configure(log_path, format_strs=['stdout', 'csv', 'log']) lr = 3e-4 learning_starts = 100 ent_coef = 'auto' model = SAC(MlpPolicy, env, verbose=1, batch_size=256, learning_rate=lr, env_adaptation_fn = env_adaptation_fn, eval_env=eval_env, eval_env_params=eval_env_params, eval_interval=eval_interval, num_eps_per_eval=num_eps_per_eval, replay_buffer_type=replay_buffer_type, replay_buffer_params=replay_buffer_params, train_freq=train_freq, irm_replay=irm_replay, irm_pol_coef=irm_pol_coef, learning_starts=learning_starts, ent_coef=ent_coef, policy_kwargs={'layers': [256, 256]}) model.learn(total_timesteps=total_timesteps, log_interval=log_interval)