def init_variables(self, info):
    """Initialize per-game state from the simulator's info dict.

    Mirrors the virtual init() of random_walk.cpp.  Available keys include:
    game_time, goal, number_of_robots, penalty_area, codewords, robot_height,
    robot_radius, max_linear_velocity, field, team_info, {rating, name},
    axle_length, resolution, ball_radius.
    """
    # Static game parameters.
    # self.game_time = info['game_time']
    self.field = info['field']
    self.robot_size = 2 * info['robot_radius']
    self.goal = info['goal']
    self.max_linear_velocity = info['max_linear_velocity']
    self.number_of_robots = info['number_of_robots']
    self.end_of_frame = False

    # Per-frame coordinate caches (filled each frame from the received data).
    self.cur_my_posture = []
    self.cur_op_posture = []
    self.cur_ball = []
    self.idx = 0            # index of my robot closest to the ball
    self.wheels = [0] * 10  # (left, right) wheel speeds for 5 robots

    # DQN goalkeeper agent.
    self.state_size = 5     # the number of possible states
    self.action_size = 3    # the number of possible actions
    self.agent = DQNAgent(self.state_size, self.action_size)

    # Training / logging bookkeeping.
    self.global_step = 0           # iteration step
    self.update_step = 0
    self.save_every_steps = 12000  # save the model this often
    self.step = 0                  # statistic step
    self.stats_steps = 6000        # tensorboard logging period
    self.reward_sum = 0
    self.score_op = False
    self.score_op_sum = 0
    # NOTE(review): "boal" is presumably a typo for "ball"; name kept because
    # other code in this file reads self.boal_far.
    self.boal_far = True
    return
def init_variables(self, info):
    """Initialize per-game state (virtual init() in random_walk.cpp).

    info keys include: game_time, goal, number_of_robots, penalty_area,
    codewords, robot_height, robot_radius, max_linear_velocity, field,
    team_info, {rating, name}, axle_length, resolution, ball_radius.
    """
    # Static game parameters.
    # self.game_time = info['game_time']
    self.field = info['field']
    self.robot_size = 2 * info['robot_radius']
    self.goal = info['goal']
    self.max_linear_velocity = info['max_linear_velocity']
    self.number_of_robots = info['number_of_robots']
    self.end_of_frame = False

    ##################################################################
    # Team info: 5 robots, each (x, y, th, active, touch).
    self.cur_my = [[] for _ in range(self.number_of_robots)]
    self.cur_op = [[] for _ in range(self.number_of_robots)]
    self.cur_ball = []         # current ball (x, y) position
    self.prev_ball = [0., 0.]  # previous ball (x, y) position

    # Active flag seen in the previous frame, indexed (team, robot index).
    self.active_flag = [[True] * self.number_of_robots,
                        [True] * self.number_of_robots]
    # Distance to the ball, indexed (team, robot index).
    self.dist_ball = np.zeros((2, self.number_of_robots))
    # My robots' indices, later sorted by distance to the ball.
    self.idxs = list(range(self.number_of_robots))

    self.dlck_cnt = 0        # deadlock counter
    self.avoid_dlck_cnt = 0  # how many times avoid-deadlock was called
    self.wheels = np.zeros(self.number_of_robots * 2)
    ##################################################################

    # DQN "coach": decides whether to run the deadlock-avoidance play.
    self.state_dim = 3   # relative ball state
    self.action_dim = 2  # avoid deadlock or not
    self.coach_agent = DQNAgent(self.state_dim, self.action_dim)

    self.train_step = 0
    self.save_steps = 12000  # save every ~10 minutes
    self.tb_steps = 6000     # tensorboard write period
    self.rwd_sum = 0
    self.scr_sum = 0
    return
class Component(ApplicationSession):
    """
    AI Base + Skeleton.

    WAMP session driving a 5-robot soccer team.  In this version robots 0-3
    are idle (their role functions are commented out below); robot 4 is a
    goalkeeper controlled by a DQN agent that is trained online whenever the
    ball approaches our goal.
    """

    def __init__(self, config):
        ApplicationSession.__init__(self, config)

    def printConsole(self, message):
        # Print and flush immediately so log lines appear in real time.
        print(message)
        sys.__stdout__.flush()

    def onConnect(self):
        self.join(self.config.realm)

    @inlineCallbacks
    def onJoin(self, details):
        """Fetch game info, initialize state, subscribe to frames, signal ready."""
        ##############################################################################
        def init_variables(self, info):
            # Here you have the information of the game (virtual init() in random_walk.cpp)
            # List: game_time, goal, number_of_robots, penalty_area, codewords,
            #       robot_height, robot_radius, max_linear_velocity, field, team_info,
            #       {rating, name}, axle_length, resolution, ball_radius
            # self.game_time = info['game_time']
            self.field = info['field']
            self.robot_size = 2 * info['robot_radius']
            self.goal = info['goal']
            self.max_linear_velocity = info['max_linear_velocity']
            self.number_of_robots = info['number_of_robots']
            self.end_of_frame = False
            # Per-frame coordinate caches (filled by get_coord).
            self.cur_my_posture = []
            self.cur_op_posture = []
            self.cur_ball = []
            self.idx = 0  # index of my robot closest to the ball
            self.wheels = [0 for _ in range(10)]  # (left, right) speeds, 5 robots
            self.state_size = 5  # the number of possible states
            self.action_size = 3  # the number of possible actions
            self.agent = DQNAgent(self.state_size, self.action_size)
            self.global_step = 0  # iteration step
            self.update_step = 0
            self.save_every_steps = 12000  # save the model
            self.step = 0  # statistic step
            self.stats_steps = 6000  # for tensorboard
            self.reward_sum = 0
            self.score_op = False
            self.score_op_sum = 0
            # NOTE(review): "boal" is presumably a typo for "ball".
            self.boal_far = True
            return
        ##############################################################################

        try:
            info = yield self.call(u'aiwc.get_info', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            try:
                self.sub = yield self.subscribe(self.on_event, args.key)
            except Exception as e2:
                self.printConsole("Error: {}".format(e2))

        init_variables(self, info)

        try:
            yield self.call(u'aiwc.ready', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            self.printConsole("I am ready for the game!")

    def get_coord(self, received_frame):
        # Cache the latest ball / team coordinates from the frame.
        self.cur_ball = received_frame.coordinates[BALL]
        self.cur_my_posture = received_frame.coordinates[MY_TEAM]
        self.cur_op_posture = received_frame.coordinates[OP_TEAM]

    def find_closest_robot(self):
        # Store in self.idx the index of my robot nearest to the ball.
        # NOTE(review): the range excludes the last robot (presumably the
        # goalkeeper, id 4) from consideration -- confirm this is intentional.
        min_idx = 0
        min_distance = 9999.99
        for i in range(self.number_of_robots - 1):
            measured_distance = helper.distance(self.cur_ball[X], self.cur_my_posture[i][X],
                                                self.cur_ball[Y], self.cur_my_posture[i][Y])
            if (measured_distance < min_distance):
                min_distance = measured_distance
                min_idx = i
        self.idx = min_idx

    def set_wheel_velocity(self, robot_id, left_wheel, right_wheel):
        # Write (left, right) wheel speeds for one robot into self.wheels,
        # scaling both down proportionally if either exceeds the max speed.
        multiplier = 1
        if (abs(left_wheel) > self.max_linear_velocity or abs(right_wheel) > self.max_linear_velocity):
            if (abs(left_wheel) > abs(right_wheel)):
                multiplier = self.max_linear_velocity / abs(left_wheel)
            else:
                multiplier = self.max_linear_velocity / abs(right_wheel)
        self.wheels[2 * robot_id] = left_wheel * multiplier
        self.wheels[2 * robot_id + 1] = right_wheel * multiplier

    def position(self, robot_id, x, y):
        # Drive robot_id toward (x, y): angular gain ka grows as the distance
        # d_e shrinks, and the robot drives backwards (sign = -1) when the
        # target lies mostly behind it.
        damping = 0.35
        mult_lin = 3.5
        mult_ang = 0.4
        ka = 0
        sign = 1
        dx = x - self.cur_my_posture[robot_id][X]
        dy = y - self.cur_my_posture[robot_id][Y]
        d_e = math.sqrt(math.pow(dx, 2) + math.pow(dy, 2))
        desired_th = (math.pi / 2) if (dx == 0 and dy == 0) else math.atan2(dy, dx)
        d_th = desired_th - self.cur_my_posture[robot_id][TH]
        # Normalize the heading error into (-pi, pi].
        while (d_th > math.pi):
            d_th -= 2 * math.pi
        while (d_th < -math.pi):
            d_th += 2 * math.pi
        if (d_e > 1):
            ka = 17 / 90
        elif (d_e > 0.5):
            ka = 19 / 90
        elif (d_e > 0.3):
            ka = 21 / 90
        elif (d_e > 0.2):
            ka = 23 / 90
        else:
            ka = 25 / 90
        # If the target is more than ~95 degrees off, back up instead of
        # turning all the way around.
        if (d_th > helper.degree2radian(95)):
            d_th -= math.pi
            sign = -1
        elif (d_th < helper.degree2radian(-95)):
            d_th += math.pi
            sign = -1
        if (abs(d_th) > helper.degree2radian(85)):
            # Mostly rotate in place when far off-heading.
            self.set_wheel_velocity(robot_id, -mult_ang * d_th, mult_ang * d_th)
        else:
            if (d_e < 5 and abs(d_th) < helper.degree2radian(40)):
                ka = 0.1
            ka *= 4
            self.set_wheel_velocity(
                robot_id,
                sign * (mult_lin * (1 / (1 + math.exp(-3 * d_e)) - damping) - mult_ang * ka * d_th),
                sign * (mult_lin * (1 / (1 + math.exp(-3 * d_e)) - damping) + mult_ang * ka * d_th))

    @inlineCallbacks
    def on_event(self, f):
        """Per-frame callback: parse the frame, then train/act the DQN goalkeeper."""

        @inlineCallbacks
        def set_wheel(self, robot_wheels):
            # Send all ten wheel speeds to the simulator.
            yield self.call(u'aiwc.set_speed', args.key, robot_wheels)
            return

        def goalie(self, robot_id):
            # Goalie just track the ball[Y] position at a fixed position on the X axis
            x = (-self.field[X] / 2) + (self.robot_size / 2) + 0.05
            # Clamp y inside the goal mouth so the keeper never leaves it.
            y = max(min(self.cur_ball[Y], (self.goal[Y] / 2 - self.robot_size / 2)),
                    -self.goal[Y] / 2 + self.robot_size / 2)
            self.position(robot_id, x, y)

        def defender(self, robot_id, idx, offset_y):
            # idx is the id of my robot currently closest to the ball.
            ox = 0.1
            oy = 0.075
            min_x = (-self.field[X] / 2) + (self.robot_size / 2) + 0.05
            # If ball is on offense
            if (self.cur_ball[X] > 0):
                # If ball is in the upper part of the field (y>0)
                if (self.cur_ball[Y] > 0):
                    self.position(robot_id,
                                  (self.cur_ball[X] - self.field[X] / 2) / 2,
                                  (min(self.cur_ball[Y], self.field[Y] / 3)) + offset_y)
                # If ball is in the lower part of the field (y<0)
                else:
                    self.position(robot_id,
                                  (self.cur_ball[X] - self.field[X] / 2) / 2,
                                  (max(self.cur_ball[Y], -self.field[Y] / 3)) + offset_y)
            # If ball is on defense
            else:
                # If robot is in front of the ball
                if (self.cur_my_posture[robot_id][X] > self.cur_ball[X] - ox):
                    # If this defender is the nearest defender from the ball
                    if (robot_id == idx):
                        self.position(robot_id,
                                      (self.cur_ball[X] - ox),
                                      ((self.cur_ball[Y] + oy)
                                       if (self.cur_my_posture[robot_id][Y] < 0)
                                       else (self.cur_ball[Y] - oy)))
                    else:
                        self.position(robot_id,
                                      (max(self.cur_ball[X] - 0.03, min_x)),
                                      ((self.cur_my_posture[robot_id][Y] + 0.03)
                                       if (self.cur_my_posture[robot_id][Y] < 0)
                                       else (self.cur_my_posture[robot_id][Y] - 0.03)))
                # If robot is behind the ball
                else:
                    if (robot_id == idx):
                        self.position(robot_id, self.cur_ball[X], self.cur_ball[Y])
                    else:
                        self.position(robot_id,
                                      (max(self.cur_ball[X] - 0.03, min_x)),
                                      ((self.cur_my_posture[robot_id][Y] + 0.03)
                                       if (self.cur_my_posture[robot_id][Y] < 0)
                                       else (self.cur_my_posture[robot_id][Y] - 0.03)))

        def midfielder(self, robot_id, idx, offset_y):
            # Chase/shoot when this robot is the closest one (idx), otherwise
            # shadow the ball with a lateral offset.
            ox = 0.1
            oy = 0.075
            ball_dist = helper.distance(self.cur_my_posture[robot_id][X], self.cur_ball[X],
                                        self.cur_my_posture[robot_id][Y], self.cur_ball[Y])
            goal_dist = helper.distance(self.cur_my_posture[robot_id][X], self.field[X] / 2,
                                        self.cur_my_posture[robot_id][Y], 0)
            if (robot_id == idx):
                if (ball_dist < 0.04):
                    # if near the ball and near the opposite team goal
                    if (goal_dist < 1.0):
                        self.position(robot_id, self.field[X] / 2, 0)
                    else:
                        # if near and in front of the ball
                        if (self.cur_ball[X] < self.cur_my_posture[robot_id][X] - 0.044):
                            x_suggest = self.cur_ball[X] - 0.044
                            self.position(robot_id, x_suggest, self.cur_ball[Y])
                        # if near and behind the ball
                        else:
                            self.position(robot_id, self.field[X] + self.goal[X], -self.goal[Y] / 2)
                else:
                    if (self.cur_ball[X] < self.cur_my_posture[robot_id][X]):
                        if (self.cur_ball[Y] > 0):
                            self.position(robot_id,
                                          self.cur_ball[X] - ox,
                                          min(self.cur_ball[Y] - oy, 0.45 * self.field[Y]))
                        else:
                            self.position(robot_id,
                                          self.cur_ball[X] - ox,
                                          min(self.cur_ball[Y] + oy, -0.45 * self.field[Y]))
                    else:
                        self.position(robot_id, self.cur_ball[X], self.cur_ball[Y])
            else:
                self.position(robot_id, self.cur_ball[X] - 0.1, self.cur_ball[Y] + offset_y)

        def set_action(self, robot_id, action_number):
            # Translate the DQN's discrete action into a target position.
            if action_number == 0:
                # go behind the ball up
                self.printConsole("behind the ball up")
                self.position(robot_id, (self.cur_ball[X] - 0.13), (self.cur_ball[Y] + 0.1))
            elif action_number == 1:
                # go behind the ball down
                self.printConsole("behind the ball down")
                self.position(robot_id, (self.cur_ball[X] - 0.13), (self.cur_ball[Y] - 0.1))
            elif action_number == 2:
                # hit the ball: aim well past the ball along the robot-to-ball line
                self.printConsole("hit the ball")
                delta = [self.cur_ball[X] - self.cur_my_posture[robot_id][X],
                         self.cur_ball[Y] - self.cur_my_posture[robot_id][Y]]
                self.position(robot_id,
                              self.cur_ball[X] + 10 * delta[X],
                              self.cur_ball[Y] + 10 * delta[Y])

        # initiate empty frame
        received_frame = Frame()

        if 'time' in f:
            received_frame.time = f['time']
        if 'score' in f:
            received_frame.score = f['score']
        if 'reset_reason' in f:
            received_frame.reset_reason = f['reset_reason']
        if 'coordinates' in f:
            received_frame.coordinates = f['coordinates']
        if 'EOF' in f:
            self.end_of_frame = f['EOF']

        #self.printConsole(received_frame.time)
        #self.printConsole(received_frame.score)
        #self.printConsole(received_frame.reset_reason)
        #self.printConsole(self.end_of_frame)

        ##############################################################################
        ##############################################################################
        if (self.end_of_frame):
            # How to get the robot and ball coordinates: (ROBOT_ID can be 0,1,2,3,4)
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[BALL][X])
            #self.printConsole(received_frame.coordinates[BALL][Y])

            self.get_coord(received_frame)
            ##############################################################################
            reward = 0
            if (received_frame.reset_reason == SCORE_OPPONENT):
                self.score_op = True

            # if ball is near my goal post and goalie is active, start memory and train
            if (self.cur_ball[X] < -1.3) and (abs(self.cur_ball[Y]) < 0.75) and self.cur_my_posture[4][ACTIVE]:
                # next state: normalized goalkeeper pose + ball position
                self.find_closest_robot()
                next_state = [round(self.cur_my_posture[4][X] / 2.05, 2),
                              round(self.cur_my_posture[4][Y] / 1.35, 2),
                              round(self.cur_my_posture[4][TH] / (2 * math.pi), 2),
                              round(self.cur_ball[X] / 2.05, 2),
                              round(self.cur_ball[Y] / 1.35, 2)]
                next_state = np.reshape([next_state], (1, self.state_size, 1))
                # Stack the new state in front of the 3 most recent ones.
                next_history = np.append(next_state, self.agent.history[:, :, :3], axis=2)

                # Reward
                # reward = 0
                if (self.cur_ball[X] < -1.4) and (-0.65 < self.cur_ball[Y] < 0.65):
                    # if the ball is near my goal post -> penalty
                    reward += (self.cur_ball[X] + 1.4) + (abs(self.cur_ball[Y]) - 0.65)
                if self.score_op:
                    # if oppenent goal -> penalty
                    reward -= 100
                    self.score_op_sum += 1
                    self.printConsole("score opponet")
                    self.score_op = False
                if reward != 0:
                    self.printConsole("step reward: " + str(reward))

                # Reset
                self.printConsole("boal far: " + str(self.boal_far))
                # save the sample <s, a, r, s'> to the replay memory
                self.agent.replay_memory(self.agent.history, self.agent.action,
                                         reward, next_history, self.boal_far)
                # every time step do the training
                if len(self.agent.memory) >= self.agent.train_start:
                    self.agent.train_replay()
                # save the history and get action
                self.agent.history = next_history
                self.agent.action = self.agent.get_action(np.reshape(self.agent.history, (1, -1)))
                #self.printConsole("agent action: " + str(self.agent.action))

                # Set goalkeeper wheels
                set_action(self, 4, self.agent.action)
                self.boal_far = False

                self.update_step += 1
                self.step += 1
                if self.update_step % self.agent.update_target_rate == 0:
                    # every reset update the target model to be same with model
                    self.agent.update_target_model()
                    self.printConsole("update target")
            else:
                # Ball is far away: fall back to the heuristic goalie.
                # self.printConsole("stay at goal post")
                goalie(self, 4)
                self.boal_far = True

            # other robot Functions
            #goalie(self, 4)
            #defender(self, 3, self.idx, 0.2)
            #defender(self, 2, self.idx, -0.2)
            #midfielder(self, 1, self.idx, 0.15)
            #midfielder(self, 0, self.idx, -0.15)

            set_wheel(self, self.wheels)

            # go one step
            self.global_step += 1
            self.reward_sum += reward
            ##############################################################################
            # for tensorboard
            self.agent.avg_q_max += np.amax(
                self.agent.model.predict(np.reshape(self.agent.history, (1, -1)))[0])
            ##############################################################################
            if self.global_step % 50 == 0:
                # print current status
                self.printConsole("step: " + str(self.global_step)
                                  + ", Epsilon: " + str(self.agent.epsilon))
                self.printConsole("update step: " + str(self.update_step))

            if self.global_step % self.save_every_steps == 0:
                # save the model
                self.agent.save_model("./save_model/aiwc_dqn.h5")
                self.printConsole("Saved model")

            if self.global_step % self.stats_steps == 0:
                # plot the statics
                stats = [self.reward_sum, self.agent.avg_q_max / float(self.step),
                         self.step, self.agent.avg_loss / float(self.step),
                         self.score_op_sum]
                for i in range(len(stats)):
                    self.agent.sess.run(self.agent.update_ops[i], feed_dict={
                        self.agent.summary_placeholders[i]: float(stats[i])
                    })
                summary_str = self.agent.sess.run(self.agent.summary_op)
                self.agent.summary_writer.add_summary(
                    summary_str, self.global_step / self.stats_steps)
                self.printConsole("average reward: " + str(self.reward_sum / float(self.step))
                                  + ", average_q: " + str(self.agent.avg_q_max / float(self.step))
                                  + ", average loss: " + str(self.agent.avg_loss / float(self.step))
                                  + ", oppenent socore for " + str(self.stats_steps)
                                  + " steps: " + str(self.score_op_sum))
                # Reset the windowed statistics.
                self.reward_sum, self.agent.avg_q_max, self.agent.avg_loss = 0, 0, 0
                self.score_op_sum, self.step = 0, 0
            ##############################################################################
            if (received_frame.reset_reason == GAME_END):
                #(virtual finish() in random_walk.cpp)
                #save your data
                with open(args.datapath + '/result.txt', 'w') as output:
                    #output.write('yourvariables')
                    output.close()
                #unsubscribe; reset or leave
                yield self.sub.unsubscribe()
                try:
                    yield self.leave()
                except Exception as e:
                    self.printConsole("Error: {}".format(e))

            self.end_of_frame = False
        ##############################################################################
        ##############################################################################

    def onDisconnect(self):
        if reactor.running:
            reactor.stop()
class Component(ApplicationSession):
    """
    WAMP session in which a DQN "coach" watches the whole team each frame and
    chooses between normal play (offense with goal/penalty-area foul
    avoidance) and a deadlock-avoidance maneuver.  All five robots share the
    same heuristic midfielder movement.
    """

    def __init__(self, config):
        ApplicationSession.__init__(self, config)

    def printConsole(self, message):
        # Print and flush immediately so log lines appear in real time.
        print(message)
        sys.__stdout__.flush()

    def onConnect(self):
        self.join(self.config.realm)

    @inlineCallbacks
    def onJoin(self, details):
        """Fetch game info, initialize state, subscribe to frames, signal ready."""
        ##############################################################################
        def init_variables(self, info):
            # Here you have the information of the game (virtual init() in random_walk.cpp)
            # List: game_time, goal, number_of_robots, penalty_area, codewords,
            #       robot_height, robot_radius, max_linear_velocity, field, team_info,
            #       {rating, name}, axle_length, resolution, ball_radius
            # self.game_time = info['game_time']
            self.field = info['field']
            self.robot_size = 2*info['robot_radius']
            self.goal = info['goal']
            self.max_linear_velocity = info['max_linear_velocity']
            self.number_of_robots = info['number_of_robots']
            self.end_of_frame = False
            ##################################################################
            # team info, 5 robots, (x,y,th,active,touch)
            self.cur_my = [[] for _ in range(self.number_of_robots)]
            self.cur_op = [[] for _ in range(self.number_of_robots)]
            self.cur_ball = []  # ball (x,y) position
            self.prev_ball = [0., 0.]  # previous ball (x,y) position
            # indicate active flag of previous frame, (team, robot index)
            self.active_flag = [[True for _ in range(self.number_of_robots)],
                                [True for _ in range(self.number_of_robots)]]
            # distance to the ball, (team, robot index)
            self.dist_ball = np.zeros((2, self.number_of_robots))
            # index for which my robot is close to the ball
            self.idxs = [i for i in range(self.number_of_robots)]
            self.dlck_cnt = 0  # deadlock count
            # how many times avoid deadlock function was called
            self.avoid_dlck_cnt = 0
            self.wheels = np.zeros(self.number_of_robots*2)
            ##################################################################
            self.state_dim = 3  # relative ball
            self.action_dim = 2  # avoid dead lock or not
            self.coach_agent = DQNAgent(self.state_dim, self.action_dim)
            self.train_step = 0
            self.save_steps = 12000  # save every 10 minuites
            self.tb_steps = 6000  # write to tensorboard
            self.rwd_sum = 0
            self.scr_sum = 0
            return
        ##############################################################################

        try:
            info = yield self.call(u'aiwc.get_info', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            try:
                self.sub = yield self.subscribe(self.on_event, args.key)
            except Exception as e2:
                self.printConsole("Error: {}".format(e2))

        init_variables(self, info)

        try:
            yield self.call(u'aiwc.ready', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            self.printConsole("I am ready for the game!")

    ##############################################################################
    def get_coord(self, received_frame):
        # Cache the latest ball / team coordinates from the frame.
        self.cur_ball = received_frame.coordinates[BALL]
        self.cur_my = received_frame.coordinates[MY_TEAM]
        self.cur_op = received_frame.coordinates[OP_TEAM]

    def get_idxs(self):
        """Return my robots' indices sorted by distance to the ball (closest first).

        Side effect: refreshes self.dist_ball for both teams.
        """
        # sort according to distant to the ball
        # my team
        for i in range(self.number_of_robots):
            self.dist_ball[MY_TEAM][i] = helper.distance(self.cur_ball[X], self.cur_my[i][X],
                                                         self.cur_ball[Y], self.cur_my[i][Y])
        # opponent team
        for i in range(self.number_of_robots):
            self.dist_ball[OP_TEAM][i] = helper.distance(self.cur_ball[X], self.cur_op[i][X],
                                                         self.cur_ball[Y], self.cur_op[i][Y])
        # my team distance index
        idxs = sorted(range(len(self.dist_ball[MY_TEAM])),
                      key=lambda k: self.dist_ball[MY_TEAM][k])
        return idxs

    def count_deadlock(self):
        # Increment the deadlock counter while the ball sits near a side wall
        # (|y| > 0.65) and barely moved since the previous frame.
        # delta of ball
        delta_b = helper.distance(self.cur_ball[X], self.prev_ball[X], \
            self.cur_ball[Y], self.prev_ball[Y])
        if (abs(self.cur_ball[Y]) > 0.65) and (delta_b < 0.02):
            self.dlck_cnt += 1
        else:
            self.dlck_cnt = 0
            self.avoid_dlck_cnt = 0

    def get_reward(self):
        """Reward = (opponents knocked out) - (my robots knocked out) this frame."""
        reward = 0
        # if my robot become deactive, recieve penalty
        for i in range(self.number_of_robots):
            if (not self.cur_my[i][ACTIVE]) and (self.active_flag[MY_TEAM][i]):
                reward -= 1
                self.printConsole("my team go to the bench")
            # Track the flag every frame so re-activations are seen too.
            self.active_flag[MY_TEAM][i] = self.cur_my[i][ACTIVE]
        # if opponent robot become deactive, recieve reward
        for i in range(self.number_of_robots):
            if (not self.cur_op[i][ACTIVE]) and (self.active_flag[OP_TEAM][i]):
                reward += 1
                self.printConsole("op team go to the bench")
            self.active_flag[OP_TEAM][i] = self.cur_op[i][ACTIVE]
        return reward

    def get_next_state(self):
        """Build the 3-dim coach state; refreshes self.idxs and deadlock count."""
        self.idxs = self.get_idxs()
        self.count_deadlock()
        # my team closest distance to the ball
        # op team closest distance to the ball
        # deadlock count
        next_state = [self.dist_ball[MY_TEAM][self.idxs[0]],
                      self.dist_ball[OP_TEAM].min(),
                      self.dlck_cnt]
        return np.array(next_state)

    def step(self, received_frame):
        """One environment step: returns (reward, next_state, reset)."""
        self.get_coord(received_frame)
        reward = self.get_reward()
        next_state = self.get_next_state()
        # reset is True on any game event (score, foul, game end, ...).
        if(received_frame.reset_reason != NONE) \
            and (received_frame.reset_reason is not None):
            reset = True
        else:
            reset = False
        return reward, next_state, reset

    ##############################################################################
    # function for heuristic coach's action
    def count_goal_area(self):
        # Number of my robots inside (either) goal area.
        cnt = 0
        for i in range(self.number_of_robots):
            if (abs(self.cur_my[i][X]) > 1.6) and (abs(self.cur_my[i][Y]) < 0.43):
                cnt += 1
        return cnt

    def count_penalty_area(self):
        # Number of my robots inside (either) penalty area.
        cnt = 0
        for i in range(self.number_of_robots):
            if (abs(self.cur_my[i][X]) > 1.3) and (abs(self.cur_my[i][Y]) < 0.7):
                cnt += 1
        return cnt

    ##############################################################################
    # function for heuristic moving
    def set_wheel_velocity(self, robot_id, left_wheel, right_wheel):
        # Write (left, right) wheel speeds for one robot into self.wheels,
        # scaling both down proportionally if either exceeds the max speed.
        multiplier = 1
        if(abs(left_wheel) > self.max_linear_velocity or abs(right_wheel) > self.max_linear_velocity):
            if (abs(left_wheel) > abs(right_wheel)):
                multiplier = self.max_linear_velocity / abs(left_wheel)
            else:
                multiplier = self.max_linear_velocity / abs(right_wheel)
        self.wheels[2*robot_id] = left_wheel*multiplier
        self.wheels[2*robot_id + 1] = right_wheel*multiplier

    def position(self, robot_id, x, y):
        # Drive robot_id toward (x, y): angular gain ka grows as the distance
        # d_e shrinks, and the robot drives backwards (sign = -1) when the
        # target lies mostly behind it.
        damping = 0.35
        mult_lin = 3.5
        mult_ang = 0.4
        ka = 0
        sign = 1
        dx = x - self.cur_my[robot_id][X]
        dy = y - self.cur_my[robot_id][Y]
        d_e = math.sqrt(math.pow(dx, 2) + math.pow(dy, 2))
        desired_th = (math.pi/2) if (dx == 0 and dy == 0) else math.atan2(dy, dx)
        d_th = desired_th - self.cur_my[robot_id][TH]
        # Normalize the heading error into (-pi, pi].
        while(d_th > math.pi):
            d_th -= 2*math.pi
        while(d_th < -math.pi):
            d_th += 2*math.pi
        if (d_e > 1):
            ka = 17/90
        elif (d_e > 0.5):
            ka = 19/90
        elif (d_e > 0.3):
            ka = 21/90
        elif (d_e > 0.2):
            ka = 23/90
        else:
            ka = 25/90
        # If the target is more than ~95 degrees off, back up instead of
        # turning all the way around.
        if (d_th > helper.degree2radian(95)):
            d_th -= math.pi
            sign = -1
        elif (d_th < helper.degree2radian(-95)):
            d_th += math.pi
            sign = -1
        if (abs(d_th) > helper.degree2radian(85)):
            # Mostly rotate in place when far off-heading.
            self.set_wheel_velocity(robot_id, -mult_ang*d_th, mult_ang*d_th)
        else:
            if (d_e < 5 and abs(d_th) < helper.degree2radian(40)):
                ka = 0.1
            ka *= 4
            self.set_wheel_velocity(robot_id,
                sign * (mult_lin * (1 / (1 + math.exp(-3*d_e)) - damping) - mult_ang * ka * d_th),
                sign * (mult_lin * (1 / (1 + math.exp(-3*d_e)) - damping) + mult_ang * ka * d_th))
    ##############################################################################

    @inlineCallbacks
    def on_event(self, f):
        """Per-frame callback: RL step for the coach, then heuristic movement."""

        @inlineCallbacks
        def set_wheel(self, robot_wheels):
            # Send all ten wheel speeds to the simulator.
            yield self.call(u'aiwc.set_speed', args.key, robot_wheels)
            return

        def avoid_goal_foul(self):
            # Too many robots in the goal area: keep the two closest on the
            # ball, park the rest at center field.
            midfielder(self, self.idxs[0])
            midfielder(self, self.idxs[1])
            self.position(self.idxs[2], 0, 0)
            self.position(self.idxs[3], 0, 0)
            self.position(self.idxs[4], 0, 0)

        def avoid_penalty_foul(self):
            # Too many robots in the penalty area: keep the three closest on
            # the ball, park the rest at center field.
            midfielder(self, self.idxs[0])
            midfielder(self, self.idxs[1])
            midfielder(self, self.idxs[2])
            self.position(self.idxs[3], 0, 0)
            self.position(self.idxs[4], 0, 0)

        def avoid_deadlock(self):
            # Pull the whole team to the ball's x at mid-height to free it.
            self.position(0, self.cur_ball[X], 0)
            self.position(1, self.cur_ball[X], 0)
            self.position(2, self.cur_ball[X], 0)
            self.position(3, self.cur_ball[X], 0)
            self.position(4, self.cur_ball[X], 0)

        def midfielder(self, robot_id):
            # Shoot through the ball when close and behind it; otherwise take
            # a dribbling position behind the ball on the ball-to-goal line.
            goal_dist = helper.distance(self.cur_my[robot_id][X], self.field[X]/2,
                                        self.cur_my[robot_id][Y], 0)  # NOTE(review): unused
            shoot_mul = 1  # NOTE(review): unused
            dribble_dist = 0.426
            v = 5
            # Unit vector from the ball toward the opponent goal.
            goal_to_ball_unit = helper.unit([self.field[X]/2 - self.cur_ball[X],
                                             - self.cur_ball[Y]])
            delta = [self.cur_ball[X] - self.cur_my[robot_id][X],
                     self.cur_ball[Y] - self.cur_my[robot_id][Y]]
            if (self.dist_ball[MY_TEAM][robot_id] < 0.5) and (delta[X] > 0):
                self.position(robot_id,
                              self.cur_ball[X] + v*delta[X],
                              self.cur_ball[Y] + v*delta[Y])
            else:
                self.position(robot_id,
                              self.cur_ball[X] - dribble_dist*goal_to_ball_unit[X],
                              self.cur_ball[Y] - dribble_dist*goal_to_ball_unit[Y])

        def offense(self):
            # Everyone plays the midfielder behavior.
            midfielder(self, 0)
            midfielder(self, 1)
            midfielder(self, 2)
            midfielder(self, 3)
            midfielder(self, 4)

        def set_action(self, act_idx):
            # Translate the coach's discrete action into team behavior.
            if act_idx == 0:
                # count how many robots is in the goal area
                goal_area_cnt = self.count_goal_area()
                # count how many robots is in the penalty area
                penalty_area_cnt = self.count_penalty_area()
                if goal_area_cnt > 2:
                    avoid_goal_foul(self)
                elif penalty_area_cnt > 3:
                    avoid_penalty_foul(self)
                else:
                    offense(self)
            elif act_idx == 1:
                avoid_deadlock(self)
                self.printConsole('avoid deadlock')
            else:
                self.printConsole('action index is over 1')

        # initiate empty frame
        received_frame = Frame()

        if 'time' in f:
            received_frame.time = f['time']
        if 'score' in f:
            received_frame.score = f['score']
        if 'reset_reason' in f:
            received_frame.reset_reason = f['reset_reason']
        if 'coordinates' in f:
            received_frame.coordinates = f['coordinates']
        if 'EOF' in f:
            self.end_of_frame = f['EOF']

        #self.printConsole(received_frame.time)
        #self.printConsole(received_frame.score)
        #self.printConsole(received_frame.reset_reason)
        #self.printConsole(self.end_of_frame)

        ##############################################################################
        if (self.end_of_frame):
            # How to get the robot and ball coordinates: (ROBOT_ID can be 0,1,2,3,4)
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[BALL][X])
            #self.printConsole(received_frame.coordinates[BALL][Y])
            ##############################################################################
            # get next state, reward, and reset info
            reward, next_state, reset = self.step(received_frame)
            self.printConsole('reward: '+str(reward))
            self.printConsole('next state: '+str(next_state))
            self.printConsole('reset '+str(reset))
            next_state = np.reshape(next_state, (1, self.state_dim, 1))

            # # update: store <s, a, r, s', reset> and train when warm
            self.coach_agent.replay_memory(self.coach_agent.history,
                                           self.coach_agent.action,
                                           reward, next_state, reset)
            if len(self.coach_agent.memory) >= self.coach_agent.train_start:
                self.coach_agent.train_replay()
                self.printConsole('train')

            # # save next state and choose the next action
            self.coach_agent.history = next_state
            self.coach_agent.action = self.coach_agent.\
                get_action(np.reshape(self.coach_agent.history, (1, -1)))

            # coach's action to positions to go
            set_action(self, self.coach_agent.action)
            set_wheel(self, self.wheels.tolist())

            self.prev_ball = self.cur_ball
            self.train_step += 1  # increase global step
            self.rwd_sum += reward
            # Track the score differential for tensorboard.
            if received_frame.reset_reason == SCORE_MYTEAM:
                self.scr_sum += 1
            elif received_frame.reset_reason == SCORE_OPPONENT:
                self.scr_sum -= 1
            self.printConsole('step: ' + str(self.train_step))
            ##############################################################################
            if self.train_step % self.save_steps == 0:
                self.coach_agent.save_model("./save_model/coach_dlck_dqn%s.h5" \
                    % self.train_step)
                self.printConsole('epsilon: '+str(self.coach_agent.epsilon))

            if self.train_step % self.tb_steps == 0:
                # Write windowed reward/score statistics to tensorboard.
                data = [self.rwd_sum, self.scr_sum]
                for i in range(len(data)):
                    self.coach_agent.sess.run(self.coach_agent.update_ops[i],
                        feed_dict={self.coach_agent.summary_placeholders[i]: float(data[i])})
                summary_str = self.coach_agent.sess.run(self.coach_agent.summary_op)
                self.coach_agent.summary_writer.add_summary(
                    summary_str, self.train_step)
                self.rwd_sum, self.scr_sum = 0., 0.
            ##############################################################################
            if(received_frame.reset_reason == GAME_END):
                #(virtual finish() in random_walk.cpp)
                #save your data
                with open(args.datapath + '/result.txt', 'w') as output:
                    #output.write('yourvariables')
                    output.close()
                #unsubscribe; reset or leave
                yield self.sub.unsubscribe()
                try:
                    yield self.leave()
                except Exception as e:
                    self.printConsole("Error: {}".format(e))

            self.end_of_frame = False
        ##############################################################################

    def onDisconnect(self):
        if reactor.running:
            reactor.stop()
class Component(ApplicationSession):
    """
    AI Base + Skeleton

    WAMP session that subscribes to AIWC game frames, runs one DQN step per
    frame for robot 0 (state = robot-0 pose + ball position), and sends the
    resulting wheel speeds back to the simulator.
    """

    def __init__(self, config):
        ApplicationSession.__init__(self, config)

    def printConsole(self, message):
        # Print via the real stdout and flush so messages appear immediately
        # even when stdout is redirected by the framework.
        print(message)
        sys.__stdout__.flush()

    def onConnect(self):
        self.join(self.config.realm)

    @inlineCallbacks
    def onJoin(self, details):

        ##############################################################################
        def init_variables(self, info):
            # Here you have the information of the game (virtual init() in random_walk.cpp)
            # List: game_time, goal, number_of_robots, penalty_area, codewords,
            #       robot_height, robot_radius, max_linear_velocity, field, team_info,
            #       {rating, name}, axle_length, resolution, ball_radius
            # self.game_time = info['game_time']
            self.field = info['field']
            self.robot_size = 2 * info['robot_radius']
            self.goal = info['goal']
            self.max_linear_velocity = info['max_linear_velocity']
            self.number_of_robots = info['number_of_robots']
            self.end_of_frame = False
            self.cur_posture = []  # my team's postures in the current frame
            self.cur_ball = []     # ball (x, y) in the current frame
            self.prev_ball = []    # ball (x, y) in the previous frame
            self.idx = 0
            # two wheel speeds per robot; 5 robots -> 10 entries
            self.wheels = [0 for _ in range(10)]
            self.state_size = 5    # the number of possible states
            self.action_size = 11  # the number of possible actions
            self.agent = DQNAgent(self.state_size, self.action_size)
            self.global_step = 0   # iteration step
            self.save_every_steps = 100  # save the model
            self.step = 0          # statistic step
            self.stats_steps = 100  # for tensorboard
            self.reward_sum = 0
            return
        ##############################################################################

        try:
            info = yield self.call(u'aiwc.get_info', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            try:
                self.sub = yield self.subscribe(self.on_event, args.key)
            except Exception as e2:
                self.printConsole("Error: {}".format(e2))

        init_variables(self, info)

        try:
            yield self.call(u'aiwc.ready', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            self.printConsole("I am ready for the game!")

    def get_coord(self, received_frame):
        """Cache the ball and my-team coordinates from the received frame."""
        self.cur_ball = received_frame.coordinates[BALL]
        self.cur_posture = received_frame.coordinates[MY_TEAM]

    @inlineCallbacks
    def on_event(self, f):
        """Handle one frame message: compute state/reward, train, act."""

        @inlineCallbacks
        def set_wheel(self, robot_wheels):
            # Send all wheel speeds to the simulator (fire-and-forget deferred).
            yield self.call(u'aiwc.set_speed', args.key, robot_wheels)
            return

        def set_action(robot_id, action_number):
            # Map the discrete action index to (left, right) wheel speeds for
            # the given robot; speeds are written into self.wheels in place.
            if action_number == 0:
                self.wheels[2 * robot_id] = 0.75
                self.wheels[2 * robot_id + 1] = 0.75
                # Go Forward with fixed velocity
            elif action_number == 1:
                self.wheels[2 * robot_id] = 0.75
                self.wheels[2 * robot_id + 1] = 0.5
                # Turn
            elif action_number == 2:
                self.wheels[2 * robot_id] = 0.75
                self.wheels[2 * robot_id + 1] = 0.25
                # Turn
            elif action_number == 3:
                self.wheels[2 * robot_id] = 0.75
                self.wheels[2 * robot_id + 1] = 0
                # Turn
            elif action_number == 4:
                self.wheels[2 * robot_id] = 0.5
                # BUG FIX: was 75 (typo). Every other action uses speeds in
                # [-0.75, 0.75], and action 4 mirrors action 1's (0.75, 0.5).
                self.wheels[2 * robot_id + 1] = 0.75
                # Turn
            elif action_number == 5:
                self.wheels[2 * robot_id] = 0.25
                self.wheels[2 * robot_id + 1] = 0.75
                # Turn
            elif action_number == 6:
                self.wheels[2 * robot_id] = 0
                self.wheels[2 * robot_id + 1] = 0.75
                # Turn
            elif action_number == 7:
                self.wheels[2 * robot_id] = -0.75
                self.wheels[2 * robot_id + 1] = -0.75
                # Go Backward with fixed velocity
            elif action_number == 8:
                self.wheels[2 * robot_id] = -0.1
                self.wheels[2 * robot_id + 1] = 0.1
                # Spin
            elif action_number == 9:
                self.wheels[2 * robot_id] = 0.1
                self.wheels[2 * robot_id + 1] = -0.1
                # Spin
            elif action_number == 10:
                self.wheels[2 * robot_id] = 0
                self.wheels[2 * robot_id + 1] = 0
                # Do not move

        # initiate empty frame
        received_frame = Frame()

        if 'time' in f:
            received_frame.time = f['time']
        if 'score' in f:
            received_frame.score = f['score']
        if 'reset_reason' in f:
            received_frame.reset_reason = f['reset_reason']
        if 'coordinates' in f:
            received_frame.coordinates = f['coordinates']
        if 'EOF' in f:
            self.end_of_frame = f['EOF']

        ##############################################################################
        if (self.end_of_frame):
            # Coordinates are indexed as (ROBOT_ID can be 0..4):
            #   received_frame.coordinates[MY_TEAM|OP_TEAM][ROBOT_ID][X|Y|TH|ACTIVE|TOUCH]
            #   received_frame.coordinates[BALL][X|Y]
            ##############################################################################

            # next state: robot-0 pose and ball position, normalized by the
            # field half-extents (2.05, 1.35) and by a full turn (2*pi).
            self.get_coord(received_frame)
            next_state = [
                round(received_frame.coordinates[MY_TEAM][0][X] / 2.05, 2),
                round(received_frame.coordinates[MY_TEAM][0][Y] / 1.35, 2),
                round(
                    received_frame.coordinates[MY_TEAM][0][TH] /
                    (2 * math.pi), 2),
                round(received_frame.coordinates[BALL][X] / 2.05, 2),
                round(received_frame.coordinates[BALL][Y] / 1.35, 2)
            ]
            next_state = np.reshape([next_state], (1, self.state_size, 1))
            # prepend the new state, keeping the 3 most recent past states
            next_history = np.append(next_state,
                                     self.agent.history[:, :, :3],
                                     axis=2)

            # Reward: decays exponentially with robot-0's distance to the
            # ball, plus a large bonus when robot 0 is touching the ball.
            reward = 3 * math.exp(-10 * (helper.distance(
                self.cur_posture[0][X], self.cur_ball[X],
                self.cur_posture[0][Y], self.cur_ball[Y]) / 20))
            if self.cur_posture[0][TOUCH]:
                reward += 100
                self.printConsole("ball touch, reward is " + str(reward))

            # Episode boundary: either team scored or a deadlock reset occurred.
            reset = (received_frame.reset_reason == SCORE_MYTEAM) or (
                received_frame.reset_reason == SCORE_OPPONENT) or (
                    received_frame.reset_reason == DEADLOCK)

            ##############################################################################
            # accumulate max-Q of the current history for tensorboard stats
            self.agent.avg_q_max += np.amax(
                self.agent.model.predict(
                    np.reshape(self.agent.history, (1, -1)))[0])
            ##############################################################################

            # save the sample <s, a, r, s'> to the replay memory
            self.agent.replay_memory(self.agent.history, self.agent.action,
                                     reward, next_history, reset)
            # every time step do the training
            if len(self.agent.memory) >= self.agent.train_start:
                self.agent.train_replay()

            # save the history and get the next action
            self.agent.history = next_history
            self.agent.action = self.agent.get_action(
                np.reshape(self.agent.history, (1, -1)))

            # Set robot wheels (only robot 0 is driven by the agent)
            set_action(0, self.agent.action)
            set_wheel(self, self.wheels)

            # go one step
            self.global_step += 1
            self.step += 1
            self.reward_sum += reward

            ##############################################################################
            if self.global_step % 50 == 0:
                self.printConsole("step: " + str(self.global_step) +
                                  ", Epsilon: " + str(self.agent.epsilon) +
                                  ", reward: " + str(reward))

            if self.global_step % self.agent.update_target_rate == 0:
                # every reset update the target model to be same with model
                self.agent.update_target_model()
                self.printConsole("update target")

            if self.global_step % self.save_every_steps == 0:
                # save the model
                self.agent.save_model("./save_model/aiwc_dqn.h5")
                self.printConsole("Saved model")

            if self.global_step % self.stats_steps == 0:
                # plot the statistics on tensorboard
                stats = [
                    self.reward_sum, self.agent.avg_q_max / float(self.step),
                    self.step, self.agent.avg_loss / float(self.step)
                ]
                for i in range(len(stats)):
                    self.agent.sess.run(self.agent.update_ops[i],
                                        feed_dict={
                                            self.agent.summary_placeholders[i]:
                                            float(stats[i])
                                        })
                summary_str = self.agent.sess.run(self.agent.summary_op)
                # BUG FIX: use floor division so the summary step is an int
                # (true division yields a float under Python 3; the division
                # is exact here since global_step % stats_steps == 0).
                self.agent.summary_writer.add_summary(
                    summary_str, self.global_step // self.stats_steps)
                self.printConsole(
                    "average reward: " +
                    str(self.reward_sum / float(self.step)) +
                    ", average_q: " +
                    str(self.agent.avg_q_max / float(self.step)) +
                    ", average loss: " +
                    str(self.agent.avg_loss / float(self.step)))
                self.reward_sum, self.agent.avg_q_max, self.agent.avg_loss = 0, 0, 0
                self.step = 0
            ##############################################################################

            if (received_frame.reset_reason == GAME_END):
                # (virtual finish() in random_walk.cpp)
                # save your data
                with open(args.datapath + '/result.txt', 'w') as output:
                    # output.write('yourvariables')
                    output.close()
                # unsubscribe; reset or leave
                yield self.sub.unsubscribe()
                try:
                    yield self.leave()
                except Exception as e:
                    self.printConsole("Error: {}".format(e))

            self.end_of_frame = False
    ##############################################################################

    def onDisconnect(self):
        if reactor.running:
            reactor.stop()