def init_variables(self, info):
    """Initialize per-game state from the simulator's info dict.

    Mirrors the virtual init() of random_walk.cpp.  Available keys include:
    game_time, goal, number_of_robots, penalty_area, codewords, robot_height,
    robot_radius, max_linear_velocity, field, team_info, {rating, name},
    axle_length, resolution, ball_radius.
    """
    # Static game parameters.
    # self.game_time = info['game_time']
    self.field = info['field']
    self.robot_size = 2 * info['robot_radius']
    self.goal = info['goal']
    self.max_linear_velocity = info['max_linear_velocity']
    self.number_of_robots = info['number_of_robots']
    self.end_of_frame = False

    # Per-frame coordinate caches (filled each frame from the received data).
    self.cur_my_posture = []
    self.cur_op_posture = []
    self.cur_ball = []
    self.idx = 0            # index of my robot closest to the ball
    self.wheels = [0] * 10  # (left, right) wheel speeds for 5 robots

    # DQN goalkeeper agent.
    self.state_size = 5     # the number of possible states
    self.action_size = 3    # the number of possible actions
    self.agent = DQNAgent(self.state_size, self.action_size)

    # Training / logging bookkeeping.
    self.global_step = 0           # iteration step
    self.update_step = 0
    self.save_every_steps = 12000  # save the model this often
    self.step = 0                  # statistic step
    self.stats_steps = 6000        # tensorboard logging period
    self.reward_sum = 0
    self.score_op = False
    self.score_op_sum = 0
    # NOTE(review): "boal" is presumably a typo for "ball"; name kept because
    # other code in this file reads self.boal_far.
    self.boal_far = True
    return
def init_variables(self, info):
    """Initialize per-game state (virtual init() in random_walk.cpp).

    info keys include: game_time, goal, number_of_robots, penalty_area,
    codewords, robot_height, robot_radius, max_linear_velocity, field,
    team_info, {rating, name}, axle_length, resolution, ball_radius.
    """
    # Static game parameters.
    # self.game_time = info['game_time']
    self.field = info['field']
    self.robot_size = 2 * info['robot_radius']
    self.goal = info['goal']
    self.max_linear_velocity = info['max_linear_velocity']
    self.number_of_robots = info['number_of_robots']
    self.end_of_frame = False

    ##################################################################
    # Team info: 5 robots, each (x, y, th, active, touch).
    self.cur_my = [[] for _ in range(self.number_of_robots)]
    self.cur_op = [[] for _ in range(self.number_of_robots)]
    self.cur_ball = []         # current ball (x, y) position
    self.prev_ball = [0., 0.]  # previous ball (x, y) position

    # Active flag seen in the previous frame, indexed (team, robot index).
    self.active_flag = [[True] * self.number_of_robots,
                        [True] * self.number_of_robots]
    # Distance to the ball, indexed (team, robot index).
    self.dist_ball = np.zeros((2, self.number_of_robots))
    # My robots' indices, later sorted by distance to the ball.
    self.idxs = list(range(self.number_of_robots))

    self.dlck_cnt = 0        # deadlock counter
    self.avoid_dlck_cnt = 0  # how many times avoid-deadlock was called
    self.wheels = np.zeros(self.number_of_robots * 2)
    ##################################################################

    # DQN "coach": decides whether to run the deadlock-avoidance play.
    self.state_dim = 3   # relative ball state
    self.action_dim = 2  # avoid deadlock or not
    self.coach_agent = DQNAgent(self.state_dim, self.action_dim)

    self.train_step = 0
    self.save_steps = 12000  # save every ~10 minutes
    self.tb_steps = 6000     # tensorboard write period
    self.rwd_sum = 0
    self.scr_sum = 0
    return
class Component(ApplicationSession):
    """
    AI Base + Skeleton.

    WAMP session driving a 5-robot soccer team.  In this version robots 0-3
    are idle (their role functions are commented out below); robot 4 is a
    goalkeeper controlled by a DQN agent that is trained online whenever the
    ball approaches our goal.
    """

    def __init__(self, config):
        ApplicationSession.__init__(self, config)

    def printConsole(self, message):
        # Print and flush immediately so log lines appear in real time.
        print(message)
        sys.__stdout__.flush()

    def onConnect(self):
        self.join(self.config.realm)

    @inlineCallbacks
    def onJoin(self, details):
        """Fetch game info, initialize state, subscribe to frames, signal ready."""
        ##############################################################################
        def init_variables(self, info):
            # Here you have the information of the game (virtual init() in random_walk.cpp)
            # List: game_time, goal, number_of_robots, penalty_area, codewords,
            #       robot_height, robot_radius, max_linear_velocity, field, team_info,
            #       {rating, name}, axle_length, resolution, ball_radius
            # self.game_time = info['game_time']
            self.field = info['field']
            self.robot_size = 2 * info['robot_radius']
            self.goal = info['goal']
            self.max_linear_velocity = info['max_linear_velocity']
            self.number_of_robots = info['number_of_robots']
            self.end_of_frame = False
            # Per-frame coordinate caches (filled by get_coord).
            self.cur_my_posture = []
            self.cur_op_posture = []
            self.cur_ball = []
            self.idx = 0  # index of my robot closest to the ball
            self.wheels = [0 for _ in range(10)]  # (left, right) speeds, 5 robots
            self.state_size = 5  # the number of possible states
            self.action_size = 3  # the number of possible actions
            self.agent = DQNAgent(self.state_size, self.action_size)
            self.global_step = 0  # iteration step
            self.update_step = 0
            self.save_every_steps = 12000  # save the model
            self.step = 0  # statistic step
            self.stats_steps = 6000  # for tensorboard
            self.reward_sum = 0
            self.score_op = False
            self.score_op_sum = 0
            # NOTE(review): "boal" is presumably a typo for "ball".
            self.boal_far = True
            return
        ##############################################################################

        try:
            info = yield self.call(u'aiwc.get_info', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            try:
                self.sub = yield self.subscribe(self.on_event, args.key)
            except Exception as e2:
                self.printConsole("Error: {}".format(e2))

        init_variables(self, info)

        try:
            yield self.call(u'aiwc.ready', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            self.printConsole("I am ready for the game!")

    def get_coord(self, received_frame):
        # Cache the latest ball / team coordinates from the frame.
        self.cur_ball = received_frame.coordinates[BALL]
        self.cur_my_posture = received_frame.coordinates[MY_TEAM]
        self.cur_op_posture = received_frame.coordinates[OP_TEAM]

    def find_closest_robot(self):
        # Store in self.idx the index of my robot nearest to the ball.
        # NOTE(review): the range excludes the last robot (presumably the
        # goalkeeper, id 4) from consideration -- confirm this is intentional.
        min_idx = 0
        min_distance = 9999.99
        for i in range(self.number_of_robots - 1):
            measured_distance = helper.distance(self.cur_ball[X], self.cur_my_posture[i][X],
                                                self.cur_ball[Y], self.cur_my_posture[i][Y])
            if (measured_distance < min_distance):
                min_distance = measured_distance
                min_idx = i
        self.idx = min_idx

    def set_wheel_velocity(self, robot_id, left_wheel, right_wheel):
        # Write (left, right) wheel speeds for one robot into self.wheels,
        # scaling both down proportionally if either exceeds the max speed.
        multiplier = 1
        if (abs(left_wheel) > self.max_linear_velocity or abs(right_wheel) > self.max_linear_velocity):
            if (abs(left_wheel) > abs(right_wheel)):
                multiplier = self.max_linear_velocity / abs(left_wheel)
            else:
                multiplier = self.max_linear_velocity / abs(right_wheel)
        self.wheels[2 * robot_id] = left_wheel * multiplier
        self.wheels[2 * robot_id + 1] = right_wheel * multiplier

    def position(self, robot_id, x, y):
        # Drive robot_id toward (x, y): angular gain ka grows as the distance
        # d_e shrinks, and the robot drives backwards (sign = -1) when the
        # target lies mostly behind it.
        damping = 0.35
        mult_lin = 3.5
        mult_ang = 0.4
        ka = 0
        sign = 1
        dx = x - self.cur_my_posture[robot_id][X]
        dy = y - self.cur_my_posture[robot_id][Y]
        d_e = math.sqrt(math.pow(dx, 2) + math.pow(dy, 2))
        desired_th = (math.pi / 2) if (dx == 0 and dy == 0) else math.atan2(dy, dx)
        d_th = desired_th - self.cur_my_posture[robot_id][TH]
        # Normalize the heading error into (-pi, pi].
        while (d_th > math.pi):
            d_th -= 2 * math.pi
        while (d_th < -math.pi):
            d_th += 2 * math.pi
        if (d_e > 1):
            ka = 17 / 90
        elif (d_e > 0.5):
            ka = 19 / 90
        elif (d_e > 0.3):
            ka = 21 / 90
        elif (d_e > 0.2):
            ka = 23 / 90
        else:
            ka = 25 / 90
        # If the target is more than ~95 degrees off, back up instead of
        # turning all the way around.
        if (d_th > helper.degree2radian(95)):
            d_th -= math.pi
            sign = -1
        elif (d_th < helper.degree2radian(-95)):
            d_th += math.pi
            sign = -1
        if (abs(d_th) > helper.degree2radian(85)):
            # Mostly rotate in place when far off-heading.
            self.set_wheel_velocity(robot_id, -mult_ang * d_th, mult_ang * d_th)
        else:
            if (d_e < 5 and abs(d_th) < helper.degree2radian(40)):
                ka = 0.1
            ka *= 4
            self.set_wheel_velocity(
                robot_id,
                sign * (mult_lin * (1 / (1 + math.exp(-3 * d_e)) - damping) - mult_ang * ka * d_th),
                sign * (mult_lin * (1 / (1 + math.exp(-3 * d_e)) - damping) + mult_ang * ka * d_th))

    @inlineCallbacks
    def on_event(self, f):
        """Per-frame callback: parse the frame, then train/act the DQN goalkeeper."""

        @inlineCallbacks
        def set_wheel(self, robot_wheels):
            # Send all ten wheel speeds to the simulator.
            yield self.call(u'aiwc.set_speed', args.key, robot_wheels)
            return

        def goalie(self, robot_id):
            # Goalie just track the ball[Y] position at a fixed position on the X axis
            x = (-self.field[X] / 2) + (self.robot_size / 2) + 0.05
            # Clamp y inside the goal mouth so the keeper never leaves it.
            y = max(min(self.cur_ball[Y], (self.goal[Y] / 2 - self.robot_size / 2)),
                    -self.goal[Y] / 2 + self.robot_size / 2)
            self.position(robot_id, x, y)

        def defender(self, robot_id, idx, offset_y):
            # idx is the id of my robot currently closest to the ball.
            ox = 0.1
            oy = 0.075
            min_x = (-self.field[X] / 2) + (self.robot_size / 2) + 0.05
            # If ball is on offense
            if (self.cur_ball[X] > 0):
                # If ball is in the upper part of the field (y>0)
                if (self.cur_ball[Y] > 0):
                    self.position(robot_id,
                                  (self.cur_ball[X] - self.field[X] / 2) / 2,
                                  (min(self.cur_ball[Y], self.field[Y] / 3)) + offset_y)
                # If ball is in the lower part of the field (y<0)
                else:
                    self.position(robot_id,
                                  (self.cur_ball[X] - self.field[X] / 2) / 2,
                                  (max(self.cur_ball[Y], -self.field[Y] / 3)) + offset_y)
            # If ball is on defense
            else:
                # If robot is in front of the ball
                if (self.cur_my_posture[robot_id][X] > self.cur_ball[X] - ox):
                    # If this defender is the nearest defender from the ball
                    if (robot_id == idx):
                        self.position(robot_id,
                                      (self.cur_ball[X] - ox),
                                      ((self.cur_ball[Y] + oy)
                                       if (self.cur_my_posture[robot_id][Y] < 0)
                                       else (self.cur_ball[Y] - oy)))
                    else:
                        self.position(robot_id,
                                      (max(self.cur_ball[X] - 0.03, min_x)),
                                      ((self.cur_my_posture[robot_id][Y] + 0.03)
                                       if (self.cur_my_posture[robot_id][Y] < 0)
                                       else (self.cur_my_posture[robot_id][Y] - 0.03)))
                # If robot is behind the ball
                else:
                    if (robot_id == idx):
                        self.position(robot_id, self.cur_ball[X], self.cur_ball[Y])
                    else:
                        self.position(robot_id,
                                      (max(self.cur_ball[X] - 0.03, min_x)),
                                      ((self.cur_my_posture[robot_id][Y] + 0.03)
                                       if (self.cur_my_posture[robot_id][Y] < 0)
                                       else (self.cur_my_posture[robot_id][Y] - 0.03)))

        def midfielder(self, robot_id, idx, offset_y):
            # Chase/shoot when this robot is the closest one (idx), otherwise
            # shadow the ball with a lateral offset.
            ox = 0.1
            oy = 0.075
            ball_dist = helper.distance(self.cur_my_posture[robot_id][X], self.cur_ball[X],
                                        self.cur_my_posture[robot_id][Y], self.cur_ball[Y])
            goal_dist = helper.distance(self.cur_my_posture[robot_id][X], self.field[X] / 2,
                                        self.cur_my_posture[robot_id][Y], 0)
            if (robot_id == idx):
                if (ball_dist < 0.04):
                    # if near the ball and near the opposite team goal
                    if (goal_dist < 1.0):
                        self.position(robot_id, self.field[X] / 2, 0)
                    else:
                        # if near and in front of the ball
                        if (self.cur_ball[X] < self.cur_my_posture[robot_id][X] - 0.044):
                            x_suggest = self.cur_ball[X] - 0.044
                            self.position(robot_id, x_suggest, self.cur_ball[Y])
                        # if near and behind the ball
                        else:
                            self.position(robot_id, self.field[X] + self.goal[X], -self.goal[Y] / 2)
                else:
                    if (self.cur_ball[X] < self.cur_my_posture[robot_id][X]):
                        if (self.cur_ball[Y] > 0):
                            self.position(robot_id,
                                          self.cur_ball[X] - ox,
                                          min(self.cur_ball[Y] - oy, 0.45 * self.field[Y]))
                        else:
                            self.position(robot_id,
                                          self.cur_ball[X] - ox,
                                          min(self.cur_ball[Y] + oy, -0.45 * self.field[Y]))
                    else:
                        self.position(robot_id, self.cur_ball[X], self.cur_ball[Y])
            else:
                self.position(robot_id, self.cur_ball[X] - 0.1, self.cur_ball[Y] + offset_y)

        def set_action(self, robot_id, action_number):
            # Translate the DQN's discrete action into a target position.
            if action_number == 0:
                # go behind the ball up
                self.printConsole("behind the ball up")
                self.position(robot_id, (self.cur_ball[X] - 0.13), (self.cur_ball[Y] + 0.1))
            elif action_number == 1:
                # go behind the ball down
                self.printConsole("behind the ball down")
                self.position(robot_id, (self.cur_ball[X] - 0.13), (self.cur_ball[Y] - 0.1))
            elif action_number == 2:
                # hit the ball: aim well past the ball along the robot-to-ball line
                self.printConsole("hit the ball")
                delta = [self.cur_ball[X] - self.cur_my_posture[robot_id][X],
                         self.cur_ball[Y] - self.cur_my_posture[robot_id][Y]]
                self.position(robot_id,
                              self.cur_ball[X] + 10 * delta[X],
                              self.cur_ball[Y] + 10 * delta[Y])

        # initiate empty frame
        received_frame = Frame()

        if 'time' in f:
            received_frame.time = f['time']
        if 'score' in f:
            received_frame.score = f['score']
        if 'reset_reason' in f:
            received_frame.reset_reason = f['reset_reason']
        if 'coordinates' in f:
            received_frame.coordinates = f['coordinates']
        if 'EOF' in f:
            self.end_of_frame = f['EOF']

        #self.printConsole(received_frame.time)
        #self.printConsole(received_frame.score)
        #self.printConsole(received_frame.reset_reason)
        #self.printConsole(self.end_of_frame)

        ##############################################################################
        ##############################################################################
        if (self.end_of_frame):
            # How to get the robot and ball coordinates: (ROBOT_ID can be 0,1,2,3,4)
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[BALL][X])
            #self.printConsole(received_frame.coordinates[BALL][Y])

            self.get_coord(received_frame)
            ##############################################################################
            reward = 0
            if (received_frame.reset_reason == SCORE_OPPONENT):
                self.score_op = True

            # if ball is near my goal post and goalie is active, start memory and train
            if (self.cur_ball[X] < -1.3) and (abs(self.cur_ball[Y]) < 0.75) and self.cur_my_posture[4][ACTIVE]:
                # next state: normalized goalkeeper pose + ball position
                self.find_closest_robot()
                next_state = [round(self.cur_my_posture[4][X] / 2.05, 2),
                              round(self.cur_my_posture[4][Y] / 1.35, 2),
                              round(self.cur_my_posture[4][TH] / (2 * math.pi), 2),
                              round(self.cur_ball[X] / 2.05, 2),
                              round(self.cur_ball[Y] / 1.35, 2)]
                next_state = np.reshape([next_state], (1, self.state_size, 1))
                # Stack the new state in front of the 3 most recent ones.
                next_history = np.append(next_state, self.agent.history[:, :, :3], axis=2)

                # Reward
                # reward = 0
                if (self.cur_ball[X] < -1.4) and (-0.65 < self.cur_ball[Y] < 0.65):
                    # if the ball is near my goal post -> penalty
                    reward += (self.cur_ball[X] + 1.4) + (abs(self.cur_ball[Y]) - 0.65)
                if self.score_op:
                    # if oppenent goal -> penalty
                    reward -= 100
                    self.score_op_sum += 1
                    self.printConsole("score opponet")
                    self.score_op = False
                if reward != 0:
                    self.printConsole("step reward: " + str(reward))

                # Reset
                self.printConsole("boal far: " + str(self.boal_far))
                # save the sample <s, a, r, s'> to the replay memory
                self.agent.replay_memory(self.agent.history, self.agent.action,
                                         reward, next_history, self.boal_far)
                # every time step do the training
                if len(self.agent.memory) >= self.agent.train_start:
                    self.agent.train_replay()
                # save the history and get action
                self.agent.history = next_history
                self.agent.action = self.agent.get_action(np.reshape(self.agent.history, (1, -1)))
                #self.printConsole("agent action: " + str(self.agent.action))

                # Set goalkeeper wheels
                set_action(self, 4, self.agent.action)
                self.boal_far = False

                self.update_step += 1
                self.step += 1
                if self.update_step % self.agent.update_target_rate == 0:
                    # every reset update the target model to be same with model
                    self.agent.update_target_model()
                    self.printConsole("update target")
            else:
                # Ball is far away: fall back to the heuristic goalie.
                # self.printConsole("stay at goal post")
                goalie(self, 4)
                self.boal_far = True

            # other robot Functions
            #goalie(self, 4)
            #defender(self, 3, self.idx, 0.2)
            #defender(self, 2, self.idx, -0.2)
            #midfielder(self, 1, self.idx, 0.15)
            #midfielder(self, 0, self.idx, -0.15)

            set_wheel(self, self.wheels)

            # go one step
            self.global_step += 1
            self.reward_sum += reward
            ##############################################################################
            # for tensorboard
            self.agent.avg_q_max += np.amax(
                self.agent.model.predict(np.reshape(self.agent.history, (1, -1)))[0])
            ##############################################################################
            if self.global_step % 50 == 0:
                # print current status
                self.printConsole("step: " + str(self.global_step)
                                  + ", Epsilon: " + str(self.agent.epsilon))
                self.printConsole("update step: " + str(self.update_step))

            if self.global_step % self.save_every_steps == 0:
                # save the model
                self.agent.save_model("./save_model/aiwc_dqn.h5")
                self.printConsole("Saved model")

            if self.global_step % self.stats_steps == 0:
                # plot the statics
                stats = [self.reward_sum, self.agent.avg_q_max / float(self.step),
                         self.step, self.agent.avg_loss / float(self.step),
                         self.score_op_sum]
                for i in range(len(stats)):
                    self.agent.sess.run(self.agent.update_ops[i], feed_dict={
                        self.agent.summary_placeholders[i]: float(stats[i])
                    })
                summary_str = self.agent.sess.run(self.agent.summary_op)
                self.agent.summary_writer.add_summary(
                    summary_str, self.global_step / self.stats_steps)
                self.printConsole("average reward: " + str(self.reward_sum / float(self.step))
                                  + ", average_q: " + str(self.agent.avg_q_max / float(self.step))
                                  + ", average loss: " + str(self.agent.avg_loss / float(self.step))
                                  + ", oppenent socore for " + str(self.stats_steps)
                                  + " steps: " + str(self.score_op_sum))
                # Reset the windowed statistics.
                self.reward_sum, self.agent.avg_q_max, self.agent.avg_loss = 0, 0, 0
                self.score_op_sum, self.step = 0, 0
            ##############################################################################
            if (received_frame.reset_reason == GAME_END):
                #(virtual finish() in random_walk.cpp)
                #save your data
                with open(args.datapath + '/result.txt', 'w') as output:
                    #output.write('yourvariables')
                    output.close()
                #unsubscribe; reset or leave
                yield self.sub.unsubscribe()
                try:
                    yield self.leave()
                except Exception as e:
                    self.printConsole("Error: {}".format(e))

            self.end_of_frame = False
        ##############################################################################
        ##############################################################################

    def onDisconnect(self):
        if reactor.running:
            reactor.stop()
class Component(ApplicationSession):
    """
    WAMP session in which a DQN "coach" watches the whole team each frame and
    chooses between normal play (offense with goal/penalty-area foul
    avoidance) and a deadlock-avoidance maneuver.  All five robots share the
    same heuristic midfielder movement.
    """

    def __init__(self, config):
        ApplicationSession.__init__(self, config)

    def printConsole(self, message):
        # Print and flush immediately so log lines appear in real time.
        print(message)
        sys.__stdout__.flush()

    def onConnect(self):
        self.join(self.config.realm)

    @inlineCallbacks
    def onJoin(self, details):
        """Fetch game info, initialize state, subscribe to frames, signal ready."""
        ##############################################################################
        def init_variables(self, info):
            # Here you have the information of the game (virtual init() in random_walk.cpp)
            # List: game_time, goal, number_of_robots, penalty_area, codewords,
            #       robot_height, robot_radius, max_linear_velocity, field, team_info,
            #       {rating, name}, axle_length, resolution, ball_radius
            # self.game_time = info['game_time']
            self.field = info['field']
            self.robot_size = 2*info['robot_radius']
            self.goal = info['goal']
            self.max_linear_velocity = info['max_linear_velocity']
            self.number_of_robots = info['number_of_robots']
            self.end_of_frame = False
            ##################################################################
            # team info, 5 robots, (x,y,th,active,touch)
            self.cur_my = [[] for _ in range(self.number_of_robots)]
            self.cur_op = [[] for _ in range(self.number_of_robots)]
            self.cur_ball = []  # ball (x,y) position
            self.prev_ball = [0., 0.]  # previous ball (x,y) position
            # indicate active flag of previous frame, (team, robot index)
            self.active_flag = [[True for _ in range(self.number_of_robots)],
                                [True for _ in range(self.number_of_robots)]]
            # distance to the ball, (team, robot index)
            self.dist_ball = np.zeros((2, self.number_of_robots))
            # index for which my robot is close to the ball
            self.idxs = [i for i in range(self.number_of_robots)]
            self.dlck_cnt = 0  # deadlock count
            # how many times avoid deadlock function was called
            self.avoid_dlck_cnt = 0
            self.wheels = np.zeros(self.number_of_robots*2)
            ##################################################################
            self.state_dim = 3  # relative ball
            self.action_dim = 2  # avoid dead lock or not
            self.coach_agent = DQNAgent(self.state_dim, self.action_dim)
            self.train_step = 0
            self.save_steps = 12000  # save every 10 minuites
            self.tb_steps = 6000  # write to tensorboard
            self.rwd_sum = 0
            self.scr_sum = 0
            return
        ##############################################################################

        try:
            info = yield self.call(u'aiwc.get_info', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            try:
                self.sub = yield self.subscribe(self.on_event, args.key)
            except Exception as e2:
                self.printConsole("Error: {}".format(e2))

        init_variables(self, info)

        try:
            yield self.call(u'aiwc.ready', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            self.printConsole("I am ready for the game!")

    ##############################################################################
    def get_coord(self, received_frame):
        # Cache the latest ball / team coordinates from the frame.
        self.cur_ball = received_frame.coordinates[BALL]
        self.cur_my = received_frame.coordinates[MY_TEAM]
        self.cur_op = received_frame.coordinates[OP_TEAM]

    def get_idxs(self):
        """Return my robots' indices sorted by distance to the ball (closest first).

        Side effect: refreshes self.dist_ball for both teams.
        """
        # sort according to distant to the ball
        # my team
        for i in range(self.number_of_robots):
            self.dist_ball[MY_TEAM][i] = helper.distance(self.cur_ball[X], self.cur_my[i][X],
                                                         self.cur_ball[Y], self.cur_my[i][Y])
        # opponent team
        for i in range(self.number_of_robots):
            self.dist_ball[OP_TEAM][i] = helper.distance(self.cur_ball[X], self.cur_op[i][X],
                                                         self.cur_ball[Y], self.cur_op[i][Y])
        # my team distance index
        idxs = sorted(range(len(self.dist_ball[MY_TEAM])),
                      key=lambda k: self.dist_ball[MY_TEAM][k])
        return idxs

    def count_deadlock(self):
        # Increment the deadlock counter while the ball sits near a side wall
        # (|y| > 0.65) and barely moved since the previous frame.
        # delta of ball
        delta_b = helper.distance(self.cur_ball[X], self.prev_ball[X], \
            self.cur_ball[Y], self.prev_ball[Y])
        if (abs(self.cur_ball[Y]) > 0.65) and (delta_b < 0.02):
            self.dlck_cnt += 1
        else:
            self.dlck_cnt = 0
            self.avoid_dlck_cnt = 0

    def get_reward(self):
        """Reward = (opponents knocked out) - (my robots knocked out) this frame."""
        reward = 0
        # if my robot become deactive, recieve penalty
        for i in range(self.number_of_robots):
            if (not self.cur_my[i][ACTIVE]) and (self.active_flag[MY_TEAM][i]):
                reward -= 1
                self.printConsole("my team go to the bench")
            # Track the flag every frame so re-activations are seen too.
            self.active_flag[MY_TEAM][i] = self.cur_my[i][ACTIVE]
        # if opponent robot become deactive, recieve reward
        for i in range(self.number_of_robots):
            if (not self.cur_op[i][ACTIVE]) and (self.active_flag[OP_TEAM][i]):
                reward += 1
                self.printConsole("op team go to the bench")
            self.active_flag[OP_TEAM][i] = self.cur_op[i][ACTIVE]
        return reward

    def get_next_state(self):
        """Build the 3-dim coach state; refreshes self.idxs and deadlock count."""
        self.idxs = self.get_idxs()
        self.count_deadlock()
        # my team closest distance to the ball
        # op team closest distance to the ball
        # deadlock count
        next_state = [self.dist_ball[MY_TEAM][self.idxs[0]],
                      self.dist_ball[OP_TEAM].min(),
                      self.dlck_cnt]
        return np.array(next_state)

    def step(self, received_frame):
        """One environment step: returns (reward, next_state, reset)."""
        self.get_coord(received_frame)
        reward = self.get_reward()
        next_state = self.get_next_state()
        # reset is True on any game event (score, foul, game end, ...).
        if(received_frame.reset_reason != NONE) \
            and (received_frame.reset_reason is not None):
            reset = True
        else:
            reset = False
        return reward, next_state, reset

    ##############################################################################
    # function for heuristic coach's action
    def count_goal_area(self):
        # Number of my robots inside (either) goal area.
        cnt = 0
        for i in range(self.number_of_robots):
            if (abs(self.cur_my[i][X]) > 1.6) and (abs(self.cur_my[i][Y]) < 0.43):
                cnt += 1
        return cnt

    def count_penalty_area(self):
        # Number of my robots inside (either) penalty area.
        cnt = 0
        for i in range(self.number_of_robots):
            if (abs(self.cur_my[i][X]) > 1.3) and (abs(self.cur_my[i][Y]) < 0.7):
                cnt += 1
        return cnt

    ##############################################################################
    # function for heuristic moving
    def set_wheel_velocity(self, robot_id, left_wheel, right_wheel):
        # Write (left, right) wheel speeds for one robot into self.wheels,
        # scaling both down proportionally if either exceeds the max speed.
        multiplier = 1
        if(abs(left_wheel) > self.max_linear_velocity or abs(right_wheel) > self.max_linear_velocity):
            if (abs(left_wheel) > abs(right_wheel)):
                multiplier = self.max_linear_velocity / abs(left_wheel)
            else:
                multiplier = self.max_linear_velocity / abs(right_wheel)
        self.wheels[2*robot_id] = left_wheel*multiplier
        self.wheels[2*robot_id + 1] = right_wheel*multiplier

    def position(self, robot_id, x, y):
        # Drive robot_id toward (x, y): angular gain ka grows as the distance
        # d_e shrinks, and the robot drives backwards (sign = -1) when the
        # target lies mostly behind it.
        damping = 0.35
        mult_lin = 3.5
        mult_ang = 0.4
        ka = 0
        sign = 1
        dx = x - self.cur_my[robot_id][X]
        dy = y - self.cur_my[robot_id][Y]
        d_e = math.sqrt(math.pow(dx, 2) + math.pow(dy, 2))
        desired_th = (math.pi/2) if (dx == 0 and dy == 0) else math.atan2(dy, dx)
        d_th = desired_th - self.cur_my[robot_id][TH]
        # Normalize the heading error into (-pi, pi].
        while(d_th > math.pi):
            d_th -= 2*math.pi
        while(d_th < -math.pi):
            d_th += 2*math.pi
        if (d_e > 1):
            ka = 17/90
        elif (d_e > 0.5):
            ka = 19/90
        elif (d_e > 0.3):
            ka = 21/90
        elif (d_e > 0.2):
            ka = 23/90
        else:
            ka = 25/90
        # If the target is more than ~95 degrees off, back up instead of
        # turning all the way around.
        if (d_th > helper.degree2radian(95)):
            d_th -= math.pi
            sign = -1
        elif (d_th < helper.degree2radian(-95)):
            d_th += math.pi
            sign = -1
        if (abs(d_th) > helper.degree2radian(85)):
            # Mostly rotate in place when far off-heading.
            self.set_wheel_velocity(robot_id, -mult_ang*d_th, mult_ang*d_th)
        else:
            if (d_e < 5 and abs(d_th) < helper.degree2radian(40)):
                ka = 0.1
            ka *= 4
            self.set_wheel_velocity(robot_id,
                sign * (mult_lin * (1 / (1 + math.exp(-3*d_e)) - damping) - mult_ang * ka * d_th),
                sign * (mult_lin * (1 / (1 + math.exp(-3*d_e)) - damping) + mult_ang * ka * d_th))
    ##############################################################################

    @inlineCallbacks
    def on_event(self, f):
        """Per-frame callback: RL step for the coach, then heuristic movement."""

        @inlineCallbacks
        def set_wheel(self, robot_wheels):
            # Send all ten wheel speeds to the simulator.
            yield self.call(u'aiwc.set_speed', args.key, robot_wheels)
            return

        def avoid_goal_foul(self):
            # Too many robots in the goal area: keep the two closest on the
            # ball, park the rest at center field.
            midfielder(self, self.idxs[0])
            midfielder(self, self.idxs[1])
            self.position(self.idxs[2], 0, 0)
            self.position(self.idxs[3], 0, 0)
            self.position(self.idxs[4], 0, 0)

        def avoid_penalty_foul(self):
            # Too many robots in the penalty area: keep the three closest on
            # the ball, park the rest at center field.
            midfielder(self, self.idxs[0])
            midfielder(self, self.idxs[1])
            midfielder(self, self.idxs[2])
            self.position(self.idxs[3], 0, 0)
            self.position(self.idxs[4], 0, 0)

        def avoid_deadlock(self):
            # Pull the whole team to the ball's x at mid-height to free it.
            self.position(0, self.cur_ball[X], 0)
            self.position(1, self.cur_ball[X], 0)
            self.position(2, self.cur_ball[X], 0)
            self.position(3, self.cur_ball[X], 0)
            self.position(4, self.cur_ball[X], 0)

        def midfielder(self, robot_id):
            # Shoot through the ball when close and behind it; otherwise take
            # a dribbling position behind the ball on the ball-to-goal line.
            goal_dist = helper.distance(self.cur_my[robot_id][X], self.field[X]/2,
                                        self.cur_my[robot_id][Y], 0)  # NOTE(review): unused
            shoot_mul = 1  # NOTE(review): unused
            dribble_dist = 0.426
            v = 5
            # Unit vector from the ball toward the opponent goal.
            goal_to_ball_unit = helper.unit([self.field[X]/2 - self.cur_ball[X],
                                             - self.cur_ball[Y]])
            delta = [self.cur_ball[X] - self.cur_my[robot_id][X],
                     self.cur_ball[Y] - self.cur_my[robot_id][Y]]
            if (self.dist_ball[MY_TEAM][robot_id] < 0.5) and (delta[X] > 0):
                self.position(robot_id,
                              self.cur_ball[X] + v*delta[X],
                              self.cur_ball[Y] + v*delta[Y])
            else:
                self.position(robot_id,
                              self.cur_ball[X] - dribble_dist*goal_to_ball_unit[X],
                              self.cur_ball[Y] - dribble_dist*goal_to_ball_unit[Y])

        def offense(self):
            # Everyone plays the midfielder behavior.
            midfielder(self, 0)
            midfielder(self, 1)
            midfielder(self, 2)
            midfielder(self, 3)
            midfielder(self, 4)

        def set_action(self, act_idx):
            # Translate the coach's discrete action into team behavior.
            if act_idx == 0:
                # count how many robots is in the goal area
                goal_area_cnt = self.count_goal_area()
                # count how many robots is in the penalty area
                penalty_area_cnt = self.count_penalty_area()
                if goal_area_cnt > 2:
                    avoid_goal_foul(self)
                elif penalty_area_cnt > 3:
                    avoid_penalty_foul(self)
                else:
                    offense(self)
            elif act_idx == 1:
                avoid_deadlock(self)
                self.printConsole('avoid deadlock')
            else:
                self.printConsole('action index is over 1')

        # initiate empty frame
        received_frame = Frame()

        if 'time' in f:
            received_frame.time = f['time']
        if 'score' in f:
            received_frame.score = f['score']
        if 'reset_reason' in f:
            received_frame.reset_reason = f['reset_reason']
        if 'coordinates' in f:
            received_frame.coordinates = f['coordinates']
        if 'EOF' in f:
            self.end_of_frame = f['EOF']

        #self.printConsole(received_frame.time)
        #self.printConsole(received_frame.score)
        #self.printConsole(received_frame.reset_reason)
        #self.printConsole(self.end_of_frame)

        ##############################################################################
        if (self.end_of_frame):
            # How to get the robot and ball coordinates: (ROBOT_ID can be 0,1,2,3,4)
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[BALL][X])
            #self.printConsole(received_frame.coordinates[BALL][Y])
            ##############################################################################
            # get next state, reward, and reset info
            reward, next_state, reset = self.step(received_frame)
            self.printConsole('reward: '+str(reward))
            self.printConsole('next state: '+str(next_state))
            self.printConsole('reset '+str(reset))
            next_state = np.reshape(next_state, (1, self.state_dim, 1))

            # # update: store <s, a, r, s', reset> and train when warm
            self.coach_agent.replay_memory(self.coach_agent.history,
                                           self.coach_agent.action,
                                           reward, next_state, reset)
            if len(self.coach_agent.memory) >= self.coach_agent.train_start:
                self.coach_agent.train_replay()
                self.printConsole('train')

            # # save next state and choose the next action
            self.coach_agent.history = next_state
            self.coach_agent.action = self.coach_agent.\
                get_action(np.reshape(self.coach_agent.history, (1, -1)))

            # coach's action to positions to go
            set_action(self, self.coach_agent.action)
            set_wheel(self, self.wheels.tolist())

            self.prev_ball = self.cur_ball
            self.train_step += 1  # increase global step
            self.rwd_sum += reward
            # Track the score differential for tensorboard.
            if received_frame.reset_reason == SCORE_MYTEAM:
                self.scr_sum += 1
            elif received_frame.reset_reason == SCORE_OPPONENT:
                self.scr_sum -= 1
            self.printConsole('step: ' + str(self.train_step))
            ##############################################################################
            if self.train_step % self.save_steps == 0:
                self.coach_agent.save_model("./save_model/coach_dlck_dqn%s.h5" \
                    % self.train_step)
                self.printConsole('epsilon: '+str(self.coach_agent.epsilon))

            if self.train_step % self.tb_steps == 0:
                # Write windowed reward/score statistics to tensorboard.
                data = [self.rwd_sum, self.scr_sum]
                for i in range(len(data)):
                    self.coach_agent.sess.run(self.coach_agent.update_ops[i],
                        feed_dict={self.coach_agent.summary_placeholders[i]: float(data[i])})
                summary_str = self.coach_agent.sess.run(self.coach_agent.summary_op)
                self.coach_agent.summary_writer.add_summary(
                    summary_str, self.train_step)
                self.rwd_sum, self.scr_sum = 0., 0.
            ##############################################################################
            if(received_frame.reset_reason == GAME_END):
                #(virtual finish() in random_walk.cpp)
                #save your data
                with open(args.datapath + '/result.txt', 'w') as output:
                    #output.write('yourvariables')
                    output.close()
                #unsubscribe; reset or leave
                yield self.sub.unsubscribe()
                try:
                    yield self.leave()
                except Exception as e:
                    self.printConsole("Error: {}".format(e))

            self.end_of_frame = False
        ##############################################################################

    def onDisconnect(self):
        if reactor.running:
            reactor.stop()
class Component(ApplicationSession):
    """
    AI Base + Skeleton

    WAMP session that subscribes to AIWC game frames, runs one DQN step per
    frame for robot 0 (state = robot-0 pose + ball position), and sends the
    resulting wheel speeds back to the simulator.
    """

    def __init__(self, config):
        ApplicationSession.__init__(self, config)

    def printConsole(self, message):
        # Print via the real stdout and flush so messages appear immediately
        # even when stdout is redirected by the framework.
        print(message)
        sys.__stdout__.flush()

    def onConnect(self):
        self.join(self.config.realm)

    @inlineCallbacks
    def onJoin(self, details):

        ##############################################################################
        def init_variables(self, info):
            # Here you have the information of the game (virtual init() in random_walk.cpp)
            # List: game_time, goal, number_of_robots, penalty_area, codewords,
            #       robot_height, robot_radius, max_linear_velocity, field, team_info,
            #       {rating, name}, axle_length, resolution, ball_radius
            # self.game_time = info['game_time']
            self.field = info['field']
            self.robot_size = 2 * info['robot_radius']
            self.goal = info['goal']
            self.max_linear_velocity = info['max_linear_velocity']
            self.number_of_robots = info['number_of_robots']
            self.end_of_frame = False
            self.cur_posture = []  # my team's postures in the current frame
            self.cur_ball = []     # ball (x, y) in the current frame
            self.prev_ball = []    # ball (x, y) in the previous frame
            self.idx = 0
            # two wheel speeds per robot; 5 robots -> 10 entries
            self.wheels = [0 for _ in range(10)]
            self.state_size = 5    # the number of possible states
            self.action_size = 11  # the number of possible actions
            self.agent = DQNAgent(self.state_size, self.action_size)
            self.global_step = 0   # iteration step
            self.save_every_steps = 100  # save the model
            self.step = 0          # statistic step
            self.stats_steps = 100  # for tensorboard
            self.reward_sum = 0
            return
        ##############################################################################

        try:
            info = yield self.call(u'aiwc.get_info', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            try:
                self.sub = yield self.subscribe(self.on_event, args.key)
            except Exception as e2:
                self.printConsole("Error: {}".format(e2))

        init_variables(self, info)

        try:
            yield self.call(u'aiwc.ready', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            self.printConsole("I am ready for the game!")

    def get_coord(self, received_frame):
        """Cache the ball and my-team coordinates from the received frame."""
        self.cur_ball = received_frame.coordinates[BALL]
        self.cur_posture = received_frame.coordinates[MY_TEAM]

    @inlineCallbacks
    def on_event(self, f):
        """Handle one frame message: compute state/reward, train, act."""

        @inlineCallbacks
        def set_wheel(self, robot_wheels):
            # Send all wheel speeds to the simulator (fire-and-forget deferred).
            yield self.call(u'aiwc.set_speed', args.key, robot_wheels)
            return

        def set_action(robot_id, action_number):
            # Map the discrete action index to (left, right) wheel speeds for
            # the given robot; speeds are written into self.wheels in place.
            if action_number == 0:
                self.wheels[2 * robot_id] = 0.75
                self.wheels[2 * robot_id + 1] = 0.75
                # Go Forward with fixed velocity
            elif action_number == 1:
                self.wheels[2 * robot_id] = 0.75
                self.wheels[2 * robot_id + 1] = 0.5
                # Turn
            elif action_number == 2:
                self.wheels[2 * robot_id] = 0.75
                self.wheels[2 * robot_id + 1] = 0.25
                # Turn
            elif action_number == 3:
                self.wheels[2 * robot_id] = 0.75
                self.wheels[2 * robot_id + 1] = 0
                # Turn
            elif action_number == 4:
                self.wheels[2 * robot_id] = 0.5
                # BUG FIX: was 75 (typo). Every other action uses speeds in
                # [-0.75, 0.75], and action 4 mirrors action 1's (0.75, 0.5).
                self.wheels[2 * robot_id + 1] = 0.75
                # Turn
            elif action_number == 5:
                self.wheels[2 * robot_id] = 0.25
                self.wheels[2 * robot_id + 1] = 0.75
                # Turn
            elif action_number == 6:
                self.wheels[2 * robot_id] = 0
                self.wheels[2 * robot_id + 1] = 0.75
                # Turn
            elif action_number == 7:
                self.wheels[2 * robot_id] = -0.75
                self.wheels[2 * robot_id + 1] = -0.75
                # Go Backward with fixed velocity
            elif action_number == 8:
                self.wheels[2 * robot_id] = -0.1
                self.wheels[2 * robot_id + 1] = 0.1
                # Spin
            elif action_number == 9:
                self.wheels[2 * robot_id] = 0.1
                self.wheels[2 * robot_id + 1] = -0.1
                # Spin
            elif action_number == 10:
                self.wheels[2 * robot_id] = 0
                self.wheels[2 * robot_id + 1] = 0
                # Do not move

        # initiate empty frame
        received_frame = Frame()

        if 'time' in f:
            received_frame.time = f['time']
        if 'score' in f:
            received_frame.score = f['score']
        if 'reset_reason' in f:
            received_frame.reset_reason = f['reset_reason']
        if 'coordinates' in f:
            received_frame.coordinates = f['coordinates']
        if 'EOF' in f:
            self.end_of_frame = f['EOF']

        ##############################################################################
        if (self.end_of_frame):
            # Coordinates are indexed as (ROBOT_ID can be 0..4):
            #   received_frame.coordinates[MY_TEAM|OP_TEAM][ROBOT_ID][X|Y|TH|ACTIVE|TOUCH]
            #   received_frame.coordinates[BALL][X|Y]
            ##############################################################################

            # next state: robot-0 pose and ball position, normalized by the
            # field half-extents (2.05, 1.35) and by a full turn (2*pi).
            self.get_coord(received_frame)
            next_state = [
                round(received_frame.coordinates[MY_TEAM][0][X] / 2.05, 2),
                round(received_frame.coordinates[MY_TEAM][0][Y] / 1.35, 2),
                round(
                    received_frame.coordinates[MY_TEAM][0][TH] /
                    (2 * math.pi), 2),
                round(received_frame.coordinates[BALL][X] / 2.05, 2),
                round(received_frame.coordinates[BALL][Y] / 1.35, 2)
            ]
            next_state = np.reshape([next_state], (1, self.state_size, 1))
            # prepend the new state, keeping the 3 most recent past states
            next_history = np.append(next_state,
                                     self.agent.history[:, :, :3],
                                     axis=2)

            # Reward: decays exponentially with robot-0's distance to the
            # ball, plus a large bonus when robot 0 is touching the ball.
            reward = 3 * math.exp(-10 * (helper.distance(
                self.cur_posture[0][X], self.cur_ball[X],
                self.cur_posture[0][Y], self.cur_ball[Y]) / 20))
            if self.cur_posture[0][TOUCH]:
                reward += 100
                self.printConsole("ball touch, reward is " + str(reward))

            # Episode boundary: either team scored or a deadlock reset occurred.
            reset = (received_frame.reset_reason == SCORE_MYTEAM) or (
                received_frame.reset_reason == SCORE_OPPONENT) or (
                    received_frame.reset_reason == DEADLOCK)

            ##############################################################################
            # accumulate max-Q of the current history for tensorboard stats
            self.agent.avg_q_max += np.amax(
                self.agent.model.predict(
                    np.reshape(self.agent.history, (1, -1)))[0])
            ##############################################################################

            # save the sample <s, a, r, s'> to the replay memory
            self.agent.replay_memory(self.agent.history, self.agent.action,
                                     reward, next_history, reset)
            # every time step do the training
            if len(self.agent.memory) >= self.agent.train_start:
                self.agent.train_replay()

            # save the history and get the next action
            self.agent.history = next_history
            self.agent.action = self.agent.get_action(
                np.reshape(self.agent.history, (1, -1)))

            # Set robot wheels (only robot 0 is driven by the agent)
            set_action(0, self.agent.action)
            set_wheel(self, self.wheels)

            # go one step
            self.global_step += 1
            self.step += 1
            self.reward_sum += reward

            ##############################################################################
            if self.global_step % 50 == 0:
                self.printConsole("step: " + str(self.global_step) +
                                  ", Epsilon: " + str(self.agent.epsilon) +
                                  ", reward: " + str(reward))

            if self.global_step % self.agent.update_target_rate == 0:
                # every reset update the target model to be same with model
                self.agent.update_target_model()
                self.printConsole("update target")

            if self.global_step % self.save_every_steps == 0:
                # save the model
                self.agent.save_model("./save_model/aiwc_dqn.h5")
                self.printConsole("Saved model")

            if self.global_step % self.stats_steps == 0:
                # plot the statistics on tensorboard
                stats = [
                    self.reward_sum, self.agent.avg_q_max / float(self.step),
                    self.step, self.agent.avg_loss / float(self.step)
                ]
                for i in range(len(stats)):
                    self.agent.sess.run(self.agent.update_ops[i],
                                        feed_dict={
                                            self.agent.summary_placeholders[i]:
                                            float(stats[i])
                                        })
                summary_str = self.agent.sess.run(self.agent.summary_op)
                # BUG FIX: use floor division so the summary step is an int
                # (true division yields a float under Python 3; the division
                # is exact here since global_step % stats_steps == 0).
                self.agent.summary_writer.add_summary(
                    summary_str, self.global_step // self.stats_steps)
                self.printConsole(
                    "average reward: " +
                    str(self.reward_sum / float(self.step)) +
                    ", average_q: " +
                    str(self.agent.avg_q_max / float(self.step)) +
                    ", average loss: " +
                    str(self.agent.avg_loss / float(self.step)))
                self.reward_sum, self.agent.avg_q_max, self.agent.avg_loss = 0, 0, 0
                self.step = 0
            ##############################################################################

            if (received_frame.reset_reason == GAME_END):
                # (virtual finish() in random_walk.cpp)
                # save your data
                with open(args.datapath + '/result.txt', 'w') as output:
                    # output.write('yourvariables')
                    output.close()
                # unsubscribe; reset or leave
                yield self.sub.unsubscribe()
                try:
                    yield self.leave()
                except Exception as e:
                    self.printConsole("Error: {}".format(e))

            self.end_of_frame = False
    ##############################################################################

    def onDisconnect(self):
        if reactor.running:
            reactor.stop()