Example #1
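A WAMP ApplicationSession for the AI World Cup (aiwc) simulation framework: a rule-based controller positions robot 4, and a DDPG agent learns a residual correction that is added to the rule-based wheel command on every frame.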
class Component(ApplicationSession):

    def __init__(self, config):
        ApplicationSession.__init__(self, config)

    def printConsole(self, message):
        print(message)
        sys.__stdout__.flush()

    def onConnect(self):
        self.join(self.config.realm)

    @inlineCallbacks
    def onJoin(self, details):

##############################################################################
        def init_variables(self, info):
            # Here you receive the game information (the counterpart of the virtual init() in random_walk.cpp)
            # List: game_time, goal, number_of_robots, penalty_area, codewords,
            #       robot_height, robot_radius, max_linear_velocity, field, team_info,
            #       {rating, name}, axle_length, resolution, ball_radius
            # self.game_time = info['game_time']
            self.field = info['field']
            self.robot_size = 2*info['robot_radius']
            self.goal = info['goal']
            self.max_linear_velocity = info['max_linear_velocity']
            self.tunning_velocity = self.max_linear_velocity
            self.number_of_robots = info['number_of_robots']
            self.end_of_frame = False
            self.cur_my_posture = []
            self.cur_op_posture = []
            self.cur_ball = []
            self.pre_ball = [0, 0]

            # self.set_seed(0)
            random.seed(0)
            np.random.seed(0)
            tf.set_random_seed(0)

            self.state_dim = 3  # x, y, th of robot 4 (was: 3*my robots + 2*op robots + 2)
            self.action_dim = 2  # left/right wheel velocity adjustment for robot 4
            self.agent = DDPGAgent(self.state_dim, self.action_dim, self.tunning_velocity, -self.tunning_velocity)
            # self.printConsole("max velocity: " + str(self.max_linear_velocity))       
            self.global_step = 0 # iteration step            
            self.save_every_steps = 12000 # save the model every 10 minutes
 
            self.stats_steps = 6000 # for tensorboard
            self.reward_sum = 0
            self.score_sum = 0 
            self.active_flag = [[False for _ in range(5)], [False for _ in range(5)]]   
            self.inner_step = 0
            self.wheels = np.zeros(self.number_of_robots*2)
            self.action = np.zeros(self.action_dim)
            return
##############################################################################
            
        try:
            info = yield self.call(u'aiwc.get_info', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            try:
                self.sub = yield self.subscribe(self.on_event, args.key)
            except Exception as e2:
                self.printConsole("Error: {}".format(e2))
               
        init_variables(self, info)
        
        try:
            yield self.call(u'aiwc.ready', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            self.printConsole("I am ready for the game!")

    def get_coord(self, received_frame):
        self.cur_ball = received_frame.coordinates[BALL]
        self.cur_my_posture = received_frame.coordinates[MY_TEAM]
        self.cur_op_posture = received_frame.coordinates[OP_TEAM]

    def get_reward(self, reset_reason):
        # pre_potential = helper.dipole_potential(self.pre_ball[X], self.pre_ball[Y], 2.1, 3)
        # cur_potential = helper.dipole_potential(self.cur_ball[X], self.cur_ball[Y], 2.1, 3)
        reward = helper.dipole_potential(self.cur_my_posture[4][X], self.cur_my_posture[4][Y], 2.1, 0.3)

        # Add dead lock penalty
        # if(reset_reason == SCORE_MYTEAM):
        #     self.score_sum += 1
        #     reward += 24 # minimum 24
        #     self.printConsole("my team goal")

        # if(reset_reason == SCORE_OPPONENT):
        #     self.score_sum -= 1
        #     reward -= 24 # maximum -24
        #     self.printConsole("op team goal")

        self.printConsole("reward: " + str(reward))
        self.pre_ball = self.cur_ball
        return reward

    def set_wheel_velocity(self, robot_id, left_wheel, right_wheel):
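        # If either requested speed exceeds max_linear_velocity, scale both
        # wheels by the same factor so their ratio (and hence the turning
        # curvature) is preserved.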
        multiplier = 1
        
        if(abs(left_wheel) > self.max_linear_velocity or abs(right_wheel) > self.max_linear_velocity):
            if (abs(left_wheel) > abs(right_wheel)):
                multiplier = self.max_linear_velocity / abs(left_wheel)
            else:
                multiplier = self.max_linear_velocity / abs(right_wheel)
        
        self.wheels[2*robot_id] = left_wheel*multiplier
        self.wheels[2*robot_id + 1] = right_wheel*multiplier

    def position(self, robot_id, x, y):
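        # Rule-based go-to-position controller: compute the distance and
        # heading error to (x, y), choose an angular gain by distance band,
        # drive in reverse if the target is behind the robot, and blend
        # linear and angular terms into left/right wheel speeds.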
        damping = 0.35
        mult_lin = 3.5
        mult_ang = 0.4
        ka = 0
        sign = 1
        
        dx = x - self.cur_my_posture[robot_id][X]
        dy = y - self.cur_my_posture[robot_id][Y]
        d_e = math.sqrt(math.pow(dx, 2) + math.pow(dy, 2))
        desired_th = (math.pi/2) if (dx == 0 and dy == 0) else math.atan2(dy, dx)

        d_th = desired_th - self.cur_my_posture[robot_id][TH] 
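        # Wrap the heading error into [-pi, pi]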
        while(d_th > math.pi):
            d_th -= 2*math.pi
        while(d_th < -math.pi):
            d_th += 2*math.pi
            
        if (d_e > 1):
            ka = 17/90
        elif (d_e > 0.5):
            ka = 19/90
        elif (d_e > 0.3):
            ka = 21/90
        elif (d_e > 0.2):
            ka = 23/90
        else:
            ka = 25/90
            
        if (d_th > helper.degree2radian(95)):
            d_th -= math.pi
            sign = -1
        elif (d_th < helper.degree2radian(-95)):
            d_th += math.pi
            sign = -1
            
        if (abs(d_th) > helper.degree2radian(85)):
            self.set_wheel_velocity(robot_id, -mult_ang*d_th, mult_ang*d_th)
        else:
            if (d_e < 5 and abs(d_th) < helper.degree2radian(40)):
                ka = 0.1
            ka *= 4
            self.set_wheel_velocity(robot_id, 
                                    sign * (mult_lin * (1 / (1 + math.exp(-3*d_e)) - damping) - mult_ang * ka * d_th),
                                    sign * (mult_lin * (1 / (1 + math.exp(-3*d_e)) - damping) + mult_ang * ka * d_th))        
            
    @inlineCallbacks
    def on_event(self, f):        
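        # Frame handler: copy the received fields into a Frame object and
        # process the frame once the end-of-frame (EOF) flag is seen.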

        @inlineCallbacks
        def set_wheel(self, robot_wheels):
            yield self.call(u'aiwc.set_speed', args.key, robot_wheels)
            return
            
        # initiate empty frame
        received_frame = Frame()

        if 'time' in f:
            received_frame.time = f['time']
        if 'score' in f:
            received_frame.score = f['score']
        if 'reset_reason' in f:
            received_frame.reset_reason = f['reset_reason']
        if 'coordinates' in f:
            received_frame.coordinates = f['coordinates']            
        if 'EOF' in f:
            self.end_of_frame = f['EOF']
        
        #self.printConsole(received_frame.time)
        #self.printConsole(received_frame.score)
        #self.printConsole(received_frame.reset_reason)
        #self.printConsole(self.end_of_frame)

##############################################################################        

        if (self.end_of_frame):
            
            # How to get the robot and ball coordinates: (ROBOT_ID can be 0,1,2,3,4)
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][X])            
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[BALL][X])
            #self.printConsole(received_frame.coordinates[BALL][Y])
                        
            self.global_step += 1                        
            self.get_coord(received_frame)
##############################################################################
            # Next state
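            # The state is robot 4's pose, with x and y scaled by constants
            # that appear to be the half-field dimensions and th scaled by
            # 2*pi; the new observation is stacked with the previous ones
            # kept in self.agent.history.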
            next_state = [round(self.cur_my_posture[4][X]/2.05, 2), round(self.cur_my_posture[4][Y]/1.35, 2), round(self.cur_my_posture[4][TH]/(2*math.pi), 2)]
            next_state = np.reshape([next_state], (1, self.state_dim, 1))
            next_history = np.append(next_state, self.agent.history[:, :, :3], axis=2)

            # Reward
            reward = self.get_reward(received_frame.reset_reason)

            # Reset
            if(received_frame.reset_reason != NONE):
                reset = True
                self.printConsole("reset reason: " + str(received_frame.reset_reason))
            else:
                reset = False
            
            self.agent.train_agent(np.reshape(self.agent.history, -1), self.action, reward, np.reshape(next_history, -1), reset, self.global_step)

            # save the history and get action
            self.agent.history = next_history

            # fine tuning with RL
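            # Robot 4 is first driven by the rule-based position() command,
            # and the DDPG action is then added to that command as a residual
            # correction before being sent to the simulator.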
            self.position(4, 2.05, 0)
            self.printConsole("                 rule-based action: " + str(self.wheels[8:10]))
            self.action = self.agent.get_action(np.reshape(self.agent.history, -1))
            self.printConsole("                 tuning action: " + str(self.action[:2]))
            self.wheels[8:10] += self.action
            self.printConsole("                 fine-tuned action: " + str(self.wheels[8:10]))

            set_wheel(self, self.wheels.tolist())

            self.reward_sum += reward            
##############################################################################   
            # for tensorboard         
            # self.agent.avg_q_max += self.agent.critic.predict([np.reshape(self.agent.history, (1, -1)), 
            #     np.reshape(self.agent.action, (1, -1))])[0]
##############################################################################
            # if self.global_step % 50 == 0:
            #     # print current status
            #     self.printConsole("step: " + str(self.global_step) + ", Epsilon: " + str(self.agent.epsilon))

            if self.global_step % self.save_every_steps == 0: # save the model
                self.agent.saver.save(self.agent.sess, self.agent.save_file)
                self.printConsole("Saved model")

            if reset: # log the statistics for TensorBoard
                self.printConsole("add data to tensorboard")
                stats = [self.reward_sum, self.agent.avg_q_max / float(self.stats_steps),
                             self.agent.loss_sum / float(self.stats_steps), self.score_sum]
                for i in range(len(stats)):
                    self.agent.sess.run(self.agent.update_ops[i], feed_dict={
                        self.agent.summary_placeholders[i]: float(stats[i])
                    })
                summary_str = self.agent.sess.run(self.agent.summary_op)
                self.agent.summary_writer.add_summary(summary_str, self.inner_step)

                self.reward_sum, self.agent.avg_q_max, self.agent.loss_sum = 0, 0, 0
                self.score_sum = 0
                self.inner_step += 1
##############################################################################
            if(received_frame.reset_reason == GAME_END):
                #(virtual finish() in random_walk.cpp)
                #save your data
                with open(args.datapath + '/result.txt', 'w') as output:
                    #output.write('yourvariables')
                    output.close()
                #unsubscribe; reset or leave  
                yield self.sub.unsubscribe()
                try:
                    yield self.leave()
                except Exception as e:
                    self.printConsole("Error: {}".format(e))

            self.end_of_frame = False
##############################################################################

    def onDisconnect(self):
        if reactor.running:
            reactor.stop()
Example #2
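A variant of the same ApplicationSession in which the DDPG agent's action is used directly as robot 4's wheel command, with no rule-based baseline.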
class Component(ApplicationSession):
    def __init__(self, config):
        ApplicationSession.__init__(self, config)

    def printConsole(self, message):
        print(message)
        sys.__stdout__.flush()

    def onConnect(self):
        self.join(self.config.realm)

    @inlineCallbacks
    def onJoin(self, details):

        ##############################################################################
        def init_variables(self, info):
            # Here you receive the game information (the counterpart of the virtual init() in random_walk.cpp)
            # List: game_time, goal, number_of_robots, penalty_area, codewords,
            #       robot_height, robot_radius, max_linear_velocity, field, team_info,
            #       {rating, name}, axle_length, resolution, ball_radius
            # self.game_time = info['game_time']
            self.field = info['field']
            self.robot_size = 2 * info['robot_radius']
            self.goal = info['goal']
            self.max_linear_velocity = info['max_linear_velocity']
            self.number_of_robots = info['number_of_robots']
            self.end_of_frame = False
            self.cur_my_posture = []
            self.cur_op_posture = []
            self.cur_ball = []
            self.pre_ball = [0, 0]
            self.wheels = [0 for _ in range(10)]

            # self.set_seed(0)
            random.seed(0)
            np.random.seed(0)
            tf.set_random_seed(0)

            self.state_dim = 3  # x, y, th of robot 4 (was: 3*my robots + 2*op robots + 2)
            self.action_dim = 2  # left/right wheel velocity for robot 4
            self.agent = DDPGAgent(self.state_dim, self.action_dim,
                                   self.max_linear_velocity,
                                   -self.max_linear_velocity)
            # self.printConsole("max velocity: " + str(self.max_linear_velocity))
            self.global_step = 0  # iteration step
            self.save_every_steps = 12000  # save the model every 10 minutes

            self.stats_steps = 6000  # for tensorboard
            self.reward_sum = 0
            self.score_sum = 0
            self.active_flag = [[False for _ in range(5)],
                                [False for _ in range(5)]]
            self.inner_step = 0
            return
##############################################################################

        try:
            info = yield self.call(u'aiwc.get_info', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            try:
                self.sub = yield self.subscribe(self.on_event, args.key)
            except Exception as e2:
                self.printConsole("Error: {}".format(e2))

        init_variables(self, info)

        try:
            yield self.call(u'aiwc.ready', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            self.printConsole("I am ready for the game!")

    def get_coord(self, received_frame):
        self.cur_ball = received_frame.coordinates[BALL]
        self.cur_my_posture = received_frame.coordinates[MY_TEAM]
        self.cur_op_posture = received_frame.coordinates[OP_TEAM]

    def get_reward(self, reset_reason):
        # pre_potential = helper.dipole_potential(self.pre_ball[X], self.pre_ball[Y], 2.1, 3)
        # cur_potential = helper.dipole_potential(self.cur_ball[X], self.cur_ball[Y], 2.1, 3)
        reward = helper.dipole_potential(self.cur_my_posture[4][X],
                                         self.cur_my_posture[4][Y], 2.1, 0.3)

        # Add dead lock penalty
        # if(reset_reason == SCORE_MYTEAM):
        #     self.score_sum += 1
        #     reward += 24 # minimum 24
        #     self.printConsole("my team goal")

        # if(reset_reason == SCORE_OPPONENT):
        #     self.score_sum -= 1
        #     reward -= 24 # maximum -24
        #     self.printConsole("op team goal")

        self.printConsole("reward: " + str(reward))
        self.pre_ball = self.cur_ball
        return reward

    @inlineCallbacks
    def on_event(self, f):
        @inlineCallbacks
        def set_wheel(self, robot_wheels):
            yield self.call(u'aiwc.set_speed', args.key, robot_wheels)
            return

        # initiate empty frame
        received_frame = Frame()

        if 'time' in f:
            received_frame.time = f['time']
        if 'score' in f:
            received_frame.score = f['score']
        if 'reset_reason' in f:
            received_frame.reset_reason = f['reset_reason']
        if 'coordinates' in f:
            received_frame.coordinates = f['coordinates']
        if 'EOF' in f:
            self.end_of_frame = f['EOF']

        #self.printConsole(received_frame.time)
        #self.printConsole(received_frame.score)
        #self.printConsole(received_frame.reset_reason)
        #self.printConsole(self.end_of_frame)

##############################################################################

        if (self.end_of_frame):

            # How to get the robot and ball coordinates: (ROBOT_ID can be 0,1,2,3,4)
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[BALL][X])
            #self.printConsole(received_frame.coordinates[BALL][Y])

            self.global_step += 1
            self.get_coord(received_frame)
            ##############################################################################
            # Next state
            next_state = [
                round(self.cur_my_posture[4][X] / 2.05, 2),
                round(self.cur_my_posture[4][Y] / 1.35, 2),
                round(self.cur_my_posture[4][TH] / (2 * math.pi), 2)
            ]
            next_state = np.reshape([next_state], (1, self.state_dim, 1))
            next_history = np.append(next_state,
                                     self.agent.history[:, :, :3],
                                     axis=2)

            # Reward
            reward = self.get_reward(received_frame.reset_reason)

            # Reset
            if (received_frame.reset_reason != NONE):
                reset = True
                self.printConsole("reset reason: " +
                                  str(received_frame.reset_reason))
            else:
                reset = False

            self.agent.train_agent(np.reshape(self.agent.history,
                                              -1), self.wheels[8:10], reward,
                                   np.reshape(next_history, -1), reset,
                                   self.global_step)

            # save the history and get action
            self.agent.history = next_history
            action = self.agent.get_action(np.reshape(self.agent.history, -1),
                                           self.global_step)

            # Set wheels
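            # Unlike Example #1, the DDPG action directly sets robot 4's
            # left/right wheel speeds rather than correcting a rule-based command.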
            self.wheels[8] = action[0]
            self.wheels[9] = action[1]
            # self.printConsole("left wheel: " + str(self.wheels[8]) + "right wheel: " +str(self.wheels[9]))

            self.printConsole("wheels: " + str(self.wheels[8:10]))
            set_wheel(self, self.wheels)

            self.reward_sum += reward
            ##############################################################################
            # for tensorboard
            # self.agent.avg_q_max += self.agent.critic.predict([np.reshape(self.agent.history, (1, -1)),
            #     np.reshape(self.agent.action, (1, -1))])[0]
            ##############################################################################
            # if self.global_step % 50 == 0:
            #     # print current status
            #     self.printConsole("step: " + str(self.global_step) + ", Epsilon: " + str(self.agent.epsilon))

            if self.global_step % self.save_every_steps == 0:  # save the model
                self.agent.saver.save(self.agent.sess, self.agent.save_file)
                self.printConsole("Saved model")

            if reset:  # log the statistics for TensorBoard
                self.printConsole("add data to tensorboard")
                stats = [
                    self.reward_sum,
                    self.agent.avg_q_max / float(self.stats_steps),
                    self.agent.loss_sum / float(self.stats_steps),
                    self.score_sum
                ]
                for i in range(len(stats)):
                    self.agent.sess.run(self.agent.update_ops[i],
                                        feed_dict={
                                            self.agent.summary_placeholders[i]:
                                            float(stats[i])
                                        })
                summary_str = self.agent.sess.run(self.agent.summary_op)
                self.agent.summary_writer.add_summary(summary_str,
                                                      self.inner_step)

                self.reward_sum, self.agent.avg_q_max, self.agent.loss_sum = 0, 0, 0
                self.score_sum = 0
                self.inner_step += 1
##############################################################################
            if (received_frame.reset_reason == GAME_END):
                #(virtual finish() in random_walk.cpp)
                #save your data
                with open(args.datapath + '/result.txt', 'w') as output:
                    #output.write('yourvariables')
                    output.close()
                #unsubscribe; reset or leave
                yield self.sub.unsubscribe()
                try:
                    yield self.leave()
                except Exception as e:
                    self.printConsole("Error: {}".format(e))

            self.end_of_frame = False


##############################################################################

    def onDisconnect(self):
        if reactor.running:
            reactor.stop()