class Component(ApplicationSession):
    """
    AI Base + Deep Q Network example
    """

    def __init__(self, config):
        ApplicationSession.__init__(self, config)

    def printConsole(self, message):
        print(message)
        sys.__stdout__.flush()

    def onConnect(self):
        self.join(self.config.realm)

    @inlineCallbacks
    def onJoin(self, details):
##############################################################################
        def init_variables(self, info):
            # Here you have the information of the game (virtual init() in random_walk.cpp)
            # List: game_time, number_of_robots
            #       field, goal, penalty_area, goal_area, resolution  Dimension: [x, y]
            #       ball_radius, ball_mass,
            #       robot_size, robot_height, axle_length, robot_body_mass,  ID: [0, 1, 2, 3, 4]
            #       wheel_radius, wheel_mass,  ID: [0, 1, 2, 3, 4]
            #       max_linear_velocity, max_torque, codewords,  ID: [0, 1, 2, 3, 4]
            # self.game_time = info['game_time']
            # self.number_of_robots = info['number_of_robots']

            # self.field = info['field']
            # self.goal = info['goal']
            # self.penalty_area = info['penalty_area']
            # self.goal_area = info['goal_area']
            self.resolution = info['resolution']

            # self.ball_radius = info['ball_radius']
            # self.ball_mass = info['ball_mass']

            # self.robot_size = info['robot_size']
            # self.robot_height = info['robot_height']
            # self.axle_length = info['axle_length']
            # self.robot_body_mass = info['robot_body_mass']

            # self.wheel_radius = info['wheel_radius']
            # self.wheel_mass = info['wheel_mass']

            self.max_linear_velocity = info['max_linear_velocity']
            # self.max_torque = info['max_torque']
            # self.codewords = info['codewords']

            self.total_distance = 0
            self.ball_touch = 0
            self.colorChannels = 3  # nf
            self.end_of_frame = False
            self.image = Received_Image(self.resolution, self.colorChannels)
            self.received_frame = Frame()

            self.D = []  # Replay Memory
            self.distance_buffer = []  # Distance buffer used for the reward
            self.update = 100  # Update Target Network
            self.epsilon = 1.0  # Initial epsilon value
            self.final_epsilon = 0.05  # Final epsilon value
            self.dec_epsilon = 0.05  # Decrease rate of epsilon for every generation
            self.step_epsilon = 5000  # Number of iterations for every generation
            self.observation_steps = 1000  # Number of iterations to observe before training every generation
            self.save_every_steps = 1000  # Save checkpoint
            self.num_actions = 11  # Number of possible actions
            self._frame = 0
            self._iterations = 0
            self.minibatch_size = 64
            self.gamma = 0.99
            self.sqerror = 100  # Initial sqerror value
            self.Q = NeuralNetwork(None, False, False)  # 2nd term: False to start training from scratch, use CHECKPOINT to load a checkpoint
            self.Q_ = NeuralNetwork(self.Q, False, True)
            self.wheels = [0 for _ in range(10)]
            return
##############################################################################

        try:
            info = yield self.call(u'aiwc.get_info', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            try:
                self.sub = yield self.subscribe(self.on_event, args.key)
            except Exception as e2:
                self.printConsole("Error: {}".format(e2))

        init_variables(self, info)

        try:
            yield self.call(u'aiwc.ready', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            self.printConsole("I am ready for the game!")

    @inlineCallbacks
    def on_event(self, f):

        @inlineCallbacks
        def set_wheel(self, robot_wheels):
            yield self.call(u'aiwc.set_speed', args.key, robot_wheels)
            return

        def set_action(robot_id, action_number):
            if action_number == 0:
                self.wheels[2*robot_id] = 0
                self.wheels[2*robot_id + 1] = 0
                # Go Forward with fixed velocity
            elif action_number == 1:
                self.wheels[2*robot_id] = 2
                self.wheels[2*robot_id + 1] = 2
                # Turn
            elif action_number == 2:
                self.wheels[2*robot_id] = 2
                self.wheels[2*robot_id + 1] = 1
                # Turn
            elif action_number == 3:
                self.wheels[2*robot_id] = 2
                self.wheels[2*robot_id + 1] = 0
                # Turn
            elif action_number == 4:
                self.wheels[2*robot_id] = -2
                self.wheels[2*robot_id + 1] = 0
                # Turn
            elif action_number == 5:
                self.wheels[2*robot_id] = -2
                self.wheels[2*robot_id + 1] = -1
                # Turn
            elif action_number == 6:
                self.wheels[2*robot_id] = -2
                self.wheels[2*robot_id + 1] = -2
                # Turn
            elif action_number == 7:
                self.wheels[2*robot_id] = -1
                self.wheels[2*robot_id + 1] = -2
                # Go Backward with fixed velocity
            elif action_number == 8:
                self.wheels[2*robot_id] = 0
                self.wheels[2*robot_id + 1] = -2
                # Spin
            elif action_number == 9:
                self.wheels[2*robot_id] = 0
                self.wheels[2*robot_id + 1] = 2
                # Spin
            elif action_number == 10:
                self.wheels[2*robot_id] = 1
                self.wheels[2*robot_id + 1] = 2
                # Do not move

        def distance(x1, x2, y1, y2):
            return math.sqrt(math.pow(x1 - x2, 2) + math.pow(y1 - y2, 2))

        # initiate empty frame
        if (self.end_of_frame):
            self.received_frame = Frame()
            self.end_of_frame = False
        received_subimages = []

        if 'time' in f:
            self.received_frame.time = f['time']
        if 'score' in f:
            self.received_frame.score = f['score']
        if 'reset_reason' in f:
            self.received_frame.reset_reason = f['reset_reason']
        if 'half_passed' in f:
            self.received_frame.half_passed = f['half_passed']
        if 'subimages' in f:
            self.received_frame.subimages = f['subimages']
            # Comment the next lines if you don't need to use the image information
            for s in self.received_frame.subimages:
                received_subimages.append(SubImage(s['x'],
                                                   s['y'],
                                                   s['w'],
                                                   s['h'],
                                                   s['base64'].encode('utf8')))
            self.image.update_image(received_subimages)
        if 'coordinates' in f:
            self.received_frame.coordinates = f['coordinates']
        if 'EOF' in f:
            self.end_of_frame = f['EOF']

        #self.printConsole(self.received_frame.time)
        #self.printConsole(self.received_frame.score)
        #self.printConsole(self.received_frame.reset_reason)
        #self.printConsole(self.end_of_frame)

        # Append-mode log file for the per-frame statistics written below
        # (renamed from 'f' so it no longer shadows the received event dict)
        log_file = open('/home/aiworldcup/Downloads/test_world/examples/0812/m2_train_model.txt',
                        mode='a', encoding='utf-8')

        if (self.end_of_frame):
            self._frame += 1

            # To get the image at the end of each frame use the variable:
            #self.printConsole(self.image.ImageBuffer)

##############################################################################
            #(virtual update())

            # Reward
            distance2 = math.exp(-10*(distance(self.received_frame.coordinates[MY_TEAM][0][X],
                                               self.received_frame.coordinates[BALL][X],
                                               self.received_frame.coordinates[MY_TEAM][0][Y],
                                               self.received_frame.coordinates[BALL][Y])/4.1))

            # Append the value to the distance buffer used for the reward
            self.distance_buffer.append(distance2)

            reward = 0
            if (len(self.distance_buffer) >= 3):
                dnum = (self._frame) - 1
                self.printConsole(dnum)
                pre_delta_distance = self.distance_buffer[dnum-1] - self.distance_buffer[dnum-2]
                delta_distance = self.distance_buffer[dnum] - self.distance_buffer[dnum-1]
                if pre_delta_distance >= delta_distance:
                    reward += 10
                else:
                    reward -= 10

            # m2 reward model
            if 0 <= distance2 <= 3:
                reward += 9
            elif 3 < distance2 <= 6:
                reward += 6
            elif 6 < distance2 <= 9.1:
                reward += 0

            # Ball touch
            if self.received_frame.coordinates[MY_TEAM][4][TOUCH] == True:
                reward += 100

            # State
            # If you want to use the image as the input for your network
            # You can use pillow: PIL.Image to get and resize the input frame as follows
            #img = Image.fromarray((self.image.ImageBuffer/255).astype('uint8'), 'RGB')  # Get normalized image as a PIL.Image object
            #resized_img = img.resize((NEW_X,NEW_Y))
            #final_img = np.array(resized_img)

            # Coordinates rounded to integers with round()
            position = [round(self.received_frame.coordinates[MY_TEAM][4][X]/2.05),
                        round(self.received_frame.coordinates[MY_TEAM][4][Y]/1.35),
                        round(self.received_frame.coordinates[MY_TEAM][4][TH]/(2*math.pi)),
                        round(self.received_frame.coordinates[BALL][X]/2.05),
                        round(self.received_frame.coordinates[BALL][Y]/1.35)]

            # Action
            if np.random.rand() < self.epsilon:
                action = random.randint(0, 10)
            else:
                action = self.Q.BestAction(np.array(position))  # using CNNs use final_img as input

            # Set robot wheels
            set_action(4, action)
            set_wheel(self, self.wheels)

            # Update Replay Memory
            self.D.append([np.array(position), action, reward])
##############################################################################

            # David :: update check
            self.printConsole("-----------------update------------------")
            self.printConsole("#_frame:" + str(self._frame))
            self.printConsole("Iterations:" + str(self._iterations))
            self.printConsole("ball coordinates : " + str(self.received_frame.coordinates[BALL][X]))
            self.printConsole("distance :" + str(distance(self.received_frame.coordinates[MY_TEAM][4][X],
                                                          self.received_frame.coordinates[BALL][X],
                                                          self.received_frame.coordinates[MY_TEAM][4][Y],
                                                          self.received_frame.coordinates[BALL][Y])))
            self.printConsole("distance/4.1 :" + str(distance(self.received_frame.coordinates[MY_TEAM][4][X],
                                                              self.received_frame.coordinates[BALL][X],
                                                              self.received_frame.coordinates[MY_TEAM][4][Y],
                                                              self.received_frame.coordinates[BALL][Y])/4.1))
            self.printConsole("player[4] wheel :" + "[L]" + str(self.wheels[2*4]) + "[R]" + str(self.wheels[2*4 + 1]))
            self.printConsole("reward :" + str(reward))
            self.printConsole("Epsilon:" + str(self.epsilon))
            self.printConsole("len(D):" + str(len(self.D)))
            self.printConsole("D info:" + str(np.array(position)) + " " + str(action) + " " + str(reward))
            #self.printConsole("position : " + str(position[0]))
            self.printConsole("-----------------etadpu------------------")

            self.total_distance += distance(self.received_frame.coordinates[MY_TEAM][4][X],
                                            self.received_frame.coordinates[BALL][X],
                                            self.received_frame.coordinates[MY_TEAM][4][Y],
                                            self.received_frame.coordinates[BALL][Y])
            average_distance = self.total_distance/self._frame
            self.ball_touch += self.received_frame.coordinates[MY_TEAM][4][TOUCH]
            log_file.write('\n#_frames = ' + str(self._frame) +
                           ' touch = ' + str(self.ball_touch) +
                           ' distance : ' + str(distance(self.received_frame.coordinates[MY_TEAM][4][X],
                                                         self.received_frame.coordinates[BALL][X],
                                                         self.received_frame.coordinates[MY_TEAM][4][Y],
                                                         self.received_frame.coordinates[BALL][Y])) +
                           ' average_distance : ' + str(average_distance))

##############################################################################
            # Training!
            if len(self.D) >= self.observation_steps:
                self._iterations += 1
                a = np.zeros((self.minibatch_size, self.num_actions))
                r = np.zeros((self.minibatch_size, 1))
                batch_phy = np.zeros((self.minibatch_size, 5))  # depends on what is your input state
                batch_phy_ = np.zeros((self.minibatch_size, 5))  # depends on what is your input state
                for i in range(self.minibatch_size):
                    index = np.random.randint(len(self.D)-1)  # Sample a random index from the replay memory
                    a[i] = [0 if j != self.D[index][1] else 1 for j in range(self.num_actions)]
                    r[i] = self.D[index][2]
                    batch_phy[i] = self.D[index][0].reshape((1, 5))  # depends on what is your input state
                    batch_phy_[i] = self.D[index+1][0].reshape((1, 5))  # depends on what is your input state
                y_value = r + self.gamma*np.max(self.Q_.IterateNetwork(batch_phy_), axis=1).reshape((self.minibatch_size, 1))
                self.sqerror = self.Q.TrainNetwork(batch_phy, a, y_value)
                if self._iterations % 100 == 0:  # Print information every 100 iterations
                    self.printConsole("Squared Error(Episode" + str(self._iterations) + "): " + str(self.sqerror))
                    self.printConsole("Epsilon: " + str(self.epsilon))
                if self._iterations % self.update == 0:
                    self.Q_.Copy(self.Q)
                    self.printConsole("Copied Target Network")
                if self._iterations % self.save_every_steps == 0:
                    self.Q.SaveToFile(CHECKPOINT)
                    self.printConsole("Saved Checkpoint")
                if self._iterations % self.step_epsilon == 0:
                    self.epsilon = max(self.epsilon - self.dec_epsilon, self.final_epsilon)
                    self.D = []  # Reset Replay Memory for new generation
                    self.printConsole("New Episode! New Epsilon:" + str(self.epsilon))
##############################################################################

            if (self.received_frame.reset_reason == GAME_END):
##############################################################################
                #(virtual finish() in random_walk.cpp)
                # Save your data
                log_file.close()
                with open(args.datapath + '/result.txt', 'w') as output:
                    #output.write('yourvariables')
                    output.close()
                # Unsubscribe; reset or leave
                yield self.sub.unsubscribe()
                try:
                    yield self.leave()
                except Exception as e:
                    self.printConsole("Error: {}".format(e))
##############################################################################

            self.end_of_frame = False

    def onDisconnect(self):
        if reactor.running:
            reactor.stop()
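# ----------------------------------------------------------------------------
# The Component classes above and below rely on names defined elsewhere in the
# AI World Cup example package (imports, coordinate constants, the
# Frame/SubImage/Received_Image helpers and the NeuralNetwork wrapper). The
# block below is only a hedged sketch of those assumptions so this section can
# be read on its own; the exact definitions live in the original example files
# and may differ.
#
#   import math, random, sys
#   import numpy as np
#   from twisted.internet import reactor
#   from twisted.internet.defer import inlineCallbacks
#   from autobahn.twisted.wamp import ApplicationSession
#
#   # Indices into received_frame.coordinates, following the framework layout
#   MY_TEAM, OP_TEAM, BALL = 0, 1, 2
#   X, Y, TH, ACTIVE, TOUCH = 0, 1, 2, 3, 4
#
#   GAME_END = ...    # reset_reason value signalling the end of the game (assumed)
#   CHECKPOINT = ...  # checkpoint path passed to NeuralNetwork.SaveToFile (assumed)
#
# The NeuralNetwork wrapper is assumed to expose:
#   BestAction(state) -> int, IterateNetwork(batch) -> array of Q-values,
#   TrainNetwork(states, actions_one_hot, targets) -> squared error,
#   Copy(other_network), SaveToFile(path).
# ----------------------------------------------------------------------------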
class Component(ApplicationSession):
    """
    AI Base + Deep Q Network example
    """

    def __init__(self, config):
        ApplicationSession.__init__(self, config)

    def printConsole(self, message):
        print(message)
        sys.__stdout__.flush()

    def onConnect(self):
        self.join(self.config.realm)

    @inlineCallbacks
    def onJoin(self, details):
##############################################################################
        def init_variables(self, info):
            # Here you have the information of the game (virtual init() in random_walk.cpp)
            # List: game_time, goal, number_of_robots, penalty_area, codewords,
            #       robot_height, robot_radius, max_linear_velocity, field, team_info,
            #       {rating, name}, axle_length, resolution, ball_radius
            # self.game_time = info['game_time']
            # self.field = info['field']
            self.max_linear_velocity = info['max_linear_velocity']
            self.resolution = info['resolution']
            self.colorChannels = 3  # nf
            self.end_of_frame = False
            self.image = Received_Image(self.resolution, self.colorChannels)

            self.D = []  # Replay Memory
            self.update = 100  # Update Target Network
            self.epsilon = 1.0  # Initial epsilon value
            self.final_epsilon = 0.05  # Final epsilon value
            self.dec_epsilon = 0.05  # Decrease rate of epsilon for every generation
            self.step_epsilon = 20000  # Number of iterations for every generation
            self.observation_steps = 5000  # Number of iterations to observe before training every generation
            self.save_every_steps = 5000  # Save checkpoint
            self.num_actions = 11  # Number of possible actions
            self._frame = 0
            self._iterations = 0
            self.minibatch_size = 64
            self.gamma = 0.99
            self.sqerror = 100  # Initial sqerror value
            self.Q = NeuralNetwork(None, False, False)  # 2nd term: False to start training from scratch, use CHECKPOINT to load a checkpoint
            self.Q_ = NeuralNetwork(self.Q, False, True)
            self.wheels = [0 for _ in range(10)]
            return
##############################################################################

        try:
            info = yield self.call(u'aiwc.get_info', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            try:
                self.sub = yield self.subscribe(self.on_event, args.key)
            except Exception as e2:
                self.printConsole("Error: {}".format(e2))

        init_variables(self, info)

        try:
            yield self.call(u'aiwc.ready', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            self.printConsole("I am ready for the game!")

    @inlineCallbacks
    def on_event(self, f):

        @inlineCallbacks
        def set_wheel(self, robot_wheels):
            yield self.call(u'aiwc.set_speed', args.key, robot_wheels)
            return

        def set_action(robot_id, action_number):
            if action_number == 0:
                self.wheels[2*robot_id] = 0.75
                self.wheels[2*robot_id + 1] = 0.75
                # Go Forward with fixed velocity
            elif action_number == 1:
                self.wheels[2*robot_id] = 0.75
                self.wheels[2*robot_id + 1] = 0.5
                # Turn
            elif action_number == 2:
                self.wheels[2*robot_id] = 0.75
                self.wheels[2*robot_id + 1] = 0.25
                # Turn
            elif action_number == 3:
                self.wheels[2*robot_id] = 0.75
                self.wheels[2*robot_id + 1] = 0
                # Turn
            elif action_number == 4:
                self.wheels[2*robot_id] = 0.5
                self.wheels[2*robot_id + 1] = 0.75
                # Turn
            elif action_number == 5:
                self.wheels[2*robot_id] = 0.25
                self.wheels[2*robot_id + 1] = 0.75
                # Turn
            elif action_number == 6:
                self.wheels[2*robot_id] = 0
                self.wheels[2*robot_id + 1] = 0.75
                # Turn
            elif action_number == 7:
                self.wheels[2*robot_id] = -0.75
                self.wheels[2*robot_id + 1] = -0.75
                # Go Backward with fixed velocity
            elif action_number == 8:
                self.wheels[2*robot_id] = -0.1
                self.wheels[2*robot_id + 1] = 0.1
                # Spin
            elif action_number == 9:
                self.wheels[2*robot_id] = 0.1
                self.wheels[2*robot_id + 1] = -0.1
                # Spin
            elif action_number == 10:
                self.wheels[2*robot_id] = 0
                self.wheels[2*robot_id + 1] = 0
                # Do not move

        def distance(x1, x2, y1, y2):
            return math.sqrt(math.pow(x1 - x2, 2) + math.pow(y1 - y2, 2))

        # initiate empty frame
        received_frame = Frame()
        received_subimages = []

        if 'time' in f:
            received_frame.time = f['time']
        if 'score' in f:
            received_frame.score = f['score']
        if 'reset_reason' in f:
            received_frame.reset_reason = f['reset_reason']
        if 'subimages' in f:
            received_frame.subimages = f['subimages']
            # Comment the next lines if you don't need to use the image information
            for s in received_frame.subimages:
                received_subimages.append(SubImage(s['x'],
                                                   s['y'],
                                                   s['w'],
                                                   s['h'],
                                                   s['base64'].encode('utf8')))
            self.image.update_image(received_subimages)
        if 'coordinates' in f:
            received_frame.coordinates = f['coordinates']
        if 'EOF' in f:
            self.end_of_frame = f['EOF']

        #self.printConsole(received_frame.time)
        #self.printConsole(received_frame.score)
        #self.printConsole(received_frame.reset_reason)
        #self.printConsole(self.end_of_frame)

        if (self.end_of_frame):
            # How to get the robot and ball coordinates: (ROBOT_ID can be 0,1,2,3,4)
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[BALL][X])
            #self.printConsole(received_frame.coordinates[BALL][Y])

            self._frame += 1

            # To get the image at the end of each frame use the variable:
            #self.printConsole(self.image.ImageBuffer)

##############################################################################
            #(virtual update())

            # Reward
            reward = math.exp(-10*(distance(received_frame.coordinates[MY_TEAM][0][X],
                                            received_frame.coordinates[BALL][X],
                                            received_frame.coordinates[MY_TEAM][0][Y],
                                            received_frame.coordinates[BALL][Y])/4.1))

            # State
            # If you want to use the image as the input for your network
            # You can use pillow: PIL.Image to get and resize the input frame as follows
            #img = Image.fromarray((self.image.ImageBuffer/255).astype('uint8'), 'RGB')  # Get normalized image as a PIL.Image object
            #resized_img = img.resize((NEW_X,NEW_Y))
            #final_img = np.array(resized_img)

            # Example: using the normalized coordinates for robot 0 and the ball
            position = [round(received_frame.coordinates[MY_TEAM][0][X]/2.05, 2),
                        round(received_frame.coordinates[MY_TEAM][0][Y]/1.35, 2),
                        round(received_frame.coordinates[MY_TEAM][0][TH]/(2*math.pi), 2),
                        round(received_frame.coordinates[BALL][X]/2.05, 2),
                        round(received_frame.coordinates[BALL][Y]/1.35, 2)]

            # Action
            if np.random.rand() < self.epsilon:
                action = random.randint(0, 10)
            else:
                action = self.Q.BestAction(np.array(position))  # using CNNs use final_img as input

            # Set robot wheels
            set_action(0, action)
            set_wheel(self, self.wheels)

            # Update Replay Memory
            self.D.append([np.array(position), action, reward])
##############################################################################

##############################################################################
            # Training!
            if len(self.D) >= self.observation_steps:
                self._iterations += 1
                a = np.zeros((self.minibatch_size, self.num_actions))
                r = np.zeros((self.minibatch_size, 1))
                batch_phy = np.zeros((self.minibatch_size, 5))  # depends on what is your input state
                batch_phy_ = np.zeros((self.minibatch_size, 5))  # depends on what is your input state
                for i in range(self.minibatch_size):
                    index = np.random.randint(len(self.D)-1)  # Sample a random index from the replay memory
                    a[i] = [0 if j != self.D[index][1] else 1 for j in range(self.num_actions)]
                    r[i] = self.D[index][2]
                    batch_phy[i] = self.D[index][0].reshape((1, 5))  # depends on what is your input state
                    batch_phy_[i] = self.D[index+1][0].reshape((1, 5))  # depends on what is your input state
                y_value = r + self.gamma*np.max(self.Q_.IterateNetwork(batch_phy_), axis=1).reshape((self.minibatch_size, 1))
                self.sqerror = self.Q.TrainNetwork(batch_phy, a, y_value)
                if self._iterations % 100 == 0:  # Print information every 100 iterations
                    self.printConsole("Squared Error(Episode" + str(self._iterations) + "): " + str(self.sqerror))
                    self.printConsole("Epsilon: " + str(self.epsilon))
                if self._iterations % self.update == 0:
                    self.Q_.Copy(self.Q)
                    self.printConsole("Copied Target Network")
                if self._iterations % self.save_every_steps == 0:
                    self.Q.SaveToFile(CHECKPOINT)
                    self.printConsole("Saved Checkpoint")
                if self._iterations % self.step_epsilon == 0:
                    self.epsilon = max(self.epsilon - self.dec_epsilon, self.final_epsilon)
                    self.D = []  # Reset Replay Memory for new generation
                    self.printConsole("New Episode! New Epsilon:" + str(self.epsilon))
##############################################################################

            if (received_frame.reset_reason == GAME_END):
##############################################################################
                #(virtual finish() in random_walk.cpp)
                # Save your data
                with open(args.datapath + '/result.txt', 'w') as output:
                    #output.write('yourvariables')
                    output.close()
                # Unsubscribe; reset or leave
                yield self.sub.unsubscribe()
                try:
                    yield self.leave()
                except Exception as e:
                    self.printConsole("Error: {}".format(e))
##############################################################################

            self.end_of_frame = False

    def onDisconnect(self):
        if reactor.running:
            reactor.stop()
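# ----------------------------------------------------------------------------
# Hedged sketch of how a Component like the ones above is typically launched:
# parse the connection arguments handed to the agent and start the WAMP session
# with autobahn's ApplicationRunner. The argument names (server_ip, port,
# realm, key, datapath) follow the framework's calling convention, but this is
# only an illustration under those assumptions, not the original launcher code.
#
#   if __name__ == '__main__':
#       import argparse
#       from autobahn.twisted.wamp import ApplicationRunner
#
#       parser = argparse.ArgumentParser()
#       parser.add_argument("server_ip")
#       parser.add_argument("port")
#       parser.add_argument("realm")
#       parser.add_argument("key")       # participant key used in the aiwc.* calls
#       parser.add_argument("datapath")  # directory where result.txt is written
#       args = parser.parse_args()
#
#       runner = ApplicationRunner(u"ws://{}:{}/ws".format(args.server_ip, args.port),
#                                  args.realm)
#       runner.run(Component)
# ----------------------------------------------------------------------------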