# Malmo "Realistic" agent: learns from position observations only, driven by
# the Dqn brain sketched after this class. init_mission is provided elsewhere
# in the project.
import datetime
import json
import time

import numpy as np


class AgentRealistic:

    def __init__(self, agent_host, agent_port, mission_type, mission_seed, solution_report, state_space_graph):
        """ Constructor for the realistic agent """
        self.AGENT_MOVEMENT_TYPE = 'Discrete'  # One of {Absolute, Discrete, Continuous}
        self.AGENT_NAME = 'Realistic'
        self.AGENT_ALLOWED_ACTIONS = ["movenorth 1", "movesouth 1", "movewest 1", "moveeast 1"]

        self.agent_host = agent_host
        self.agent_port = agent_port
        self.mission_seed = mission_seed
        self.mission_type = mission_type
        self.state_space = None  # Note: a true realistic agent cannot know anything about the state space a priori!
        self.solution_report = solution_report
        self.solution_report.setMissionType(self.mission_type)
        self.solution_report.setMissionSeed(self.mission_seed)
        self.last_reward = 0
        self.accumulative_reward = 0
        # DQN brain: 2 inputs (x and y position), one output per allowed action
        self.brain = Dqn(2, len(self.AGENT_ALLOWED_ACTIONS), 0.9)
        self.brain.load()

    #----------------------------------------------------------------------------------------------------------------#
    def __ExecuteActionForRealisticAgentWithNoisyTransitionModel__(self, idx_requested_action, noise_level):
        """ Creates a well-defined transition model with a certain noise level:
        the requested action executes with probability 1 - noise_level, and the
        remaining probability mass is spread uniformly over the other actions. """
        n = len(self.AGENT_ALLOWED_ACTIONS)
        pp = noise_level / (n - 1) * np.ones((n, 1))
        pp[idx_requested_action] = 1.0 - noise_level
        idx_actual = np.random.choice(n, 1, p=pp.flatten())  # Sample from the distribution over actions
        actual_action = self.AGENT_ALLOWED_ACTIONS[int(idx_actual)]
        self.agent_host.sendCommand(actual_action)
        return actual_action

    #----------------------------------------------------------------------------------------------------------------#
    def run_agent(self):
        """ Run the Realistic agent and log the performance and resource use """
        partialReward = 0

        #-- Load and initiate mission --#
        print('Generate and load the ' + self.mission_type + ' mission with seed ' + str(self.mission_seed)
              + ' allowing ' + self.AGENT_MOVEMENT_TYPE + ' movements')
        mission_xml = init_mission(self.agent_host, self.agent_port, self.AGENT_NAME, self.mission_type,
                                   self.mission_seed, self.AGENT_MOVEMENT_TYPE)
        self.solution_report.setMissionXML(mission_xml)
        self.solution_report.start()
        time.sleep(1)
        state_t = self.agent_host.getWorldState()
        first = True

        #-- Get a state-space model by observing the Oracle/GridObserver --#
        while state_t.is_mission_running:
            if first:
                time.sleep(2)
                first = False

            #-- Basic map --#
            state_t = self.agent_host.getWorldState()
            if state_t.number_of_observations_since_last_state > 0:
                msg = state_t.observations[-1].text    # Details of the last observed state
                oracle_and_internal = json.loads(msg)  # Parse the Oracle JSON
                grid = oracle_and_internal.get(u'grid', 1)
                xpos = oracle_and_internal.get(u'XPos', 1)
                zpos = oracle_and_internal.get(u'ZPos', 1)
                ypos = oracle_and_internal.get(u'YPos', 1)
                yaw = oracle_and_internal.get(u'Yaw', 1)
                pitch = oracle_and_internal.get(u'Pitch', 1)

                #last_signal = [xpos, zpos, ypos, yaw, pitch]
                last_signal = [xpos, ypos]
                action = self.brain.update(self.last_reward, last_signal)
                print("Requested Action:", self.AGENT_ALLOWED_ACTIONS[action])
                self.__ExecuteActionForRealisticAgentWithNoisyTransitionModel__(action, 0.1)
                time.sleep(0.02)
                self.solution_report.action_count += 1

            for reward_t in state_t.rewards:
                partialReward += reward_t.getValue()
                #self.last_reward = reward_t.getValue()
                self.accumulative_reward += reward_t.getValue()
                self.solution_report.addReward(reward_t.getValue(), datetime.datetime.now())
print("Reward_t:",reward_t.getValue()) print("Cummulative reward so far:", self.accumulative_reward) print("Last Reward:{0}".format(partialReward)) self.last_reward = partialReward partialReward = 0 return
# Standalone script: Chrome Dino player driven by the same Dqn brain.
# process_img is defined elsewhere in the project.
import cv2
import numpy as np
import pyautogui
from PIL import ImageGrab

last_reward = 0
memo = []
brain = Dqn((1, 80, 80), 3, 0.9)  # 80x80 grayscale input; 3 actions: idle, jump, duck
condition = True
last_state = np.zeros((1, 80, 80))
counter = 0
steps_count = 0
mean_steps = 0
scores = []
up_pressed = False

# Grayscale "GAME OVER" template used to detect the end of a run
dino = cv2.cvtColor(cv2.imread('game_over.png'), cv2.COLOR_BGR2GRAY)
h, w = dino.shape[:2]  # NumPy images are (rows, cols), i.e. (height, width)
brain.load()

while condition:
    action = brain.update(last_reward, last_state)
    if action == 2:    # jump
        pyautogui.keyUp('down')
        pyautogui.keyDown('up')
    elif action == 0:  # do nothing
        pyautogui.keyUp('up')
        pyautogui.keyUp('down')
    else:              # duck
        pyautogui.keyUp('up')
        pyautogui.keyDown('down')

    # Capture the game region of the screen and preprocess it
    screen = np.array(ImageGrab.grab(bbox=(5, 155, 600, 296)))
    a = process_img(screen)
    proc_img = np.expand_dims(a, axis=0)
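    # The matching step against the game_over.png template is cut off above.
    # One plausible continuation, sketched here as an assumption: the 0.8
    # threshold and the restart/penalty logic are mine, not from the source.
    gray = cv2.cvtColor(screen, cv2.COLOR_RGB2GRAY)
    res = cv2.matchTemplate(gray, dino, cv2.TM_CCOEFF_NORMED)
    if np.max(res) > 0.8:      # "GAME OVER" banner found: the run ended
        last_reward = -1       # punish the death
        pyautogui.press('up')  # jump to restart the game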
# Standalone script (PC side of the RC-car project). Its top, including the
# imports of socket, time, and multiprocessing.Process plus the definitions of
# train, load, save, ITE, and learnCarState, is not shown.

    # ...tail of the training loop defined earlier in this script
    ITE.append(iteration)
    save()
    time.sleep(0.5)


if __name__ == '__main__':
    try:
        address = ('192.168.43.166', 6666)  # server (car) IP
        readdr = ('192.168.43.76', 6666)    # this PC's IP
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(readdr)

        sensorResult = [0, 0, 0, 0, 0]
        reward = 0
        last_dis = 100
        action = train.update(reward, sensorResult)  # train is the Dqn instance
        Time = time.strftime('%H:%M:%S', time.localtime(time.time()))

        # Write the header row of the log file
        recordfile = open('PCRecord.txt', 'w')
        recordfile.write('_________\t' + 'L_Sen' + '\t' + 'M_Sen' + '\t' + 'R_Sen'
                         + '\t' + 'Dis' + '\t' + 'Rew' + '\t' + 'Act' + '\n')
        recordfile.close()

        recivedata = '0'.encode()
        iteration = 0
        try:
            load()  # resume from a saved brain if one exists
        except Exception:
            pass

        threads = []
        learnThread = Process(target=learnCarState)  # despite the name, a separate process
        learnThread.start()
        threads.append(learnThread)
    # Raspberry Pi side of the RC-car project, inside its main sense/act loop;
    # dist_straight and dist_left come from the other two ultrasonic sensors,
    # read the same way just above (not shown).
    time.sleep(0.1)

    # Trigger the right-hand ultrasonic sensor with a 10-microsecond pulse
    gpio.output(trig_right, 1)
    time.sleep(0.00001)
    gpio.output(trig_right, 0)
    while gpio.input(echo_right) == 0:
        pass
    start2 = time.time()
    while gpio.input(echo_right) == 1:
        pass
    stop2 = time.time()
    # Echo travels to the obstacle and back: cm = elapsed * 34300 / 2, rounded to 17000
    dist_right = (stop2 - start2) * 17000

    last_signal = [dist_straight, dist_left, dist_right]
    action = brain.update(last_reward, last_signal)
    print(action)

    # Mapping as in the original: index 1 drives action3, index 2 drives action2
    if action == 0:
        action1()
    if action == 1:
        action3()
    if action == 2:
        action2()

    if dist_straight < 30:
        if action == 0:
            last_reward = -1
            act(last_reward, last_signal)
    c = max(last_signal)
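# For reference, the (stop - start) * 17000 conversion used above for each
# sensor: sound travels ~34300 cm/s and the echo covers the distance twice,
# so distance_cm = elapsed * 34300 / 2 = elapsed * 17150, which the script
# rounds to 17000. A small helper that makes the constant explicit (the name
# pulse_to_cm is mine, not from the source):
def pulse_to_cm(start, stop, cm_per_second=34300):
    # One-way distance: the pulse travels out and back, hence the halving
    return (stop - start) * cm_per_second / 2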
# Standalone script: training loop for a 2048-style game on a 6x6 board;
# game, brain, and logger are defined elsewhere in the project.
#brain.load()
saved_grids = []
rewards = []

for i in range(1000):
    state = game.reset()
    reward = 0
    game_over = False
    sum_rewards = 0
    grids = []
    while not game_over:
        grids.append(np.array(state).reshape(6, 6))
        state = logger(state)
        reward2 = reward / 2048  # scale the raw score so rewards stay in a small range
        action = brain.update(reward2, state)
        new_state, new_reward, game_over = game.step(int(action))
        state, reward = new_state, new_reward
        sum_rewards += reward
    rewards.append(sum_rewards)
    #if sum_rewards > 4999:
    #    print(np.array(grids))
    print(i, sum_rewards)
    #if i % 100 == 0:
    #    print("saving")
    #    brain.save()
    #    with open("rewards", "wb") as f:
    #        pickle.dump(rewards, f)
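# The 2048 loop above relies on a logger(state) preprocessor that is not
# shown. A plausible sketch, assuming the board arrives as a flat sequence of
# tile values and that the name refers to a log2 transform (an assumption):
# empty cells stay 0 and tiles 2, 4, ..., 2048 map to 1, 2, ..., 11, keeping
# the network inputs in a small, evenly spaced range.
import numpy as np

def logger(state):
    board = np.asarray(state, dtype=np.float32).flatten()
    out = np.zeros_like(board)
    nonzero = board > 0
    out[nonzero] = np.log2(board[nonzero])
    return out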