def build(self):
    """Build the Kivy root widget: game area, paint layer, and control buttons.

    Returns the Game widget (used as the root) with the car, destination,
    painter and all buttons already added.
    """
    # Start paused; pauseCheck (scheduled below) drives the simulation.
    self.paused = True
    self.last_size = [0, 0]
    self.goals = []
    self.parent = Game()
    parent = self.parent
    sec_num = 6
    box_size = 400
    # Brain input size: two values per sector plus two extra signals -- TODO confirm against Game.serve_car.
    self.brain = Dqn(sec_num * 2 + 2, 3, 0.9)
    circle = Circle()
    dest = Destinity()
    parent.serve_car(circle=circle, dest=dest, sec_num=sec_num, box_size=box_size, brain=self.brain, goals=self.goals)
    # 60 Hz tick; pauseCheck decides whether to actually step the game.
    Clock.schedule_interval(self.pauseCheck, 1.0 / 60.0)
    #Clock.schedule_interval(parent.update, 0)
    self.painter = MyPaintWidget(self.goals)
    clearbtn = Button(text='clear')
    savebtn = Button(text='save', pos=(parent.width, 0))
    loadbtn = Button(text='load', pos=(2 * parent.width, 0))
    self.pausebtn = Button(text='start', pos=(3 * parent.width, 0))
    plotbtn = Button(text='plot', pos=(4 * parent.width, 0))
    self.setGoalsbtn = Button(text='drawing', pos=(5 * parent.width, 0))
    clearbtn.bind(on_release=self.clear_canvas)
    savebtn.bind(on_release=self.save)
    loadbtn.bind(on_release=self.load)
    self.pausebtn.bind(on_release=self.pauseSwitch)
    plotbtn.bind(on_release=self.plot)
    self.setGoalsbtn.bind(on_release=self.setGoals)
    parent.add_widget(self.painter)
    parent.add_widget(clearbtn)
    parent.add_widget(savebtn)
    parent.add_widget(loadbtn)
    parent.add_widget(self.pausebtn)
    parent.add_widget(plotbtn)
    parent.add_widget(self.setGoalsbtn)
    parent.add_widget(dest)
    parent.add_widget(circle)
    return parent
def __init__(self, agent_host, agent_port, mission_type, mission_seed, solution_report, state_space_graph):
    """ Constructor for the realistic agent """
    # Movement mode for the Malmo mission.
    self.AGENT_MOVEMENT_TYPE = 'Discrete'  #This can be varied between the following - {Absolute, Discrete, Continuous}
    self.AGENT_NAME = 'Realistic'
    # Discrete action set; the DQN's output index selects one of these commands.
    self.AGENT_ALLOWED_ACTIONS = ["movenorth 1", "movesouth 1", "movewest 1", "moveeast 1"]
    self.agent_host = agent_host
    self.agent_port = agent_port
    self.mission_seed = mission_seed
    self.mission_type = mission_type
    self.state_space = None;  # Note - To be a true Realistic Agent, it can not know anything about the state_space a priori!
    self.solution_report = solution_report;
    self.solution_report.setMissionType(self.mission_type)
    self.solution_report.setMissionSeed(self.mission_seed)
    self.last_reward = 0
    self.accumulative_reward = 0
    # DQN: 2 inputs (position signal), one action per allowed command, gamma 0.9.
    self.brain = Dqn(2, len(self.AGENT_ALLOWED_ACTIONS), 0.9)
    # Restore previously saved network weights, if any -- TODO confirm Dqn.load tolerates a missing file.
    self.brain.load()
# Importing the Dqn object from our AI in ai.py from ai import Dqn # Adding this line if we don't want the right click to put a red point Config.set('input', 'mouse', 'mouse,multitouch_on_demand') # Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map last_x = 0 last_y = 0 n_points = 0 length = 0 init_time = 0 # Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function brain = Dqn(8, 3, 0.9, temp_value=75, reward_windows_capacity=10000, memory_size=1000000) action2rotation = [0, 20, -20] last_reward = 0 scores = [] # Initializing the map first_update = True def init(): global sand global goal_x global goal_y global first_update
from ai import Dqn # Adding this line if we don't want the right click to put a red point Config.set('input', 'mouse', 'mouse,multitouch_on_demand') Config.set('graphics', 'resizable', False) Config.set('graphics', 'width', '1273') Config.set('graphics', 'height', '1049') # Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map last_x = 0 last_y = 0 n_points = 0 length = 0 # Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function brain = Dqn(10,3,0.9) action2rotation = [10,0,-10] last_reward = 0 scores = [] im = CoreImage("./images/lalbagh_mask.png") # Initializing the map first_update = True def init(): global sand global goal_x global goal_y global first_update sand = np.zeros((longueur,largeur)) img = PILImage.open("./images/lalbagh_mask_rot_90.png").convert('L') sand = np.asarray(img)/255
# Importing Dqn from ai file from ai import Dqn # Adding click configuration Config.set('input', 'mouse', 'mouse,multitouch_on_demand') # Introducing variables to keep track of sand last_x, last_y, n_points, length = [0] * 4 # Setting up our AI and other variables nb_dimensions = 5 nb_actions = 3 gamma = 0.9 mem_capacity = 100_000 temp = 75 brain = Dqn(dimensions=nb_dimensions, action=nb_actions, gamma=gamma, mem_capacity=mem_capacity, temperature=temp) action_to_rotation = [0, 20, -20] last_reward = 0 scores = [] first_update = True # Initializing the map def init(): """Initialize the map for Agent """ global sand, goal_x, goal_y, first_update sand = np.zeros((map_width, map_height)) goal_x = 20 goal_y = map_height - 20 first_update = False
# Importing the Dqn object from our AI in ai.py from ai import Dqn # Adding this line if we don't want the right click to put a red point Config.set('input', 'mouse', 'mouse,multitouch_on_demand') Config.set('graphics', 'width', '1280') Config.set('graphics', 'height', '720') # Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map last_x = 0 last_y = 0 n_points = 0 length = 0 # Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function brain = Dqn(5,3,0.9) # 5 element list as input(distances), 3 possible actions and gamma rate of DQN action2rotation = [0,20,-20] # go Straight, Right, Left last_reward = 0 scores = [] time_intervals = [] start_time = time.clock() # Initializing the map first_update = True def init(): global sand #pixels on the map are either sand or not sand, 1 or 0 global goal_x global goal_y global first_update sand = np.zeros((longueur,largeur)) goal_x = 20 # upper left near-corner
# Implement a timer from timeit import default_timer starting_time = default_timer() duration_time = default_timer() - starting_time # Adding this line if we don't want the right click to put a red point Config.set('input', 'mouse', 'mouse,multitouch_on_demand') # Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map last_x = 0 last_y = 0 n_points = 0 length = 0 # Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function brain = Dqn(5, 3, 0.9) action2rotation = [0, 20, -20] last_reward = 0 scores = [] # Initializing the map first_update = True def init(): global sand global goal_x global goal_y global first_update sand = np.zeros((longueur, largeur)) goal_x = 20
class CarApp(App):
    """Kivy application: game widget, paint layer, and control buttons."""

    def build(self):
        # Start paused; pauseCheck (scheduled below) re-initialises on resize.
        self.paused = True
        self.last_size = [0, 0]
        self.goals = []
        self.parent = Game()
        parent = self.parent
        sec_num = 6
        box_size = 400
        # Brain input size: two values per sector plus two extra signals -- TODO confirm against Game.serve_car.
        self.brain = Dqn(sec_num * 2 + 2, 3, 0.9)
        circle = Circle()
        dest = Destinity()
        parent.serve_car(circle=circle, dest=dest, sec_num=sec_num, box_size=box_size, brain=self.brain, goals=self.goals)
        Clock.schedule_interval(self.pauseCheck, 1.0 / 60.0)
        #Clock.schedule_interval(parent.update, 0)
        self.painter = MyPaintWidget(self.goals)
        clearbtn = Button(text='clear')
        savebtn = Button(text='save', pos=(parent.width, 0))
        loadbtn = Button(text='load', pos=(2 * parent.width, 0))
        self.pausebtn = Button(text='start', pos=(3 * parent.width, 0))
        plotbtn = Button(text='plot', pos=(4 * parent.width, 0))
        self.setGoalsbtn = Button(text='drawing', pos=(5 * parent.width, 0))
        clearbtn.bind(on_release=self.clear_canvas)
        savebtn.bind(on_release=self.save)
        loadbtn.bind(on_release=self.load)
        self.pausebtn.bind(on_release=self.pauseSwitch)
        plotbtn.bind(on_release=self.plot)
        self.setGoalsbtn.bind(on_release=self.setGoals)
        parent.add_widget(self.painter)
        parent.add_widget(clearbtn)
        parent.add_widget(savebtn)
        parent.add_widget(loadbtn)
        parent.add_widget(self.pausebtn)
        parent.add_widget(plotbtn)
        parent.add_widget(self.setGoalsbtn)
        parent.add_widget(dest)
        parent.add_widget(circle)
        return parent

    def pauseCheck(self, dt):
        # 60 Hz tick: step the game while running, otherwise watch for window resizes.
        if self.paused == False:
            self.parent.update()
        else:
            if self.last_size != self.parent.size:
                print("resized")
                global longueur
                global largeur
                longueur = self.parent.width
                largeur = self.parent.height
                # Re-centre the car horizontally, near the bottom of the window.
                self.parent.car.center[0] = longueur * 0.5
                self.parent.car.center[1] = largeur * 0.125
                self.painter.canvas.clear()
                init()
                self.parent.update()
                del self.goals[:]
                print('goals cleared')
                self.last_size[0] = self.parent.size[0]
                self.last_size[1] = self.parent.size[1]
                shrink_updated = False  # NOTE(review): local variable, has no effect outside this call

    def pauseSwitch(self, obj):
        # Flip the pause flag (1 - True == 0, i.e. False).
        self.paused = 1 - self.paused
        if self.paused == True:
            self.pausebtn.text = 'start'
        else:
            self.pausebtn.text =  # NOTE(review): chunk truncated here; the RHS continues in the next chunk
# NOTE(review): this chunk continues a previous one; the leading string literal
# 'pause' is the right-hand side of the truncated `self.pausebtn.text =`
# assignment in the preceding chunk, and the defs below are CarApp methods.
'pause'
self.parent.updateGoal()

def setGoals(self, obj):
    # Toggle between goal-placing mode and free-drawing mode.
    self.painter.setgoals = 1 - self.painter.setgoals
    if self.painter.setgoals == True:
        self.setGoalsbtn.text = 'setting goals'
    else:
        self.setGoalsbtn.text = 'drawing'
        # NOTE(review): indentation lost in this chunk; the two statements below
        # are presumed to run when leaving goal mode -- verify against upstream.
        del self.goals[:]
        print('goals cleared')

def plot(self, obj):
    # Show the accumulated score history.
    plt.plot(scores)
    plt.show()

def clear_canvas(self, obj):
    # Wipe the drawing layer and reset the sand grid.
    global sand
    self.painter.canvas.clear()
    sand = np.zeros((longueur, largeur))
    shrink_updated = False  # NOTE(review): local variable, has no effect outside this call

def save(self, obj):
    print("saving brain...")
    self.brain.save()
    plt.plot(scores)
    plt.show()

def load(self, obj):
    print("loading last saved brain...")
    self.brain.load()
# if user clicks right - no red point will be added Config.set('input', 'mouse', 'mouse,multitouch_on_demand') """ introducing the prev_x and prev_y, keeps the previous point in memory where the sand was drawn the total points and the length of the previous drawing """ prev_x = 0 prev_y = 0 total_points = 0 length = 0 """ The AI, the 'brain', that represent the Q function and contains our neural network. """ brain = Dqn(5, 3, 0.9) # 5 sensors, 3 actions, gama - 0.9 action2rotation = [0, 20, -20] # action - 0 => 0, action - 1 = > rotate 20, etc prev_reward = 0 scores = [] first_update = True # Initialize map only once def init(): global sand # an array of sand if sand value 1 otherwise 0 global goal_x # where the car has to go global goal_y global first_update sand = np.zeros((longueur, largeur)) goal_x = 20 # the goal to reach is the upper left of the map not 0 don't
class CarApp(App):
    """Kivy app with a configurable game, stats panel, and map save/load dialogs."""

    # NOTE(review): class-level attributes are shared across instances until
    # shadowed; `scores` in particular is a mutable class attribute.
    scores = []
    game_widget = None
    right_panel = None
    paused = False
    current_popup = None
    config_widget = None
    top_panel = None
    stats_widget = None
    map_dialog = None

    def __init__(self, **kwargs):
        super(CarApp, self).__init__(**kwargs)
        self.painter = PaintWidget()
        self.self_driving_config = Configuration()
        self.self_driving_config.load()
        # 6 inputs, 3 actions; remaining hyperparameters come from the config object.
        self.brain = Dqn(6, 3, self.self_driving_config)

    def build(self):
        # Wire the game widget to the shared config, brain and score list.
        self.game_widget = Game()
        self.game_widget.driving_config = self.self_driving_config
        self.game_widget.brain = self.brain
        self.game_widget.scores = self.scores
        self.game_widget.serve_car()
        Clock.schedule_interval(self.game_widget.update, 1.0 / 60.0)
        self.painter.game = self.game_widget
        self.game_widget.add_widget(self.painter)
        self.top_panel = TopPanel()
        self.top_panel.graph_widget.game_widget = self.game_widget
        self.game_widget.stats_widget = self.top_panel.stats_widget
        action_bar = TopMenuWidget()
        action_bar.pause_btn.bind(on_release=self.pause_resume)
        action_bar.save_brain_button.bind(on_release=self.save_brain)
        action_bar.save_map_button.bind(on_release=self.show_save_map)
        action_bar.load_btn.bind(on_release=self.load)
        action_bar.load_map_btn.bind(on_release=self.show_load_map)
        action_bar.clear_btn.bind(on_release=self.clear_canvas)
        action_bar.config_btn.bind(on_release=self.show_configuration)
        root = RootWidget()
        root.add_widget(action_bar)
        root.add_widget(self.top_panel)
        root.add_widget(self.game_widget)
        return root

    def put_stats(self, dt):
        # Clock callback: returning False cancels the schedule once stats exist.
        print("Checking for stats")
        if self.top_panel.stats_widget is None:
            return
        print("stats widget now available")
        return False

    def pause_resume(self, btn=None):
        self.paused = not self.paused
        self.game_widget.pause_resume()
        self.top_panel.graph_widget.pause_resume()
        # Re-schedule updates when resuming.
        if not self.paused:
            Clock.schedule_interval(self.game_widget.update, 1.0 / 60.0)

    def clear_canvas(self, obj=None):
        self.painter.canvas.clear()
        self.game_widget.reset_sand()

    def save_brain(self, obj):
        print("saving brain...")
        self.brain.save()

    def  # NOTE(review): chunk truncated here; the method definition continues in the next chunk
# NOTE(review): this chunk continues a previous one; `show_save_map` is missing
# its leading `def`, which sits at the end of the preceding chunk. These are
# CarApp methods.
show_save_map(self, obj):
    # Pause the game and open the save-map popup.
    self.pause_resume()
    self.map_dialog = MapDialog()
    self.current_popup = Popup(content=self.map_dialog, auto_dismiss=False, title='Save Map', size_hint=(None, None), size=(400, 200))
    self.map_dialog.save_btn.bind(on_release=self.map_dialog_action)
    self.map_dialog.cancel_btn.bind(on_release=self.close_popup)
    self.current_popup.open()

def show_load_map(self, obj):
    # Pause the game and open the load-map popup (same dialog in load mode).
    self.pause_resume()
    self.map_dialog = MapDialog(save_mode=False)
    self.current_popup = Popup(content=self.map_dialog, auto_dismiss=False, title='Load Map', size_hint=(None, None), size=(400, 200))
    self.map_dialog.save_btn.bind(on_release=self.map_dialog_action)
    self.map_dialog.cancel_btn.bind(on_release=self.close_popup)
    self.current_popup.open()

def map_dialog_action(self, btn):
    # Confirm button of the map dialog: save or load depending on its mode,
    # then dismiss the popup and resume the game.
    if self.map_dialog.save_mode:
        self.painter.save(self.map_dialog.filename_input.text)
    else:
        self.clear_canvas()
        self.painter.load(self.map_dialog.filename_input.text)
    self.current_popup.dismiss()
    self.pause_resume()

def load(self, obj):
    print("loading last saved brain...")
    self.brain.load()

def show_configuration(self, btn):
    # Pause and open the configuration popup pre-filled with current settings.
    self.pause_resume()
    self.config_widget = ConfigurationWidget()
    self.config_widget.set_config(self.self_driving_config)
    self.current_popup = Popup(content=self.config_widget, auto_dismiss=False, title='Configuration')
    self.current_popup.open()
    self.config_widget.save_btn.bind(on_release=self.save_configuration)
    self.config_widget.cancel_btn.bind(on_release=self.close_popup)

def save_configuration(self, btn):
    # Apply edited settings to the game, reset the canvas, and resume.
    self.pause_resume()
    self.self_driving_config.update(self.config_widget.get_dict())
    self.game_widget.update_config(self.self_driving_config)
    self.clear_canvas()
    self.current_popup.dismiss()

def close_popup(self, btn):
    self.pause_resume()
    self.current_popup.dismiss()
def __init__(self, **kwargs):
    """Create the paint widget, load persisted configuration, and build the DQN."""
    super(CarApp, self).__init__(**kwargs)
    self.painter = PaintWidget()
    self.self_driving_config = Configuration()
    # Load persisted settings before constructing the brain, which consumes them.
    self.self_driving_config.load()
    # 6 inputs, 3 actions; remaining hyperparameters come from the config object.
    self.brain = Dqn(6, 3, self.self_driving_config)
# Fixed seed so experiments are reproducible across runs.
random.seed(9001)

from kivy.core.image import Image

# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0
length = 0
speed = 1
gamma = 0.8  # discount factor for the DQN

# Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function
# 5 inputs, 41 actions: one per integer degree of rotation in [-20, 20].
brain = Dqn(5, 41, gamma)
print("loading last saved brain...")
# NOTE(review): import-time side effect; presumably Dqn.load tolerates a
# missing checkpoint file -- verify.
brain.load()

# action2rotation = [0,20,-20]
# One rotation per integer degree in [-20, 20]. list(range(...)) replaces the
# identity comprehension `[i for i in range(-20, 21, 1)]` -- same 41 values,
# idiomatic form (a comprehension that just copies its iterable).
action2rotation = list(range(-20, 21))
last_reward = 0

# Initializing the map
first_update = True

# Initializing the last distance
last_distance = 0
experiment = 1
    # NOTE(review): this chunk starts mid-function; the enclosing def (the loop
    # driver) begins in a previous chunk.
    if distance > 50:
        # Not there yet: schedule the next loop iteration via the Tk event loop.
        ui.root.after(40, lambda: loop(1000))
    else:
        print('Destination reached')


if __name__ == '__main__':
    # World model and UI share the same 300x300 dimensions.
    world = World(
        width=300,
        height=300,
        car_point=Point(50, 50),
        car_orientation=0
    )
    ui = UI(
        width=300,
        height=300,
        on_cls=clean_canvas,
        on_save=save_model,
        on_load=load_model,
        on_create_sand=put_sand
    )
    # 5 sensors, 3 actions, gama = 0.9
    network = Dqn(5, 3, 0.9)
    loop(0)
    ui.loop()
# Let the ultrasonic sensors settle before configuring the pins.
time.sleep(1)
# Centre sensor: trigger out (held low), echo in.
gpio.setup(trig, gpio.OUT)
gpio.output(trig, 0)
gpio.setup(echo, gpio.IN)
# Left sensor.
gpio.setup(trig_left, gpio.OUT)
gpio.output(trig_left, 0)
gpio.setup(echo_left, gpio.IN)
# Right sensor.
gpio.setup(trig_right, gpio.OUT)
gpio.output(trig_right, 0)
gpio.setup(echo_right, gpio.IN)

# 3 distance inputs (left/centre/right), 3 actions, gamma 0.9.
brain = Dqn(3, 3, 0.9)
count = 0


def act(last_reward, last_signal):
    # Fire a 10 microsecond trigger pulse and time the echo (HC-SR04 style).
    # NOTE(review): the function body continues beyond this chunk; busy-wait
    # loops below block until the echo pin transitions.
    gpio.output(trig, 1)
    time.sleep(0.00001)
    gpio.output(trig, 0)
    while gpio.input(echo) == 0:
        pass
    start = time.time()
    while gpio.input(echo) == 1:
        pass
    stop = time.time()
import socket
from multiprocessing import Process
import matplotlib.pyplot as plt
import os
import numpy as np
import sys
import tty, termios
import time
from ai import Dqn

# Keyboard actions the remote agent understands: left / forward / right.
ACTION = ['a', 'w', 'd']
# 5 inputs, 3 actions, gamma 0.9.
train = Dqn(5, 3, 0.9)
scores = []
ITE = []
lines = []

# Live score plot, updated as training progresses.
fig = plt.figure(1, figsize=(10, 8), dpi=120)
ax = fig.add_subplot(1, 1, 1)
plt.ion()  # make the chart display in real time
plt.show()
plt.xlabel('iteration')
plt.ylabel('scores')
plt.title('ScoreResult')


def save():
    # Persist the network and refresh the plot state.
    # NOTE(review): the function body continues beyond this chunk.
    train.save()
    global scores
    global ITE
    global iteration
    global lines
    global ax
from kivy.clock import Clock # Importing the Dqn object from our AI in ai.py from ai import Dqn # Adding this line if we don't want the right click to put a red point Config.set('input', 'mouse', 'mouse,multitouch_on_demand') # Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map last_x = 0 last_y = 0 n_points = 0 length = 0 # Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function brain = Dqn(5, 3, 0.9, 100000) action2rotation = [0, 20, -20] last_reward = 0 scores = [] # Initializing the map first_update = True def init(): global sand global goal_x global goal_y global first_update sand = np.zeros((longueur, largeur)) goal_x = 20
from kivy.clock import Clock # Importing the Dqn object from our AI in ai.py from ai import Dqn # Adding this line if we don't want the right click to put a red point Config.set('input', 'mouse', 'mouse,multitouch_on_demand') # Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map last_x = 0 last_y = 0 n_points = 0 length = 0 # Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function brain = Dqn(5, 3, 0.8) action2rotation = [0, 20, -20] last_reward = 0 scores = [] # Initializing the map first_update = True def init(): global sand global goal_x global goal_y global first_update sand = np.zeros((longueur, largeur)) goal_x = 20
# 2048 board rendering constants.
SIZE = 500
GRID_LEN = 4
GRID_PADDING = 10

BACKGROUND_COLOR_GAME = "#92877d"
BACKGROUND_COLOR_CELL_EMPTY = "#9e948a"
# Tile background colour per tile value.
BACKGROUND_COLOR_DICT = {2: "#eee4da", 4: "#ede0c8", 8: "#f2b179", 16: "#f59563", \
                         32: "#f67c5f", 64: "#f65e3b", 128: "#edcf72", 256: "#edcc61", \
                         512: "#edc850", 1024: "#edc53f", 2048: "#edc22e"}
# Tile text colour per tile value.
CELL_COLOR_DICT = {2: "#776e65", 4: "#776e65", 8: "#f9f6f2", 16: "#f9f6f2", \
                   32: "#f9f6f2", 64: "#f9f6f2", 128: "#f9f6f2", 256: "#f9f6f2", \
                   512: "#f9f6f2", 1024: "#f9f6f2", 2048: "#f9f6f2"}
FONT = ("Verdana", 40, "bold")

# 16 inputs (one per board cell), 4 actions (the four moves), gamma 0.9.
brain = Dqn(16, 4, 0.9)
moves = [up, down, right, left]  # action index -> move function
last_reward = 0
scores = []


class GameGrid(Frame):
    """Tkinter frame rendering the 2048 board.

    NOTE(review): the class body continues beyond this chunk.
    """

    def __init__(self):
        Frame.__init__(self)
        self.grid()
        self.master.title('2048')
        self.grid_cells = []
        self.init_grid()
        self.init_matrix()
from kivy.graphics import Color, Line, InstructionGroup
from kivy.config import Config
from kivy.properties import NumericProperty, ReferenceListProperty, ObjectProperty
from kivy.vector import Vector
from kivy.clock import Clock
from ai import Dqn

# No red point on right click.
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')

# Last drawn point, kept in memory while drawing sand on the map.
last_x = 0
last_y = 0
n_points = 0
length = 0

# Two independent brains (two cars), each: 5 inputs, 3 actions, gamma 0.9.
brain_1 = Dqn(5, 3, 0.9)
brain_2 = Dqn(5, 3, 0.9)
action2rotation = [0, 20, -20]  # action index -> rotation in degrees
last_reward_1 = 0
last_reward_2 = 0
scores_1 = []
scores_2 = []

# init map
first_update = True


def init():
    # NOTE(review): the function body continues beyond this chunk.
    global sand
    global goal_x_1
    global goal_y_1
# Importing the Dqn object(which will act as the brain of the car) from our AI in ai.py from ai import Dqn # Adding this line if we don't want the right click to put a red point Config.set('input', 'mouse', 'mouse,multitouch_on_demand') # Variables to track the mouse pointer interaction with canvas. # Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map last_x = 0 last_y = 0 n_points = 0 length = 0 # Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function brain = Dqn(5,3,0.9) # 5 inputs of sensors, 3 actions(rotations), gama = 0.9 action2rotation = [0,18,-18] #possible rotations last_reward = 0 scores = [] # initializing the mean score curve (sliding window of the rewards) with respect to time # Initializing the map first_update = True def init(): global sand # sand is an array that has as many cells as our graphic interface has pixels. Each cell has a one if there is sand, 0 otherwise. global goal_x # x-coordinate of the goal (where the car has to go, that is the airport or the downtown) global goal_y # y-coordinate of the goal (where the car has to go, that is the airport or the downtown) global first_update sand = np.zeros((longueur,largeur)) goal_x = 20 goal_y = largeur - 20 first_update = False
from kivy.clock import Clock # Importing the Dqn object from our AI in ai.py from ai import Dqn # Adding this line if we don't want the right click to put a red point Config.set('input', 'mouse', 'mouse,multitouch_on_demand') # Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map last_x = 0 last_y = 0 n_points = 0 length = 0 # Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function brain = Dqn(5,3,0.9) #5 corressponds to the states encoded vectors of input, 3 - possible actions - go left, right or straight. #0.9 is again the parameter in the deque learning algorithm. action2rotation = [0,20,-20] # vectro of 3 elements - actions are encoded by 3 numbers. #If 0(index of action) is 0 - corresponds to going left, If 1 then straight. The code will go 20 degrees #to the specified direction. The code will go -20 degrees and go to the left. last_reward = 0 # If car doesn't go into sand it'll be positive or else it'll be negative. scores = [] # scores - vector that contains the rewards so that you can make a curve of the mear square #reward with respect to time. # Initializing the map first_update = True def init(): global sand # array in which cells will be the pixels of the map global goal_x # the destination - upper left corner of the map. global goal_y # global first_update
from ai import Dqn # In[4]: # Adding this line if we don't want the right click to put a red point Config.set('input', 'mouse', 'mouse,multitouch_on_demand') # Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map last_x = 0 last_y = 0 n_points = 0 length = 0 # In[5]: # Getting our AI. Changed parameters for testing brain = Dqn(5, 3, 0.7) action2rotation = [0, 20, -20] last_reward = 0 scores = [] # In[6]: # Initializing the map first_update = True def init(): global sand global goal_x global goal_y global time_lapse
# Importing the Deep-Q Network - "Brain of the Car" object from our AI in ai.py from ai import Dqn # Adding this line if we don't want the right click to put a red point Config.set('input', 'mouse', 'mouse,multitouch_on_demand') # Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map last_x = 0 last_y = 0 n_points = 0 ## total number of points in the last drawing length = 0 ## total length of the last drawing # Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function brain = Dqn( 5, 3, 0.9) ## object(5-dimenisons; 3-number of actions; 0.9 - gamma parameter) action2rotation = [0, 20, -20] ## (20 correspond to rotation angle) last_reward = 0 ## initializing the last reward scores = [] ## contains rewards # Initializing the map first_update = True def init(): global sand ## 1 if there is a sand(penalty) and 0 if ther is none global goal_x ## destination coord global goal_y ## destination coord global first_update sand = np.zeros(
def process_img(original_image):
    """Convert a BGR capture frame to an 80x80 grayscale image for the network."""
    processed_img = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
    processed_img = cv2.resize(processed_img, (80, 80))
    return processed_img


def saveall(ai, data):
    """Persist the network and save the score curve as progression.png."""
    ai.save()
    plt.plot(data)
    plt.savefig('progression.png')


last_reward = 0
memo = []
# Input is a 1x80x80 image tensor, 3 actions, gamma 0.9.
brain = Dqn((1, 80, 80), 3, 0.9)
condition = True
last_state = np.zeros((1, 80, 80))
counter = 0
steps_count = 0
mean_steps = 0
scores = []
up_pressed = False
# Template used to detect the game-over screen -- presumably matched with
# cv2 template matching further down; verify.
dino = cv2.cvtColor(cv2.imread('game_over.png'), cv2.COLOR_BGR2GRAY)
w, h = dino.shape[:2]
brain.load()

# Main play loop: choose an action from the last reward/state each frame.
# NOTE(review): the loop body continues beyond this chunk.
while (condition):
    action = brain.update(last_reward, last_state)
    if action == 2:
from kivy.clock import Clock # Importação da IA que está no arquivo ai.py from ai import Dqn # Não permite adicionar um ponto vermelho no cenário quando é clicado com o botão direito do mouse Config.set('input', 'mouse', 'mouse,multitouch_on_demand') # As variáveis last_x e last_y são usadas para manter na memória o último ponto quando desenhamos areia no mapa last_x = 0 last_y = 0 n_points = 0 # Número total de pontos do último desenho length = 0 # Tamanho do último desenho # Criamos um objeto que chamamos de brain (cérebro), que contém a rede neural que retorna o valor de Q brain = Dqn(5, 3, 0.9) # 5 entradas (sensores + direção), 3 saídas e valor de gamma action2rotation = [ 0, 20, -20 ] # action = 0 => sem rotação, action = 1 => rotaciona 20 graus, action = 2 => rotaciona -20 graus last_reward = 0 # inicialização da última recompensa scores = [ ] # inicialização do valor médio das recompensas (sliding window) com relação ao tempo # Inicialização do mapa first_update = True # usado para inicializar o mapa somente uma vez def init(): global sand # a areia é representada por um vetor que possui a mesma quantidade de pixels que a interface completo - 1 se tem areia e 0 se não tem areia global goal_x # coordenada x do objetivo (para onde o carro vai, do aeroporto para o centro ou o contrário) global goal_y # coordenada y do objetivo (para onde o carro vai, do centro para o aeroporto ou o contrário)
from kivy.clock import Clock # Importing the Dqn object from our AI in ai.py from ai import Dqn # Adding this line if we don't want the right click to put a red point Config.set('input', 'mouse', 'mouse,multitouch_on_demand') # Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map last_x = 0 last_y = 0 n_points = 0 length = 0 # Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function brain = Dqn(6, 3, 0.9) action2rotation = [0, 20, -20] last_reward = 0 scores = [] start_time = 0 elapsed_time = 0 # Initializing the map first_update = True def init(): global sand global goal_x global goal_y global first_update global start_time
# Adding this line if we don't want the right click to put a red point Config.set('input', 'mouse', 'mouse,multitouch_on_demand') # Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map last_x = 0 last_y = 0 n_points = 0 length = 0 action2rotation = [0, 20, -20] last_reward = 0 scores = [] # Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function brain = Dqn(input_size=5, nb_action=len(action2rotation), gamma=0.9) # Initializing the map first_update = True def init(): global sand global goal_x global goal_y global first_update sand = np.zeros((longueur, largeur)) goal_x = 20 goal_y = largeur - 20 first_update = False
class AgentRealistic:
    """Malmo agent that learns online with a DQN and no prior state-space model.

    NOTE(review): the class body continues beyond this chunk (run_agent is
    truncated mid-assignment at the end).
    """

    def __init__(self, agent_host, agent_port, mission_type, mission_seed, solution_report, state_space_graph):
        """ Constructor for the realistic agent """
        self.AGENT_MOVEMENT_TYPE = 'Discrete'  #This can be varied between the following - {Absolute, Discrete, Continuous}
        self.AGENT_NAME = 'Realistic'
        # Discrete command set; the DQN's chosen index selects one of these.
        self.AGENT_ALLOWED_ACTIONS = ["movenorth 1", "movesouth 1", "movewest 1", "moveeast 1"]
        self.agent_host = agent_host
        self.agent_port = agent_port
        self.mission_seed = mission_seed
        self.mission_type = mission_type
        self.state_space = None;  # Note - To be a true Realistic Agent, it can not know anything about the state_space a priori!
        self.solution_report = solution_report;
        self.solution_report.setMissionType(self.mission_type)
        self.solution_report.setMissionSeed(self.mission_seed)
        self.last_reward = 0
        self.accumulative_reward = 0
        # 2 inputs (x/y position), one action per allowed command, gamma 0.9.
        self.brain = Dqn(2, len(self.AGENT_ALLOWED_ACTIONS), 0.9)
        self.brain.load()

    #----------------------------------------------------------------------------------------------------------------#
    def __ExecuteActionForRealisticAgentWithNoisyTransitionModel__(self, idx_requested_action, noise_level):
        """ Creates a well-defined transition model with a certain noise level """
        # With probability (1 - noise_level) execute the requested action;
        # otherwise pick uniformly among the other actions.
        n = len(self.AGENT_ALLOWED_ACTIONS)
        pp = noise_level / (n - 1) * np.ones((n, 1))
        pp[idx_requested_action] = 1.0 - noise_level
        idx_actual = np.random.choice(n, 1, p=pp.flatten())  # sample from the distribution of actions
        actual_action = self.AGENT_ALLOWED_ACTIONS[int(idx_actual)]
        self.agent_host.sendCommand(actual_action)
        return actual_action

    #----------------------------------------------------------------------------------------------------------------#
    def run_agent(self):
        """ Run the Realistic agent and log the performance and resource use """
        partialReward = 0
        #-- Load and initiate mission --#
        print('Generate and load the ' + self.mission_type + ' mission with seed ' + str(self.mission_seed) + ' allowing ' + self.AGENT_MOVEMENT_TYPE + ' movements')
        mission_xml =  # NOTE(review): chunk truncated here; the RHS continues in the next chunk
# NOTE(review): this chunk continues run_agent from the previous one; the
# leading call is the RHS of the truncated `mission_xml =` assignment.
init_mission(self.agent_host, self.agent_port, self.AGENT_NAME, self.mission_type, self.mission_seed, self.AGENT_MOVEMENT_TYPE)
self.solution_report.setMissionXML(mission_xml)
self.solution_report.start()
time.sleep(1)
state_t = self.agent_host.getWorldState()
first = True

# -- Get a state-space model by observing the Orcale/GridObserver--#
while state_t.is_mission_running:
    # Give the mission time to settle before the first observation.
    if first:
        time.sleep(2)
        first = False

    # -- Basic map --#
    state_t = self.agent_host.getWorldState()
    if state_t.number_of_observations_since_last_state > 0:
        msg = state_t.observations[-1].text  # Get the details for the last observed state
        oracle_and_internal = json.loads(msg)  # Parse the Oracle JSON
        grid = oracle_and_internal.get(u'grid', 1)
        xpos = oracle_and_internal.get(u'XPos', 1)
        zpos = oracle_and_internal.get(u'ZPos', 1)
        ypos = oracle_and_internal.get(u'YPos', 1)
        yaw = oracle_and_internal.get(u'Yaw', 1)
        pitch = oracle_and_internal.get(u'Pitch', 1)
        #last_signal = [xpos, zpos, ypos, yaw, pitch]
        # Only x/y position is fed to the 2-input DQN.
        last_signal = [xpos, ypos]
        action = self.brain.update(self.last_reward, last_signal)
        print("Requested Action:", self.AGENT_ALLOWED_ACTIONS[action])
        # Execute with a 10% chance of a random different action.
        self.__ExecuteActionForRealisticAgentWithNoisyTransitionModel__(action, 0.1)
        time.sleep(0.02)
        self.solution_report.action_count = self.solution_report.action_count + 1
        # Accumulate all rewards observed since the last step.
        for reward_t in state_t.rewards:
            partialReward += reward_t.getValue()
            #self.last_reward = reward_t.getValue()
            self.accumulative_reward += reward_t.getValue()
            self.solution_report.addReward(reward_t.getValue(), datetime.datetime.now())
            print("Reward_t:", reward_t.getValue())
            print("Cummulative reward so far:", self.accumulative_reward)
        print("Last Reward:{0}".format(partialReward))
        self.last_reward = partialReward
        partialReward = 0
return
import numpy as np
from random import random, randint
import matplotlib.pyplot as plt
import time

# Importing the Dqn object from our AI in ia.py
from ai import Dqn

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0  # the total number of points in the last drawing
length = 0  # the length of the last drawing

# Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function
# NOTE(review): the trailing comment says "5 sensors" but the input size is 12
# -- confirm which is correct against the state vector built elsewhere.
brain = Dqn(12, 3, 0.9)  # 5 sensors, 3 actions, gama = 0.9
action2rotation = [
    0, 10, -10
]  # action = 0 => no rotation, action = 1 => rotate 20 degres, action = 2 => rotate -20 degres
last_reward = 0  # initializing the last reward
scores = [
]  # initializing the mean score curve (sliding window of the rewards) with respect to time


# Creating the car class
class Car:
    # NOTE(review): class-level attributes shared until shadowed per instance;
    # the class body continues beyond this chunk.
    angle = 0  # initializing the angle of the car (angle between the x-axis of the map and the axis of the car)
    rotation = 0  # initializing the last rotation of the car (after playing the action, the car does a rotation of 0, 20 or -20 degrees)
    velocity = 0
import sys
from ai import Dqn
import numpy as np
from game import Game
# NOTE(review): wildcard imports are discouraged but kept to preserve behavior.
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *
from PyQt5.QtCore import *

# Window and card render dimensions (pixels).
WINDOW_WIDTH = 1136
WINDOW_HEIGHT = 850
CARD_WIDTH = 167
CARD_HEIGHT = 225

# 3 inputs, 2 actions (e.g. hit/stand), gamma 0.9.
brain = Dqn(3, 2, 0.9)
action = [1, 0]
last_reward = 0
scores = []

# Qt stylesheet for the game buttons.
# NOTE(review): chunk truncated inside this string literal; it continues in a
# later chunk.
buttonStyle = """ QPushButton { font-size: 26px; width: 100px; border-radius: 10px; border-style: outset; background: white; padding: 5px; } QPushButton:hover { background: qradialgradient(