def train_model():
    # Initiates the env
    env = gym.make('Mario-Kart-Luigi-Raceway-v0')
    resolution = (120, 160)
    actions = [[-60, 0, 1, 0, 0],   # left
               [60, 0, 1, 0, 0],    # right
               [0, -80, 0, 1, 0],   # back
               [0, 0, 1, 0, 0]]     # go straight
               # [0, 0, 0, 1, 0]]   # brake

    # Initiates Model
    model = DQNModel(resolution=resolution,
                     nb_frames=learn_param['nb_frames'],
                     actions=actions)
    # print("number of actions: ", len(doom.actions))  # 16
    if model_weights:
        model.load_weights(model_weights)
    agent = RLAgent(model, **learn_param)

    # Perform Reinforcement Learning on Scenario
    agent.train(env)
def train_model():
    '''
    Method trains primitive DQN-Model.
    '''
    # Initiates VizDoom Scenario
    doom = DoomScenario(scenario)
    # print(doom.get_processed_state(depth_radius, depth_contrast).shape[-2:])

    # Initiates Model
    model = DQNModel(resolution=doom.get_processed_state(
                         depth_radius, depth_contrast).shape[-2:],
                     nb_frames=learn_param['nb_frames'],
                     actions=doom.actions,
                     depth_radius=depth_radius,
                     depth_contrast=depth_contrast)
    # print("number of actions: ", len(doom.actions))  # 16
    if model_weights:
        print("with a pretrained weights-------by amber")
        model.load_weights(model_weights)
    agent = RLAgent(model, **learn_param)

    # Perform Reinforcement Learning on Scenario
    agent.train(doom)
def main():
    config = dict()
    config['lr'] = 0.0000001
    config['stocks'] = ['a', 'aa']
    config['stock_num'] = len(config['stocks'])

    tf.reset_default_graph()
    agent = RLAgent(config)
    agent.RL_train()
def test_model(runs=1):
    '''
    Method used to test DQN models on a VizDoom scenario. Test runs are
    replayed at higher resolution (800x600).

    Param:
        runs - int : number of test runs done on model.
    '''
    # Initiates VizDoom Scenario
    doom = DoomScenario(scenario)

    # Load Model and Weights
    model = DQNModel(resolution=doom.get_processed_state(
                         depth_radius, depth_contrast).shape[-2:],
                     nb_frames=test_param['nb_frames'],
                     actions=doom.actions,
                     depth_radius=depth_radius,
                     depth_contrast=depth_contrast)
    model.load_weights(model_weights)
    agent = RLAgent(model, **test_param)

    print("\nTesting DQN-Model:", model_weights)

    # Run Scenario and play replay
    for i in range(runs):
        doom = DoomScenario(scenario)
        doom.run(agent, save_replay='test.lmp', verbose=True)
        doom.replay('test.lmp', doom_like=False)
def __init__(self, _id, tsc_data, conn, args, exp_replay, neural_networks,
             eps, rl_stats, reward):
    super(RLTrafficSignalController, self).__init__(_id, tsc_data, conn, args)
    self.rlagent = RLAgent(neural_networks, eps, exp_replay,
                           tsc_data['n_green_phases'], args.n_steps,
                           args.batch, args.replay, args.gamma)
    ###set intersection to red default
    self.id = _id
    #self.phase_buffer = deque()
    self.exp = {}
    self.current_phase = tsc_data['all_red']
    self.args = args
    self.phase_deque = deque()
    self.state_deque = deque()
    self.acting = False
    self.rl_stats = rl_stats
    self.reward = reward
    self.rewards = []
def __init__(self, env, gamma, learning_rate, epsilon, epsilon_min,
             epsilon_decay, divisor, buckets, training_episodes,
             testing_episodes, frames):
    RLAgent.__init__(self, env, training_episodes, testing_episodes, frames)
    self.env = env
    self.gamma = gamma
    self.learning_rate = learning_rate
    self.epsilon = epsilon
    self.epsilon_min = epsilon_min
    self.epsilon_decay = epsilon_decay
    self.divisor = divisor
    self.buckets = (3, 3, 6, 6,)
    self.Q = np.zeros(self.buckets + (self.env.action_space.n,))
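The (3, 3, 6, 6) bucket shape only makes sense together with a discretization step that maps the continuous observation onto those bucket indices before indexing self.Q. The original does not show that helper; below is a minimal sketch, assuming a CartPole-style 4-dimensional observation and hand-picked bounds (both assumptions, not taken from the source):

import math

def discretize(obs, buckets=(3, 3, 6, 6)):
    # Assumed CartPole-like bounds for (position, velocity, angle, angular velocity);
    # the two velocity terms are clipped to finite ranges so they can be bucketed.
    upper = [4.8, 3.0, math.radians(24), math.radians(50)]
    lower = [-4.8, -3.0, -math.radians(24), -math.radians(50)]
    ratios = [(o - lo) / (hi - lo) for o, lo, hi in zip(obs, lower, upper)]
    idx = [int(round((b - 1) * r)) for b, r in zip(buckets, ratios)]
    # Clamp each index into [0, bucket_size - 1].
    return tuple(min(b - 1, max(0, i)) for b, i in zip(buckets, idx))

# Hypothetical usage with the Q-table above:
# state = discretize(observation)
# best_action = np.argmax(self.Q[state])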
def __init__(self, screen, speed, use_keras=True, pre_trained=True):
    super().__init__(screen)
    self.total_training_games = TOTAL_TRAINING_GAMES
    # takes in x, y of the snake and the speed of the snake
    self.agent = RLAgent(speed, use_keras, pre_trained)
    self.go_through_boundary = True
    # total number of games required to train
    self.idle_frames = 0
    self.total_steps = 0

    # # try to load the numpy data, if not possible then set the training_data to an empty list
    # try:
    #     # loaded training_data needs to be converted into a list
    #     self.training_data = np.load("./rl-learning-data/rl-data.npy").tolist()
    #     print("Training data loaded from disk...")
    # except:
    #     print("Training data couldn't be loaded from disk...")
    #     self.training_data = []
    # uncomment this if you decide to load data

    self.training_data = []
def __init__(self, env, config, epsilon, training_episodes, testing_episodes,
             frames):
    RLAgent.__init__(self, env, training_episodes, testing_episodes, frames)
    self.epsilon = epsilon
    self.name = config.name

    self.action_space_dim = self.env.action_space.n
    self.observation_space_dim = self.env.observation_space.shape[0]

    # Config has all hyperparameters stored.
    self.config = config
    self.memory = deque(maxlen=self.config.memory_size)
    self.replay_counter = 0

    # Keep track of how many frames the model ran through in total.
    self.training_frame_count = 0

    self.model = self.initialize_model()
def run_weights():
    env = gym.make('Mario-Kart-Luigi-Raceway-v0')
    resolution = (120, 160)
    actions = [[-60, 0, 1, 0, 0],   # left
               [60, 0, 1, 0, 0],    # right
               [0, -80, 0, 1, 0],   # back
               [0, 0, 1, 0, 0]]     # go straight
               # [0, 0, 0, 1, 0]]   # brake

    # Load Model and Weights
    model = DQNModel(resolution=resolution,
                     nb_frames=test_param['nb_frames'],
                     actions=actions)
    model.load_weights(model_weights)
    agent = RLAgent(model, **test_param)

    agent.test(env)
def start_game():
    # p1 = KickAI(gateway)
    # p1 = MCTS(gateway)
    p1 = RLAgent(gateway)
    p2 = Machete(gateway)
    # p2 = DisplayInfo(gateway)

    manager.registerAI(p1.__class__.__name__, p1)
    manager.registerAI(p2.__class__.__name__, p2)
    print("Start game")

    game = manager.createGame("ZEN", "ZEN",
                              p1.__class__.__name__,
                              p2.__class__.__name__,
                              GAME_NUM)
    manager.runGame(game)

    print("After game")
    sys.stdout.flush()
def show_policy(rl_agent: RLAgent, size: int = 8) -> None:
    """
    Visualize agent policy for FrozenLake environments.

    Prints the character corresponding to the highest-valued action for each
    possible state.

    Description:
        '<' - left
        '^' - up
        '>' - right
        '.' - down

    Args:
        rl_agent: Trained agent.
        size: Size of area in chosen environment along single dimension.
    """
    actions_viz = {
        0: "<",
        1: ".",
        2: ">",
        3: "^",
    }
    actions = [rl_agent.get_action(i) for i in range(size**2)]
    viz = "".join(list(map(actions_viz.get, actions)))  # type: ignore
    for i in range(size):
        print(viz[i * size:(i + 1) * size])
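A minimal usage sketch for show_policy, assuming a tabular agent whose get_action(state) returns the greedy action index for each of the size**2 states; the RLAgent constructor and the train call below are hypothetical stand-ins for whatever training loop produced the agent:

import gym

env = gym.make("FrozenLake8x8-v1")      # 8x8 layout, hence size=8
rl_agent = RLAgent(env)                 # hypothetical constructor
rl_agent.train(episodes=10_000)         # hypothetical training call

# Prints an 8x8 grid of '<', '.', '>', '^' characters, one row per line.
show_policy(rl_agent, size=8)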
from PyQt5.QtWidgets import QApplication
from Window import Window
from EmulatorInterface import EmulatorInterface
from RLAgent import RLAgent
import sys

'''
Creates a global Agent object in order to train the model
'''
agent = RLAgent()

'''
:desc: This is the update function that can be used each frame to take control
       of the emulator. Below are some examples in the comments where you can
       use an emulator object to map key presses to the
'''
def onUpdate(window, emulator):
    src = window.grabScreenshot()
    global agent
    '''
    TODO: Here we can simulate the AI pressing the buttons.
    Examples:
    emulator.emulatePress("throttle")  # Emulates driving forward
    emulator.emulatePress("right")     # Emulates steering to the right
# Initiates the env
env = gym.make('Mario-Kart-Luigi-Raceway-v0')
resolution = (120, 160)
actions = [[-60, 0, 1, 0, 0],   # left
           [60, 0, 1, 0, 0],    # right
           [0, -80, 0, 1, 0],   # back
           [0, 0, 1, 0, 0]]     # go straight
           # [0, 0, 0, 1, 0]]   # brake

# Initiates Model
model = DQNModel(resolution=resolution,
                 nb_frames=learn_param['nb_frames'],
                 actions=actions)
# print("number of actions: ", len(doom.actions))  # 16
if model_weights:
    model.load_weights(model_weights)
else:
    print("Please provide a model_weights file")
agent = RLAgent(model, **learn_param)

# give a step number randomly to catch a random screen shot
agent.visualize(env)
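The Mario Kart snippets above rely on module-level learn_param / test_param dictionaries (unpacked into RLAgent) and a model_weights path, none of which are shown in the source. A purely illustrative sketch; every value, and every key other than 'nb_frames' (the one the snippets actually index), is hypothetical:

model_weights = 'weights/dqn_luigi_raceway.h5'   # hypothetical path

learn_param = {
    'nb_frames': 4,   # the only key the snippets read directly
    # ...remaining RLAgent keyword arguments (not shown in the source)
}
test_param = {
    'nb_frames': 4,
    # ...remaining RLAgent keyword arguments (not shown in the source)
}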
max_epsilon = 1.0
no_episodes = [1000, 2000, 3000, 4000, 5000]
wonRL_alpha = []

def plot(list2):
    print(alpha)
    print(list2)
    # Plot
    plt.plot(epsilon, list2)
    plt.xlim(0, 1)
    plt.ylim(100, 2500)
    # naming the x axis
    plt.xlabel('epsilon')
    # naming the y axis
    plt.ylabel('No times RL agent won')
    # function to show the plot
    plt.show()

for i in range(len(epsilon)):
    player1 = RLAgent(alpha[0], gamma[4], epsilon[i], exploration_decay_rate,
                      min_epsilon, max_epsilon)
    player2 = RandomAgent()
    Env = Environment(player1, player2, no_episodes[4])
    wonRL_alpha.append(Env.trainAgent())
    Env.saveIntoFile(i)

plot(wonRL_alpha)
class RLAgentPlayer(Player):
    def __init__(self, screen, speed, use_keras=True, pre_trained=True):
        super().__init__(screen)
        self.total_training_games = TOTAL_TRAINING_GAMES
        # takes in x, y of the snake and the speed of the snake
        self.agent = RLAgent(speed, use_keras, pre_trained)
        self.go_through_boundary = True
        # total number of games required to train
        self.idle_frames = 0
        self.total_steps = 0

        # # try to load the numpy data, if not possible then set the training_data to an empty list
        # try:
        #     # loaded training_data needs to be converted into a list
        #     self.training_data = np.load("./rl-learning-data/rl-data.npy").tolist()
        #     print("Training data loaded from disk...")
        # except:
        #     print("Training data couldn't be loaded from disk...")
        #     self.training_data = []
        # uncomment this if you decide to load data

        self.training_data = []

    def consumption_check(self):
        if collision(self.agent.body, self.food_stack[0]):
            return True
        else:
            return False

    def display_info(self, score, high_score):
        pygame.font.init()
        default_font = pygame.font.get_default_font()
        font_renderer = pygame.font.Font(default_font, 10)
        # To create a surface containing `Some Text`
        label = font_renderer.render(
            "Score - {}, High score - {}".format(score, high_score),
            1,
            (0, 0, 0))  # RGB Color
        self.screen.blit(label, (0, 0))

    def get_angle(self):
        head_x = self.agent.body.get_x()
        head_y = self.agent.body.get_y()
        segment_x = self.agent.body.body[0].get_x()
        segment_y = self.agent.body.body[0].get_y()
        food_x = self.food_stack[0].get_x()
        food_y = self.food_stack[0].get_y()

        snake_direction = np.array([head_x, head_y]) - np.array([segment_x, segment_y])
        food_direction = np.array([food_x, food_y]) - np.array([head_x, head_y])

        a = snake_direction / np.linalg.norm(snake_direction)
        b = food_direction / np.linalg.norm(food_direction)

        return math.atan2(a[0] * b[1] - a[1] * b[0],
                          a[0] * b[0] + a[1] * b[1]) / math.pi

    def map_keys(self, pred):
        if self.agent.body.current_direction == "right":
            # left from current point of view
            if pred == -1:
                self.agent.body.change_direction("up")
            # right from current point of view
            elif pred == 1:
                self.agent.body.change_direction("down")
        elif self.agent.body.current_direction == "left":
            # left from current point of view
            if pred == -1:
                self.agent.body.change_direction("down")
            # right from current point of view
            elif pred == 1:
                self.agent.body.change_direction("up")
        elif self.agent.body.current_direction == "up":
            # left from current point of view
            if pred == -1:
                self.agent.body.change_direction("left")
            # right from current point of view
            elif pred == 1:
                self.agent.body.change_direction("right")
        elif self.agent.body.current_direction == "down":
            # left from current point of view
            if pred == -1:
                self.agent.body.change_direction("right")
            # right from current point of view
            elif pred == 1:
                self.agent.body.change_direction("left")

    def get_input_data(self):
        # all the predictions of the next frame's collision movements
        coll_pred = self.agent.body.self_collision_prediction()
        # get distance from the snake and food
        distance_from_food = self.agent.body.distance_from_food(self.food_stack[0])
        angle = self.get_angle()
        return [coll_pred[0], coll_pred[1], coll_pred[2], angle]

    def gather_training_data(self, total_training_games=TOTAL_TRAINING_GAMES):
        self.total_training_games = total_training_games
        self.wrong_turn = 0
        self.wrong_direction = 0
        self.right_direction = 0

        for _ in tqdm(range(self.total_training_games)):
            self.one_game_iteration()

        # uncomment to save the training data
        # print("Training data saved to disk...")
        # # save the numpy data
        # np.save("./rl-learning-data/rl-data.npy", self.training_data)

        average_steps = self.total_steps / self.total_training_games
        print("Total Number of right directions: {}".format(self.right_direction))
        print("Total Number of wrong directions: {}".format(self.wrong_direction))
        print("Total Number of wrong turns: {}".format(self.wrong_turn))
        print("Average frames rendered per game: {}".format(average_steps))

    def train_agent(self):
        print("Beginning to train with {} data".format(len(self.training_data)))
        self.agent.learn(self.training_data)

    def one_game_iteration(self):
        score = 3
        prev_food_distance = self.agent.body.distance_from_food(self.food_stack[0])
        prev_nn_data = self.get_input_data()

        # end dictates if the game has finished or not, initially it will be false
        end = False
        # the game is played until it is ended, so till the snake either hits
        # the wall or collides with itself
        while not end:
            self.total_steps += 1
            end, curr_nn_data, current_action = self.render_training_frame()
            prev_nn_data.append(current_action)
            prev_nn_data = np.array(prev_nn_data)

            if end:
                self.wrong_turn += 1
                self.training_data.append([prev_nn_data, -1])
                # when game ends we reconstruct the body of the snake
                self.agent.create_new_body()
                self.spawn_food()
                break
            else:
                food_distance = self.agent.body.distance_from_food(self.food_stack[0])
                if self.agent.body.score > score or food_distance < prev_food_distance:
                    self.right_direction += 1
                    self.training_data.append([prev_nn_data, 1])
                else:
                    self.wrong_direction += 1
                    self.training_data.append([prev_nn_data, 0])

                prev_food_distance = food_distance
                prev_nn_data = curr_nn_data
                score = self.agent.body.score

        # make end to what its initial state was
        end = False
        # end of each game a new body is created
        self.agent.create_new_body()
        self.spawn_food()

    def process_training_data(self, right_direction, wrong_direction):
        new_training_data = []
        to_match = 0
        for i in range(len(self.training_data)):
            if self.training_data[i][1] == 1:
                to_match += 1
                if to_match != wrong_direction:
                    new_training_data.append(self.training_data[i])
            else:
                new_training_data.append(self.training_data[i])
        return new_training_data

    def render_training_frame(self):
        pygame.event.pump()
        for food in self.food_stack:
            food.draw(self.screen)

        action = random.randint(-1, 2)
        self.map_keys(action)

        end = self.agent.body.draw(self.screen, self.go_through_boundary)

        # check here if the snake ate the food
        if self.consumption_check():
            self.spawn_food()
            # finally we grow the snake as well by adding a new segment to the snake's body
            self.agent.body.grow()

        nn_data = self.get_input_data()
        return end, nn_data, action

    def kill_idle_game(self):
        self.idle_frames += 1
        if self.idle_frames == 400:
            self.idle_frames = 0
            return True

    def use_brain_to_move(self):
        prev_nn_data = self.get_input_data()
        predictions = []
        # all three possible directions are generated and for all possible
        # direction values given those inputs we ask the neural network which
        # direction to step to for positive effect
        for action in range(-1, 2):
            nn_data = self.get_input_data()
            nn_data.append(action)
            nn_data = np.array(nn_data)
            # depending on previous observation what move should i generate
            predictions.append(self.agent.predict(nn_data))

        action = np.argmax(np.array(predictions))
        # to map the range value
        action -= 1
        self.map_keys(action)

    def test_agent(self, dataset_games):
        game_iterations = 100
        total_score = 0
        high_score = 0
        max_step = 1000

        for _ in tqdm(range(game_iterations)):
            step = 0
            while True:
                step += 1
                score = self.agent.body.score
                for food in self.food_stack:
                    food.draw(self.screen)

                self.use_brain_to_move()
                end = self.agent.body.draw(self.screen, self.go_through_boundary)

                # when the snake dies and the game ends
                if end or step == max_step:
                    if score > high_score:
                        high_score = score
                    total_score += score
                    # break the loop when the game ends
                    self.agent.create_new_body()
                    break

                # check here if the snake ate the food
                if self.consumption_check():
                    self.idle_frames = 0
                    self.spawn_food()
                    # finally we grow the snake as well by adding a new segment to the snake's body
                    self.agent.body.grow()

        average_score = total_score / game_iterations
        data_prints = [
            "Neural Network played {}\n".format(dataset_games),
            "Highest Score in {} games: {}\n".format(game_iterations, high_score),
            "Average Score in {} games: {}\n".format(game_iterations, average_score)
        ]

        with open("test-result.txt", "a") as myfile:
            for prints in data_prints:
                myfile.write(prints)
                print(prints)

    def game_loop(self):
        game_iterations = 5
        high_score = 0
        for _ in range(game_iterations):
            while True:
                pygame.event.pump()
                self.screen.fill(self.background_color)
                score = self.agent.body.score
                self.display_info(score, high_score)

                for food in self.food_stack:
                    food.draw(self.screen)

                self.use_brain_to_move()
                end = self.agent.body.draw(self.screen, self.go_through_boundary)

                # if snake doesn't do anything or the snake died then kill the game
                if end:
                    # when the snake dies
                    print("Died after turning its head -> {}".format(
                        self.agent.body.current_direction))
                    time.sleep(1)
                    if score > high_score:
                        high_score = score
                    # break the loop when the game ends
                    self.agent.create_new_body()
                    break

                # check here if the snake ate the food
                if self.consumption_check():
                    self.idle_frames = 0
                    self.spawn_food()
                    # finally we grow the snake as well by adding a new segment to the snake's body
                    self.agent.body.grow()

                pygame.display.flip()
                time.sleep(0.05)

        print("High score -> {}".format(high_score))
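A usage sketch for the class above, chaining only methods it defines itself; the pygame window setup, the constructor arguments, and the food/spawning plumbing inherited from the Player base class are assumptions, not shown in the source:

import pygame

pygame.init()
screen = pygame.display.set_mode((400, 400))    # hypothetical window size

player = RLAgentPlayer(screen, speed=20, use_keras=True, pre_trained=False)
player.gather_training_data(total_training_games=1000)  # random-play data collection
player.train_agent()                                     # fit the agent on that data
player.game_loop()                                       # watch the trained agent play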
    return np.random.random() * x - x / 2.


def randomtarget():
    geometry = {
        'coordinates': [[rnd(), rnd()], [rnd(), rnd()]],
        'orientations': [rnd(2 * np.pi)] * 2
    }
    geometry['coordinates'] = map(np.array, geometry['coordinates'])
    return ShapeByGeometry(geometry)


############### AGENTS
agents_pose = [Pose(0, 0), Pose(0, -2, -np.pi / 2)]
agents = [RLAgent(i, pose=agents_pose[i]) for i in range(NUM_AGENTS)]
eval_agents = [RLAgent(i, pose=agents_pose[i]) for i in range(NUM_AGENTS)]

############### ENVIRONMENT
env = FormationEnvironment(targetshape, agents,
                           num_iterations=HP.NUM_ITERATIONS, dt=HP.DT)
eval_env = FormationEnvironment(targetshape, eval_agents,
                                num_iterations=HP.NUM_ITERATIONS, dt=HP.DT)

############### PARTIALLY OBSERVED ENVS
agent_observed_envs = {}
for agent in env.agents.values():
def run(self):
    ####loop thru agents
    print('LEARNER neural networks')
    agent_networks = gen_neural_networks([tsc for tsc in self.agent_ids],
                                         self.net_data,
                                         self.args.hact,
                                         self.args.oact,
                                         self.args.lr,
                                         self.args.lre)

    ###load weights if we want
    if self.args.load is True:
        agent_weights = load_data('saved_weights.p')
        for tsc in self.agent_ids:
            agent_networks[tsc]['online'].set_weights(agent_weights[tsc])

    ###send weights to actors
    for tsc in self.agent_ids:
        weights = agent_networks[tsc]['online'].get_weights()
        ###send to actors
        self.rl_stats[tsc]['online'] = weights
        agent_networks[tsc]['target'].set_weights(weights)

    ###ensure all learners have sent weights before starting
    self.barrier.wait()

    ###create rl agents using neural networks
    rl_agents = {tsc: RLAgent(agent_networks[tsc], self.args.eps,
                              self.exp_replay[tsc],
                              self.net_data['tsc'][tsc]['n_green_phases'],
                              self.args.n_steps, self.args.batch,
                              self.args.replay, self.args.gamma)
                 for tsc in self.agent_ids}

    ###wait until sufficient exp in replay to start making updates
    self.barrier.wait()

    ###timer for stats
    self.last_update = time.time()
    period = 60
    self.learn_time = time.time()

    if self.args.mode == 'train':
        ###reset n_exp count
        for agent in self.agent_ids:
            self.rl_stats[agent]['n_exp'] = 0

        while not self.finished_learning():
            for tsc in rl_agents:
                ###only do batch updates after something has been added to exp replay
                if self.rl_stats[tsc]['n_exp'] > 0:
                    rl_agents[tsc].train_batch(self.rl_stats[tsc]['max_r'])
                    self.rl_stats[tsc]['n_exp'] -= 1
                    self.rl_stats[tsc]['updates'] += 1
                    ###send online weights to shared dict for actors
                    self.rl_stats[tsc]['online'] = rl_agents[tsc].get_params('online')
                    ###clip exp replay
                    diff = len(self.exp_replay[tsc]) - self.args.replay
                    if diff > 0:
                        del self.exp_replay[tsc][:diff]
                    ###update target network to online on regular interval
                    if self.rl_stats[tsc]['updates'] % self.args.target == 0:
                        ###set target to online params
                        rl_agents[tsc].set_params('target', self.rl_stats[tsc]['online'])

            ###try stats
            t = time.time() - self.last_update
            if t > period:
                print('========= AGENT EXP PROGRESS UPDATE LEARNER ' + str(self.idx) + ' =====')
                self.print_stats()
                self.last_update = time.time()
                T = time.time() - self.learn_time
                ###use min progress agent as the ETA estimate
                min_progress = min([self.rl_stats[agent]['updates'] / float(self.args.updates)
                                    for agent in self.agent_ids])
                eta = self.ETA(min_progress, T)
                print('==== ETA seconds: ' + str(round(eta, 0)) +
                      ' minutes: ' + str(round(eta / 60.0, 2)) +
                      ' hours: ' + str(round(eta / 3600.0, 2)) + ' ====')
                #print(str(ETA( np.amin([ self.rl_stats[agent]['updates']/float(self.args.updates for agent in self.agent_ids])), T))

    print('...end learner ' + str(self.idx))
class RLTrafficSignalController(TrafficSignalController):
    ###implements a cycle, fixed uniform phase duration for all green phases
    def __init__(self, _id, tsc_data, conn, args, exp_replay, neural_networks,
                 eps, rl_stats, reward):
        super(RLTrafficSignalController, self).__init__(_id, tsc_data, conn, args)
        self.rlagent = RLAgent(neural_networks, eps, exp_replay,
                               tsc_data['n_green_phases'], args.n_steps,
                               args.batch, args.replay, args.gamma)
        ###set intersection to red default
        self.id = _id
        #self.phase_buffer = deque()
        self.exp = {}
        self.current_phase = tsc_data['all_red']
        self.args = args
        self.phase_deque = deque()
        self.state_deque = deque()
        self.acting = False
        self.rl_stats = rl_stats
        self.reward = reward
        self.rewards = []

    def update(self, local_obs):
        ###update state buffer
        state = get_density(local_obs, self.tsc_data['inc_lanes'],
                            self.tsc_data['lane_lengths'], self.args.v_len)
        self.state_deque.append(state)

    def next_phase_and_duration(self, local_obs):
        if len(self.phase_deque) == 0:
            if self.acting == True:
                if self.args.mode == 'train':
                    next_s = self.observe_state()
                    r = self.get_reward()
                    terminal = True if np.sum(self.state_deque[-1]) == 0 else False
                    self.rlagent.store_experience(self.exp['s'], self.exp['a'],
                                                  next_s, r, terminal)
                    self.rl_stats['n_exp'] += 1.0 / self.args.n_steps
                    #if self.rl_stats['n_exp'] % 100 == 0:
                    #    print('exp replay size '+str(self.rl_stats['n_exp']))
                    #    print('updates '+str(self.rl_stats['updates']))

            if len(self.state_deque) == 0 or np.sum(self.state_deque[-1]) == 0:
                ###no vehicle present, default to all red
                self.phase_deque.append((self.tsc_data['all_red'], 1))
                self.acting = False
            else:
                ###observe state
                s = self.observe_state()
                self.exp['s'] = s
                ###get new params before acting
                self.rlagent.set_params('online', self.rl_stats['online'])
                ##take action using rl agent
                s = s[np.newaxis, ...]
                action_idx = self.rlagent.get_action(s)
                self.exp['a'] = action_idx
                ###change action index to green traffic signal phase
                next_green = self.tsc_data['int_to_action'][action_idx]
                self.acting = True
                ###add transition phases for desired duration
                transitions = get_transitions(self.current_phase, next_green)
                for trans in transitions:
                    if 'y' in trans:
                        t = self.yellow_t
                    else:
                        t = self.red_t
                    self.phase_deque.append((trans, t))
                self.phase_deque.append((next_green, self.a_repeat))

        next_phase_and_duration = self.phase_deque.popleft()
        next_phase = next_phase_and_duration[0]
        duration = next_phase_and_duration[1]
        return next_phase, duration

    def observe_state(self):
        traffic_state = np.array(self.state_deque[-1])
        signal_state = np.array(self.tsc_data['phase_one_hot'][self.current_phase])
        s = np.concatenate([traffic_state, signal_state])
        #s = s[np.newaxis,...]
        return s

    def update_max_reward(self, r):
        abs_r = np.absolute(r)
        if abs_r > self.rl_stats['max_r']:
            self.rl_stats['max_r'] = abs_r

    def get_reward(self):
        r = -np.sum([self.reward(e) for e in self.tsc_data['inc_edges']])
        #self.rewards.append(r)
        self.update_max_reward(r)
        return r
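A sketch of how a simulation loop might drive the controller above, using only the methods the class defines; the observation collection, phase application, and loop condition are hypothetical placeholders for the simulator glue, which is not shown in the source:

# Hypothetical driving loop; simulation_running, collect_local_observation
# and apply_phase stand in for the simulator-side plumbing.
tsc = RLTrafficSignalController(_id, tsc_data, conn, args, exp_replay,
                                neural_networks, eps, rl_stats, reward)
remaining = 0
while simulation_running():
    local_obs = collect_local_observation()
    tsc.update(local_obs)                    # push lane-density state into the deque
    if remaining <= 0:
        phase, remaining = tsc.next_phase_and_duration(local_obs)
        apply_phase(conn, phase)             # send the chosen phase to the simulator
    remaining -= 1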