import sys

import numpy as np

# DQNAgent, send_action and t_steps are assumed to be defined elsewhere in this module.


def main():
    if len(sys.argv) > 1:
        host = sys.argv[1]

    # Read the current episode index from disk.
    epi_file = open('../files/episode.txt')
    episode = epi_file.readline()
    epi_file.close()
    episode = int(episode)

    qagent = DQNAgent(14)

    # Send action 9 repeatedly until the environment returns '9'.
    data = 'x'
    while data != '9':
        data = send_action(9)

    ys, ds = qagent.get_data(episode, 0)
    state = np.concatenate((ys, ds), axis=0)

    for step in range(1, t_steps + 1):
        action = qagent.get_action(state)
        reward = send_action(action)
        ys, ds = qagent.get_data(episode, step)
        n_state = np.concatenate((ys, ds), axis=0)
        state = n_state
player1.adjust_target_net()
player2.adjust_target_net()

game_over = False
eps = 0.8

while not game_over:
    # Ask for Player 2 Input
    if turn == PLAYER2 and not game_over:
        # col = random.randint(0, COLUMN_COUNT - 1)
        # col = pick_best_move(board, PLAYER2_PIECE)
        col = player2.get_action(board, 0)
        # print('col from DQN:', col)

        if not is_valid_location(board, col):
            game_over = True
            reward = -1
            player2.receive_next_obs_rew_done(board, reward, game_over)
            num_disqualifications2 += 1

        if is_valid_location(board, col):
            pygame.time.wait(500)
            row = get_next_open_row(board, col)
            drop_piece(board, row, col, PLAYER2_PIECE)
            reward = 0
player1.adjust_target_net()
player2.adjust_target_net()
eps *= eps_decay

game_over = False

while not game_over:
    # Ask for Player 2 Input
    if turn == PLAYER2 and not game_over:
        # col = random.randint(0, COLUMN_COUNT - 1)
        # col = pick_best_move(board, PLAYER2_PIECE)
        col = player2.get_action(board, eps)
        # print('col from DQN:', col)

        if not is_valid_location(board, col):
            game_over = True
            reward = -1
            player2.receive_next_obs_rew_done(board, reward, game_over)
            num_disqualifications2 += 1

        if is_valid_location(board, col):
            # pygame.time.wait(500)
            row = get_next_open_row(board, col)
            drop_piece(board, row, col, PLAYER2_PIECE)
            reward = 0
import gym

from agent import DQNAgent

max_frames = 1000
batch_size = 4

env = gym.make("MountainCar-v0")
agent = DQNAgent(env)

state = env.reset()
for frame in range(max_frames):
    env.render()
    action = agent.get_action(state)
    next_state, reward, done, _ = env.step(action)
    agent.replay_buffer.push(state, action, reward, next_state, done)

    if len(agent.replay_buffer) > batch_size:
        agent.update(batch_size)

    # Advance to the next observation before choosing the next action.
    state = next_state
    if done:
        state = env.reset()

env.close()
class Game():
    def __init__(self, width, height, rows, window, offx, offy, idx=""):
        self.SETTINGS = {}
        self.SETTINGS['w'] = width
        self.SETTINGS['h'] = height
        self.SETTINGS['r'] = rows
        self.SETTINGS['sB'] = width // rows
        self.SETTINGS['ox'] = offx * width
        self.SETTINGS['oy'] = offy * height
        self.idx = idx
        self.window = window
        self.snake = Snake((255, 0, 0),
                           (self.SETTINGS['r'] // 2, self.SETTINGS['r'] // 2),
                           self.SETTINGS)
        self.snack = Cube(self.randomSnack(), self.SETTINGS, color=(0, 255, 0))
        self.dist = self.get_snack_distance()
        self.walls = self.get_wall_pos()
        self.model = Model(len(self.get_observation()), 4)
        self.tgt = Model(len(self.get_observation()), 4)
        self.agent = DQNAgent(self.model, self.tgt)
        self.reward = 0.0
        self.setp_reward = 0.0
        self.rewards = []
        self.finished = False
        self.points = 0
        self.points_ls = []

    def get_wall_pos(self):
        pos = []
        for l in range(self.SETTINGS['r']):
            pos.append((l, 0))
            pos.append((l, self.SETTINGS['r'] - 1))
            pos.append((0, l))
            pos.append((self.SETTINGS['r'] - 1, l))
        return pos

    def get_snack_distance(self):
        x = self.snake.head.pos[0] - self.snack.pos[0]
        y = self.snake.head.pos[1] - self.snack.pos[1]
        dist = math.sqrt(x * x + y * y)
        return dist

    def draw_wall(self):
        for l in self.walls:
            offx = self.SETTINGS['ox']
            offy = self.SETTINGS['oy']
            dis = self.SETTINGS['sB']
            i = l[0]
            j = l[1]
            pygame.draw.rect(
                self.window, (0, 0, 255),
                (i * dis + 1 + offx, j * dis + 1 + offy, dis - 2, dis - 2))

    def draw_grid(self):
        x = 0
        y = 0
        for l in range(self.SETTINGS['r']):
            offx = self.SETTINGS['ox']
            offy = self.SETTINGS['oy']
            x += self.SETTINGS['sB']
            y += self.SETTINGS['sB']
            pygame.draw.line(self.window, (30, 30, 30),
                             (x + offx, 0 + offy),
                             (x + offx, self.SETTINGS['w'] + offy))
            pygame.draw.line(self.window, (30, 30, 30),
                             (0 + offx, y + offy),
                             (self.SETTINGS['h'] + offx, y + offy))

    def redrawWindow(self):
        self.draw_grid()
        self.draw_wall()
        self.snake.draw(self.window)
        self.snack.draw(self.window)

    def randomSnack(self):
        while True:
            x = random.randrange(self.SETTINGS['r'])
            y = random.randrange(self.SETTINGS['r'])
            new_pos = (x, y)
            if new_pos not in self.snake.get_pos():
                if new_pos not in self.get_wall_pos():
                    return new_pos

    def restart(self):
        self.snake = Snake((255, 0, 0),
                           (self.SETTINGS['r'] // 2, self.SETTINGS['r'] // 2),
                           self.SETTINGS)
        self.snack = Cube(self.randomSnack(), self.SETTINGS, color=(0, 255, 0))
        while self.snake.head.pos == self.snack.pos:
            self.snack = Cube(self.randomSnack(), self.SETTINGS, color=(0, 255, 0))
        self.rewards.append(self.reward)
        self.reward = 0.0
        self.points_ls.append(self.points)
        self.points = 0
        self.finished = False

    def get_average_reward(self):
        avg = sum(self.rewards) / len(self.rewards)
        self.rewards = []
        return avg

    def new_generation(self, model, tgt):
        self.agent.update_model(model)
        self.agent.update_tgt(tgt)
        self.restart()

    def game_loop(self, train=False, model=None):
        observation = self.get_observation()
        # small living penalty every step
        self.reward -= 0.003
        self.setp_reward = -0.003

        if train:
            action = self.agent.get_action(observation)
            self.snake.move_model(action)
        else:
            assert model is not None, "Error, no model"
            action = model(torch.Tensor(observation)).max(-1)[-1].item()
            self.snake.move_model(action)

        # snack eaten
        if self.snake.body[0].pos == self.snack.pos:
            self.snake.add_cube()
            self.snack = Cube(self.randomSnack(), self.SETTINGS, color=(0, 255, 0))
            self.reward += 3.0
            self.setp_reward = 3.0
            self.points += 1

        # collision with own body
        for el in self.snake.body[1:]:
            if el.pos == self.snake.head.pos:
                self.snake = Snake(
                    (255, 0, 0),
                    (self.SETTINGS['r'] // 2, self.SETTINGS['r'] // 2),
                    self.SETTINGS)
                self.reward -= 1.0
                self.setp_reward = -1.0
                self.finished = True
                break

        # collision with a wall
        for el in self.walls:
            if el == self.snake.head.pos:
                self.snake = Snake(
                    (255, 0, 0),
                    (self.SETTINGS['r'] // 2, self.SETTINGS['r'] // 2),
                    self.SETTINGS)
                self.reward -= 1.0
                self.setp_reward = -1.0
                self.finished = True
                break

        if train:
            new_observation = self.get_observation()
            self.agent.step(self.setp_reward, new_observation, self.finished)
            if self.finished:
                self.restart()

        self.redrawWindow()

    def body_in_pos(self, x, y):
        for bd in self.snake.body[1:]:
            if bd.pos == [x, y]:
                return True
        return False

    def get_observation(self):
        obs = []
        # snake head and tail positions
        sx = self.snake.head.pos[0]
        sy = self.snake.head.pos[1]
        obs.append(sx)
        obs.append(sy)
        obs.append(self.snake.body[-1].pos[0])
        obs.append(self.snake.body[-1].pos[1])
        # snack position
        obs.append(self.snack.pos[0])
        obs.append(self.snack.pos[1])
        # snack distance from the head
        x = self.snack.pos[0] - sx
        y = self.snack.pos[1] - sy
        dist = math.sqrt(x**2 + y**2)
        obs.append(dist)
        # neighbouring cells: 0 if blocked by wall or body, 1 if free
        # -x dir
        if self.body_in_pos(sx - 1, sy) or sx - 1 == 0:
            obs.append(0)
        else:
            obs.append(1)
        # +x dir
        if self.body_in_pos(sx + 1, sy) or sx + 2 == self.SETTINGS['r']:
            obs.append(0)
        else:
            obs.append(1)
        # -y dir
        if self.body_in_pos(sx, sy - 1) or sy - 1 == 0:
            obs.append(0)
        else:
            obs.append(1)
        # +y dir
        if self.body_in_pos(sx, sy + 1) or sy + 2 == self.SETTINGS['r']:
            obs.append(0)
        else:
            obs.append(1)
        return obs

    def start(self):
        clock = pygame.time.Clock()
        while True:
            pygame.time.delay(50)
            clock.tick(10)
            self.game_loop()
        screen.blit(label, (40, 10))
        game_over = True

    turn += 1
    turn = turn % 2

    print_board(board)
    draw_board(board)

    # Ask for Player 2 Input
    if turn == AI and not game_over:
        # col = random.randint(0, COLUMN_COUNT-1)
        # col = pick_best_move(board, AI_PIECE)
        # col, minimax_score = minimax(board, 5, -math.inf, math.inf, True)
        col = player2.get_action(board)
        print('col:', col)

        if not is_valid_location(board, col):
            label = myfont.render("Player 2 disqualified!!", 1, YELLOW)
            screen.blit(label, (40, 10))
            game_over = True

        if is_valid_location(board, col):
            # pygame.time.wait(500)
            row = get_next_open_row(board, col)
            drop_piece(board, row, col, AI_PIECE)

            if winning_move(board, AI_PIECE):
                label = myfont.render("Player 2 wins!!", 1, YELLOW)
                screen.blit(label, (40, 10))
    exploration_steps=500000,
    observation_steps=50000,
    loading_step=None,
    device_name="gpu:0")

verbose_step = 20
total_reward = 0.0
episode_step = 0
time_step = time.time()

for i_episode in range(100000):
    observation = env.reset()
    for t in range(10000000):
        # env.render()
        now_state = preprocess(observation)
        action = agent.get_action(now_state, training=True).numpy()
        observation, reward, done, info = env.step(action)
        done = 1 if done else 0
        next_state = preprocess(observation)
        agent.step(now_state, action, reward, next_state, done)
        total_reward += reward
        if done:
            if agent.step_count > agent.observation_steps:
                agent.copy_base_to_target()
            if i_episode % verbose_step == 0: