Example #1
import sys

import numpy as np


def main():
    if len(sys.argv) > 1:
        host = sys.argv[1]

    # Read the persisted episode counter from disk.
    with open('../files/episode.txt') as epi_file:
        episode = int(epi_file.readline())

    qagent = DQNAgent(14)

    # Keep sending action 9 until the environment replies with '9'.
    data = 'x'
    while data != '9':
        data = send_action(9)

    ys, ds = qagent.get_data(episode, 0)
    state = np.concatenate((ys, ds), axis=0)

    for step in range(1, t_steps + 1):
        action = qagent.get_action(state)
        reward = send_action(action)
        ys, ds = qagent.get_data(episode, step)
        n_state = np.concatenate((ys, ds), axis=0)
        state = n_state
Example #2
player1.adjust_target_net()
player2.adjust_target_net()

game_over = False

eps = 0.8

while not game_over:

    # Ask for Player 2 Input
    if turn == PLAYER2 and not game_over:

        #col = random.randint(0, COLUMN_COUNT-1)
        #col = pick_best_move(board, PLAYER2_PIECE)
        col = player2.get_action(board, 0)
        #print('col from DQN:', col)

        if not is_valid_location(board, col):
            # Illegal column: end the game and penalize the agent.
            game_over = True
            reward = -1
            player2.receive_next_obs_rew_done(board, reward, game_over)
            num_disqualifications2 += 1
        else:
            pygame.time.wait(500)
            row = get_next_open_row(board, col)
            drop_piece(board, row, col, PLAYER2_PIECE)

            reward = 0
Example #3
        player1.adjust_target_net()
        player2.adjust_target_net()

        eps *= eps_decay

    game_over = False

    while not game_over:

        # Ask for Player 2 Input
        if turn == PLAYER2 and not game_over:

            #col = random.randint(0, COLUMN_COUNT-1)
            #col = pick_best_move(board, PLAYER2_PIECE)
            col = player2.get_action(board, eps)
            #print('col from DQN:', col)

            if not is_valid_location(board, col):
                # Illegal column: end the game and penalize the agent.
                game_over = True
                reward = -1
                player2.receive_next_obs_rew_done(board, reward, game_over)
                num_disqualifications2 += 1
            else:
                #pygame.time.wait(500)
                row = get_next_open_row(board, col)
                drop_piece(board, row, col, PLAYER2_PIECE)

                reward = 0
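
Examples #2 and #3 differ mainly in exploration: #2 calls player2.get_action(board, 0) (pure exploitation), while #3 passes a decaying eps and anneals it once per episode with eps *= eps_decay. The get_action implementations are not shown here; a minimal sketch of the epsilon-greedy selection such a method typically performs, assuming a PyTorch Q-network (q_net, n_actions, and the helper name are illustrative, not taken from these examples):

import random

import torch


def epsilon_greedy_action(q_net, state, eps, n_actions):
    # With probability eps, explore with a uniformly random action.
    if random.random() < eps:
        return random.randrange(n_actions)
    # Otherwise exploit: take the action with the highest predicted Q-value.
    with torch.no_grad():
        q_values = q_net(torch.as_tensor(state, dtype=torch.float32))
    return int(q_values.argmax().item())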
Example #4
import gym
from agent import DQNAgent

max_frames = 1000
batch_size = 4

env = gym.make("MountainCar-v0")
agent = DQNAgent(env)
state = env.reset()

for frame in range(max_frames):
    env.render()
    action = agent.get_action(state)
    next_state, reward, done, _ = env.step(action)
    agent.replay_buffer.push(state, action, reward, next_state, done)
    # Advance to the new state so the next action is chosen from it.
    state = next_state

    if len(agent.replay_buffer) > batch_size:
        agent.update(batch_size)

    if done:
        state = env.reset()

env.close()
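
Example #4 relies on an agent whose replay_buffer supports push and len, plus an update(batch_size) method, none of which is shown. A minimal sketch of such a buffer with the usual DQN transition tuple (the class name, capacity, and sample method are assumptions, not the code behind this example):

import random
from collections import deque


class ReplayBuffer:
    def __init__(self, capacity=10000):
        # Bounded FIFO: the oldest transitions are evicted once full.
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniform sampling breaks the temporal correlation between updates.
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)

An update(batch_size) method would then sample a batch from this buffer and take one gradient step on the TD error.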
Example #5
import math
import random

import pygame
import torch


class Game:
    def __init__(self, width, height, rows, window, offx, offy, idx=""):
        self.SETTINGS = {}
        self.SETTINGS['w'] = width
        self.SETTINGS['h'] = height
        self.SETTINGS['r'] = rows
        self.SETTINGS['sB'] = width // rows
        self.SETTINGS['ox'] = offx * width
        self.SETTINGS['oy'] = offy * height
        self.idx = idx

        self.window = window

        self.snake = Snake((255, 0, 0),
                           (self.SETTINGS['r'] // 2, self.SETTINGS['r'] // 2),
                           self.SETTINGS)
        self.snack = Cube(self.randomSnack(), self.SETTINGS, color=(0, 255, 0))

        self.dist = self.get_snack_distance()

        self.walls = self.get_wall_pos()

        self.model = Model(len(self.get_observation()), 4)
        self.tgt = Model(len(self.get_observation()), 4)
        self.agent = DQNAgent(self.model, self.tgt)
        self.reward = 0.0
        self.step_reward = 0.0
        self.rewards = []
        self.finished = False

        self.points = 0
        self.points_ls = []

    def get_wall_pos(self):
        pos = []

        for l in range(self.SETTINGS['r']):
            pos.append((l, 0))
            pos.append((l, self.SETTINGS['r'] - 1))
            pos.append((0, l))
            pos.append((self.SETTINGS['r'] - 1, l))

        return pos

    def get_snack_distance(self):
        x = self.snake.head.pos[0] - self.snack.pos[0]
        y = self.snake.head.pos[1] - self.snack.pos[1]
        dist = math.sqrt(x * x + y * y)
        return dist

    def draw_wall(self):
        for l in self.walls:
            offx = self.SETTINGS['ox']
            offy = self.SETTINGS['oy']
            dis = self.SETTINGS['sB']
            i = l[0]
            j = l[1]

            pygame.draw.rect(
                self.window, (0, 0, 255),
                (i * dis + 1 + offx, j * dis + 1 + offy, dis - 2, dis - 2))

    def draw_grid(self):
        x = 0
        y = 0

        for l in range(self.SETTINGS['r']):
            offx = self.SETTINGS['ox']
            offy = self.SETTINGS['oy']
            x += self.SETTINGS['sB']
            y += self.SETTINGS['sB']

            pygame.draw.line(self.window, (30, 30, 30), (x + offx, 0 + offy),
                             (x + offx, self.SETTINGS['w'] + offy))
            pygame.draw.line(self.window, (30, 30, 30), (0 + offx, y + offy),
                             (self.SETTINGS['h'] + offx, y + offy))

    def redrawWindow(self):
        self.draw_grid()
        self.draw_wall()

        self.snake.draw(self.window)
        self.snack.draw(self.window)

    def randomSnack(self):
        # Sample a free cell: not on the snake and not inside a wall.
        while True:
            x = random.randrange(self.SETTINGS['r'])
            y = random.randrange(self.SETTINGS['r'])

            new_pos = (x, y)

            if new_pos not in self.snake.get_pos():
                if new_pos not in self.get_wall_pos():
                    return new_pos

    def restart(self):
        self.snake = Snake((255, 0, 0),
                           (self.SETTINGS['r'] // 2, self.SETTINGS['r'] // 2),
                           self.SETTINGS)

        self.snack = Cube(self.randomSnack(), self.SETTINGS, color=(0, 255, 0))
        while self.snake.head.pos == self.snack.pos:
            self.snack = Cube(self.randomSnack(),
                              self.SETTINGS,
                              color=(0, 255, 0))

        self.rewards.append(self.reward)
        self.reward = 0.0
        self.points_ls.append(self.points)
        self.points = 0
        self.finished = False

    def get_average_reward(self):
        avg = sum(self.rewards) / len(self.rewards)
        self.rewards = []
        return avg

    def new_generation(self, model, tgt):
        self.agent.update_model(model)
        self.agent.update_tgt(tgt)
        self.restart()

    def game_loop(self, train=False, model=None):
        observation = self.get_observation()

        # Constant per-step penalty.
        self.reward -= 0.003
        self.step_reward = -0.003

        if train:
            action = self.agent.get_action(observation)

            self.snake.move_model(action)
        else:
            assert model is not None, "Error, no model"

            action = model(torch.Tensor(observation)).argmax(-1).item()
            self.snake.move_model(action)

        if self.snake.body[0].pos == self.snack.pos:
            self.snake.add_cube()
            self.snack = Cube(self.randomSnack(),
                              self.SETTINGS,
                              color=(0, 255, 0))

            self.reward += 3.0
            self.step_reward = 3.0
            self.points += 1

        for el in self.snake.body[1:]:
            if el.pos == self.snake.head.pos:
                self.snake = Snake(
                    (255, 0, 0),
                    (self.SETTINGS['r'] // 2, self.SETTINGS['r'] // 2),
                    self.SETTINGS)

                self.reward -= 1.0
                self.step_reward = -1.0
                self.finished = True
                break

        for el in self.walls:
            if el == self.snake.head.pos:
                self.snake = Snake(
                    (255, 0, 0),
                    (self.SETTINGS['r'] // 2, self.SETTINGS['r'] // 2),
                    self.SETTINGS)

                self.reward -= 1.0
                self.step_reward = -1.0
                self.finished = True
                break

        if train:
            new_observation = self.get_observation()
            self.agent.step(self.step_reward, new_observation, self.finished)

        if self.finished:
            self.restart()

        self.redrawWindow()

    def body_in_pos(self, x, y):
        # Positions are (x, y) tuples everywhere else, so compare tuples here.
        for bd in self.snake.body[1:]:
            if bd.pos == (x, y):
                return True

        return False

    def get_observation(self):
        obs = []

        # snake head and tail positions
        sx = self.snake.head.pos[0]
        sy = self.snake.head.pos[1]
        obs.append(sx)
        obs.append(sy)
        obs.append(self.snake.body[-1].pos[0])
        obs.append(self.snake.body[-1].pos[1])

        # snack pos
        obs.append(self.snack.pos[0])
        obs.append(self.snack.pos[1])

        # snack dist from head
        x = self.snack.pos[0] - sx
        y = self.snack.pos[1] - sy
        dist = math.sqrt(x**2 + y**2)
        obs.append(dist)

        # whether each neighboring cell of the head is blocked (wall or body)
        # -x dir
        if self.body_in_pos(sx - 1, sy) or sx - 1 == 0:
            obs.append(0)
        else:
            obs.append(1)

        # +x dir
        if self.body_in_pos(sx + 1, sy) or sx + 2 == self.SETTINGS['r']:
            obs.append(0)
        else:
            obs.append(1)

        # -y dir
        if self.body_in_pos(sx, sy - 1) or sy - 1 == 0:
            obs.append(0)
        else:
            obs.append(1)

        # +y dir
        if self.body_in_pos(sx, sy + 1) or sy + 2 == self.SETTINGS['r']:
            obs.append(0)
        else:
            obs.append(1)

        return obs

    def start(self):
        clock = pygame.time.Clock()
        while True:
            pygame.time.delay(50)
            clock.tick(10)

            self.game_loop()
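
Several of these examples periodically synchronize a target network (adjust_target_net in #2 and #3, update_tgt in #5, copy_base_to_target in #6) without showing how. The standard hard update behind such methods, sketched in PyTorch (the helper name is hypothetical):

import torch.nn as nn


def hard_update(online_net: nn.Module, target_net: nn.Module) -> None:
    # Copy the online network's weights into the frozen target network;
    # DQN does this every N steps or episodes to stabilize its TD targets.
    target_net.load_state_dict(online_net.state_dict())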
Example #6
                        screen.blit(label, (40, 10))
                        game_over = True

                    turn += 1
                    turn = turn % 2

                    print_board(board)
                    draw_board(board)

    # Ask for Player 2 Input
    if turn == AI and not game_over:

        #col = random.randint(0, COLUMN_COUNT-1)
        #col = pick_best_move(board, AI_PIECE)
        #col, minimax_score = minimax(board, 5, -math.inf, math.inf, True)
        col = player2.get_action(board)
        print('col:', col)

        if not is_valid_location(board, col):
            label = myfont.render("Player 2 disqualified!!", 1, YELLOW)
            screen.blit(label, (40, 10))
            game_over = True
        else:
            #pygame.time.wait(500)
            row = get_next_open_row(board, col)
            drop_piece(board, row, col, AI_PIECE)

            if winning_move(board, AI_PIECE):
                label = myfont.render("Player 2 wins!!", 1, YELLOW)
                screen.blit(label, (40, 10))

# (the source snippet is truncated here; the lines below are the tail of an
# agent-construction call whose opening lines were lost)
                 exploration_steps=500000,
                 observation_steps=50000,
                 loading_step=None,
                 device_name="gpu:0")

verbose_step = 20
total_reward = 0.0
episode_step = 0
time_step = time.time()

for i_episode in range(100000):
    observation = env.reset()
    for t in range(10000000):
        # env.render()
        now_state = preprocess(observation)
        action = agent.get_action(now_state, training=True).numpy()
        observation, reward, done, info = env.step(action)

        # Store done as a 0/1 flag for the replay memory.
        done = 1 if done else 0

        next_state = preprocess(observation)
        agent.step(now_state, action, reward, next_state, done)
        total_reward += reward
        if done:
            if agent.step_count > agent.observation_steps:
                agent.copy_base_to_target()

            if i_episode % verbose_step == 0: