Example no. 1
def train(game):
    """Train a DQN agent on *game*, keeping only non-negative-reward episodes.

    Each episode's transitions are buffered locally; they are committed to the
    agent's replay memory (and a training step is run) only when the episode's
    total shaped reward is >= 0.
    """
    agent = DQN(game)

    for _ in tqdm(range(TRAIN_GAMES)):
        game.new_episode()
        prev_vars, prev_img = None, None
        episode_over = False
        transitions = []
        episode_reward = 0

        while not episode_over:
            state = game.get_state()
            img = state.screen_buffer
            variables = state.game_variables

            # First step of the episode: no previous frame/variables exist
            # yet, so fall back to the current ones.
            if prev_vars is None:
                prev_vars = variables
            if prev_img is None:
                prev_img = img

            action = agent.act(img)
            reward = game.make_action(action)
            episode_over = game.is_episode_finished()
            # Combine the engine reward with the shaped bonus, then scale down.
            reward = (reward + calculate_additional_reward(prev_vars, variables)) / 100
            episode_reward += reward
            transitions.append([prev_img, img, reward, action, episode_over])
            prev_vars, prev_img = variables, img

        # Only commit episodes that scored at least zero to replay memory.
        if episode_reward >= 0:
            for prev_state, cur_state, r, a, d in transitions:
                agent.remember(prev_state, cur_state, r, a, d)
            agent.train()
Example no. 2
def run(ep, train=False):
    """Run (and optionally train) a DQN agent on the Pong environment.

    Args:
        ep: Number of episodes to play.
        train: When True, the agent replays/learns every step and the weights
            are saved at the end; when False, previously saved weights are
            loaded and the agent only plays.

    Returns:
        A list with the total score obtained in each episode.
    """
    pygame.init()
    scores = []  # renamed from `loss`: it accumulates per-episode scores
    agent = DQN(3, 5)
    env = pongGame()
    weights_filepath = 'PongGame.h5'
    if not train:  # idiomatic truthiness instead of `train == False`
        agent.model.load_weights(weights_filepath)
        print("weights loaded")
    for e in range(ep):
        # Drain the event queue so the window stays responsive and closable.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        state = env.reset()
        state = np.reshape(state, (1, 5))
        score = 0
        max_steps = 1000
        for _ in range(max_steps):
            action = agent.act(state)
            reward, next_state, done = env.step(action)
            score += reward
            next_state = np.reshape(next_state, (1, 5))
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if train:
                agent.replay()
            if done:
                print("episode: {}/{}, score: {}".format(e, ep, score))
                break
        scores.append(score)
    if train:
        # Reuse the same path used for loading, instead of a duplicated literal.
        agent.model.save_weights(weights_filepath)
    return scores
Example no. 3
def play(game):
    """Run a pre-trained DQN agent for PLAY_GAMES episodes, printing each action."""
    agent = DQN(game, use_saved=True)
    for _ in tqdm(range(PLAY_GAMES)):
        game.new_episode()
        episode_over = False
        while not episode_over:
            frame = game.get_state().screen_buffer
            chosen_action = agent.act(frame)
            print(chosen_action)
            game.make_action(chosen_action)
            episode_over = game.is_episode_finished()
    N_SAVE = 500
    env = gym.make('FlappyBird-v0')
    agent = DQN(env)
    scores = deque(maxlen=100)
    for i in range(N_EP):
        score = 0
        ob = env.reset()

        # Stack observations
        pre_ob = preprocess(ob)
        pre_ob = pre_ob.reshape(1, 100, 100)
        ob_stack = np.stack((pre_ob, ) * 4, -1)
        pre_ob = ob_stack

        while True:
            action = agent.act(pre_ob, step=i)

            ob, reward, done, _ = env.step(action)
            if reward <= -1:
                reward = -1

            next_pre_ob = preprocess(ob)

            # Stack observations
            next_pre_ob = next_pre_ob.reshape(1, 100, 100)
            ob_stack = np.insert(ob_stack, -1, next_pre_ob, axis=3)
            ob_stack = np.delete(ob_stack, 0, axis=3)
            next_pre_ob = ob_stack

            agent.remember(pre_ob, action, reward, next_pre_ob, done)
            agent.replay()