Example #1
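# Note: these snippets omit their imports. Based on what they call, they assume at least:
#   from datetime import datetime
#   from statistics import mean, stdev
#   from time import sleep
#   from tqdm import tqdm
# plus project-specific modules providing Tetris/TetrisApp, DQNAgent and CustomTensorBoard,
# and (in some examples) numpy, pandas and matplotlib.pyplot as np, pd and plt.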
def dqn():
    env = Tetris()
    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons,
                     activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode,
                     mem_size=mem_size,
                     discount=discount,
                     replay_start_size=replay_start_size)

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        # Rendering is disabled during training (render_every is never used in this example)
        render = False
        actions = []

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0],
                                    best_action[1],
                                    render=render,
                                    render_delay=render_delay)
            agent.add_to_memory(current_state, next_states[best_action],
                                reward, done)
            current_state = next_states[best_action]
            actions.append(best_action)
            steps += 1
        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])

            log.log(episode,
                    avg_score=avg_score,
                    min_score=min_score,
                    max_score=max_score)

    # model.evaluate() expects inputs and targets; use predict() on a single-state batch
    # instead to inspect the value the network assigns to the final state
    # (assumes numpy is imported as np).
    print(agent.model.predict(np.array([current_state])))
    agent.model.save_weights("ia_tetris_weights.h5")
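    # Demo: keep playing full games with rendering enabled so the trained policy
    # can be watched; this loop runs until the process is interrupted.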
    while True:
        current_state = env.reset()
        done = False
        steps = 0
        render = True

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0],
                                    best_action[1],
                                    render=render,
                                    render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action],
                                reward, done)
            current_state = next_states[best_action]
            actions.append(best_action)
            steps += 1

        scores.append(env.get_game_score())
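All of these snippets rely on the same pattern: env.get_next_states() maps each placement
action to the board features that would result, and agent.best_state() returns the candidate
with the highest predicted value. A minimal sketch of that selection step, assuming a Keras
value network and numpy imported as np (the real DQNAgent implementation is not shown here
and may also apply epsilon-greedy exploration):

def best_state_sketch(model, states):
    # Score every candidate after-state with the value network and keep the best one.
    best_value, best = None, None
    for state in states:
        value = model.predict(np.array([state]), verbose=0)[0][0]
        if best_value is None or value > best_value:
            best_value, best = value, state
    return best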
Example #2
def dqn():
    env = Tetris()
    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [64, 32, 16]
    render_delay = None
    activations = ['relu', 'relu', 'relu', 'linear']

    # epsilon=0 makes the agent act greedily from the start, and the train() call
    # further down is commented out, so this run only evaluates the current network.
    agent = DQNAgent(
        env.get_state_size(),
        epsilon=0,
        n_neurons=n_neurons,
        activations=activations,
        epsilon_stop_episode=epsilon_stop_episode,
        mem_size=mem_size,
        discount=discount,
        replay_start_size=replay_start_size,
    )

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []
    scores_sum = 0
    score_max = 0

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            # print('\n\n', next_states)
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0],
                                    best_action[1],
                                    render=render,
                                    render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action],
                                reward, done)
            current_state = next_states[best_action]
            steps += 1

        score = env.get_game_score()
        scores.append(score)
        scores_sum += score
        if score > score_max:
            score_max = score

        if episode != 0 and episode % render_every == 0:
            # print('SCORES SUM:', scores_sum, 'AVG:', scores_sum / render_every, 'MAX:', score_max)
            scores_sum = 0
            score_max = 0

        # Train (disabled in this run)
        # if episode % train_every == 0:
        #    agent.train(batch_size=batch_size, epochs=epochs)

        # This runs at the end of every episode and pauses for 30 seconds before the
        # next game starts (requires `from time import sleep`).
        print('Done!')
        sleep(30)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])

            log.log(episode,
                    avg_score=avg_score,
                    min_score=min_score,
                    max_score=max_score)
Example #3
def dqn():
    env = Tetris()
    episodes = 2000
    max_steps = 1000000000
    epsilon_stop_episode = 1750
    mem_size = 20000
    discount = 0.95
    batch_size = 1024
    epochs = 1
    render_every = 1
    log_every = 1
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = 0.01
    activations = ['relu', 'relu', 'linear']
    m = 0

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)

    log_dir = f'logs/tetris-eps={episodes}-e-stop={epsilon_stop_episode}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []
    steps_list = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0
        
        # Render and record episodes on the render_every boundary, plus the final episode
        if (render_every and episode % render_every == 0) or episode == (episodes - 1):
            render = True
            record = True
        else:
            render = False
            record = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())
            
            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1], episode, render=render,
                                    render_delay=render_delay, record=record)
            
            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())
        steps_list.append(steps)

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            # with log_every == 1 this is simply the latest episode's score and step count
            score = scores[-log_every]
            steps = steps_list[-log_every]

            log.log(episode, score=score, steps=steps)
Example #4
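AgentConf is not defined in this listing. A minimal sketch of what it might look like, built
only from the attributes referenced below, with default values borrowed from the other
examples (the real definition may differ):

from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class AgentConf:
    episodes: int = 2000
    max_steps: Optional[int] = None
    epsilon: float = 1.0
    epsilon_min: float = 0.0
    epsilon_stop_episode: int = 1500
    mem_size: int = 20000
    discount: float = 0.95
    batch_size: int = 512
    epochs: int = 1
    render_every: int = 50
    log_every: int = 50
    replay_start_size: int = 2000
    train_every: int = 1
    n_neurons: List[int] = field(default_factory=lambda: [32, 32])
    activations: List[str] = field(default_factory=lambda: ['relu', 'relu', 'linear'])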
def dqn(conf: AgentConf):
    env = Tetris()

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=conf.n_neurons,
                     activations=conf.activations,
                     epsilon=conf.epsilon,
                     epsilon_min=conf.epsilon_min,
                     epsilon_stop_episode=conf.epsilon_stop_episode,
                     mem_size=conf.mem_size,
                     discount=conf.discount,
                     replay_start_size=conf.replay_start_size)

    timestamp_str = datetime.now().strftime("%Y%m%d-%H%M%S")
    # conf.mem_size = mem_size
    # conf.epochs = epochs
    # conf.epsilon_stop_episode = epsilon_stop_episode
    # conf.discount = discount
    log_dir = f'logs/tetris-{timestamp_str}-ms{conf.mem_size}-e{conf.epochs}-ese{conf.epsilon_stop_episode}-d{conf.discount}'
    log = CustomTensorBoard(log_dir=log_dir)

    print(f"AGENT_CONF = {log_dir}")

    scores = []

    episodes_wrapped: Iterable[int] = tqdm(range(conf.episodes))
    for episode in episodes_wrapped:
        current_state = env.reset()
        done = False
        steps = 0

        # update render flag
        render = bool(conf.render_every and episode % conf.render_every == 0)

        # game
        while not done and (not conf.max_steps or steps < conf.max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            # find the action, that corresponds to the best state
            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.hard_drop([best_action[0], 0],
                                         best_action[1],
                                         render=render)

            agent.add_to_memory(current_state, next_states[best_action],
                                reward, done)
            current_state = next_states[best_action]
            steps += 1

        # just return score
        scores.append(env.get_game_score())

        # train
        if episode % conf.train_every == 0:
            # n = len(agent.memory)
            # print(f" agent.memory.len: {n}")
            agent.train(batch_size=conf.batch_size, epochs=conf.epochs)

        # logs
        if conf.log_every and episode and episode % conf.log_every == 0:
            avg_score = mean(scores[-conf.log_every:])
            min_score = min(scores[-conf.log_every:])
            max_score = max(scores[-conf.log_every:])
            log.log(episode,
                    avg_score=avg_score,
                    min_score=min_score,
                    max_score=max_score)
    # save_model
    save_model(agent.model,
               f'{log_dir}/model.hdf',
               overwrite=True,
               include_optimizer=True)
Example #5
def dqn():
    env = Tetris()
    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons,
                     activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode,
                     mem_size=mem_size,
                     discount=discount,
                     replay_start_size=replay_start_size)

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []
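    # Feature toggles: choose which board statistics make up the state vector passed
    # to env.reset() and env.get_next_states() in the loop below.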
    _max_height = True
    _min_height = True
    _current_piece = False
    _next_piece = False
    _max_bumpiness = False
    _lines = False
    _holes = True
    _total_bumpiness = True
    _sum_height = False

    for episode in tqdm(range(episodes)):
        current_state = env.reset(_max_height, _min_height, _current_piece,
                                  _next_piece, _max_bumpiness, _lines, _holes,
                                  _total_bumpiness, _sum_height)
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            # Select next-state features with the same toggles used for env.reset()
            # (calling get_next_states() with no arguments would use the defaults)
            next_states = env.get_next_states(_max_height, _min_height,
                                              _current_piece, _next_piece,
                                              _max_bumpiness, _lines, _holes,
                                              _total_bumpiness, _sum_height)
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0],
                                    best_action[1],
                                    render=render,
                                    render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action],
                                reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            cleared_lines = env.get_lines()

            log.log(episode,
                    avg_score=avg_score,
                    min_score=min_score,
                    max_score=max_score,
                    cleared_lines=cleared_lines)
Example #6
def dqn():
    env = Tetris()
    episodes = 500
    max_steps = None
    epsilon_stop_episode = int(episodes * 0.75)
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 25
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']
    dqn_num = 3
    filepaths = "tetris-nn_" + str(dqn_num) + "-.h5"
    # filepaths = ["tetris-nn_"+str(dqn_num)+"-"+str(i)+".h5" for i in range(0,10)]
    save = len(filepaths)
    save_every = episodes / save
    log_fp = "log.txt"
    csv_fp = "dqn_" + str(dqn_num) + "_training.csv"
    log = open(log_fp, "a")
    log.write("\ntetris-nn=" + str(n_neurons) + "-mem=" + str(mem_size) +
              "-bs=" + str(batch_size) + "-e=" + str(epochs) + "-" +
              str(datetime.now().strftime("%Y%m%d-%H%M%S")) + "\n\n")
    log.close()

    agent = DQNAgent(env.get_action_space(),
                     n_neurons=n_neurons,
                     activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode,
                     mem_size=mem_size,
                     discount=discount,
                     replay_start_size=replay_start_size)

    scores = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.step(best_action[0],
                                    best_action[1],
                                    render=render,
                                    render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action],
                                reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Save
        if (episode + 1) % save_every == 0:
            agent.save(filepaths)
            # agent.save(filepaths[save-10])
            save += 1

        # Logs
        if log_every and episode and (episode + 1) % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])

            log = open(log_fp, "a")
            logging = "episode: "+str(episode+1)+", avg_score: "+str(avg_score)+", min_score: "+\
                 str(min_score)+", max_score: "+str(max_score)+"\n"
            log.write(logging)
            log.close()

    log = open(log_fp, "a")
    log.write(
        "\n------------------------------------------------------------------------------------------------"
        + "\n")
    log.close()

    df = pd.DataFrame(scores)
    df.to_csv(csv_fp)
Example #7
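The plotting code at the end of this example calls a moving_average() helper that is not
shown. A minimal sketch, assuming numpy is imported as np (any equivalent rolling-mean
implementation would do):

def moving_average(values, window):
    # Rolling mean via convolution; returns len(values) - window + 1 points,
    # which matches the np.linspace(...) x-axis used below.
    return np.convolve(values, np.ones(window) / window, mode='valid')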
def dqn():
    env = TetrisApp(8, 16, 750, False, 40, 30 * 100)
    episodes = 5000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons,
                     activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode,
                     mem_size=mem_size,
                     discount=discount,
                     replay_start_size=replay_start_size)

    # log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    # log = CustomTensorBoard(log_dir=log_dir)

    scores = []
    env.pcrun()
    for episode in tqdm(range(episodes)):
        env.reset()
        current_state = env._get_board_props(env.board)
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.pcplace(best_action[0], best_action[1])

            agent.add_to_memory(current_state, next_states[best_action],
                                reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        # if log_every and episode and episode % log_every == 0:
        #     avg_score = mean(scores[-log_every:])
        #     min_score = min(scores[-log_every:])
        #     max_score = max(scores[-log_every:])

        #     log.log(episode, avg_score=avg_score, min_score=min_score,
        #             max_score=max_score)
    plt.xlabel("Episodes")
    plt.ylabel('Average score over 30 episodes')
    plt.grid()
    plt.plot(np.linspace(30, episodes, episodes - 29),
             moving_average(scores, 30))
    plt.savefig("nlinker.png")
Example #8
def dqn():
    trainingAgent = False
    trainingHater = False
    env = Tetris(trainingAgent or trainingHater)
    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 200 if (trainingAgent or trainingHater) else 10
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']
    agent_save_filepath = "keras_saved_maxbump.h5"
    # hater_save_filepath = "hater_changed_reward.h5"
    hater_save_filepath = "hater_best.h5"

    # Avg 135 || reward function = 1 + (lines_cleared ** 2)*self.BOARD_WIDTH - (.1)*self._bumpiness(self.board)[0]/self.BOARD_WIDTH
    # 200 death penalty
    # agent_save_filepath = "keras_saved_maxbump.h5"

    # Avg 25 || reward function = 1 + (lines_cleared ** 2)*self.BOARD_WIDTH
    # 2 death penalty
    # agent_save_filepath = "keras_saved.h5"

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons,
                     activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode,
                     mem_size=mem_size,
                     discount=discount,
                     replay_start_size=replay_start_size,
                     training=trainingAgent,
                     agent_save_filepath=agent_save_filepath)

    # Adversarial "hater" network: it is fed the negated reward below and is attached
    # to the environment as env.hater.
    hateris = DQNAgent(env.get_state_size(),
                       n_neurons=n_neurons,
                       activations=activations,
                       epsilon_stop_episode=epsilon_stop_episode,
                       mem_size=mem_size,
                       discount=discount,
                       replay_start_size=replay_start_size,
                       training=trainingHater,
                       agent_save_filepath=hater_save_filepath)
    env.hater = hateris

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0],
                                    best_action[1],
                                    render=render,
                                    render_delay=render_delay)
            if len(current_state) == env.get_state_size() - 1 and trainingAgent:
                toBeAdded = current_state + [env.next_piece]
            elif len(current_state) == env.get_state_size() - 1 and trainingHater:
                toBeAdded = current_state + [env.current_piece]
            else:
                toBeAdded = current_state
            if trainingAgent:
                agent.add_to_memory(toBeAdded, next_states[best_action],
                                    reward, done)
            if trainingHater:
                hateris.add_to_memory(toBeAdded, next_states[best_action],
                                      -reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0 and trainingAgent:
            agent.train(batch_size=batch_size, epochs=epochs)
        if episode % train_every == 0 and trainingHater:
            hateris.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            std_score = stdev(scores[-log_every:])
            print(f"{episode} Avg: {avg_score}   Min: {min_score}   "
                  f"Max: {max_score}   Std: {round(std_score, 2)}")

        if episode == epsilon_stop_episode and trainingAgent:
            agent.save_agent("agent_stopEps.h5")
        if episode == epsilon_stop_episode and trainingHater:
            hateris.save_agent("hater_stopEps.h5")

    if trainingAgent: agent.save_agent("real_agent.h5")
    if trainingHater: hateris.save_agent("real_hater.h5")
    plt.plot(scores)
    plt.show()
Example #9
def dqn():
    training = False
    env = Tetris(training)
    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 200 if training else 10
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']
    agent_save_filepath = "keras_saved_maxbump.h5"  # must be defined: it is passed to DQNAgent below

    # with open("saved_agent", "rb") as input_file:
    #     agent = pickle.load(input_file)
    #     agent.epsilon = 0

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons,
                     activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode,
                     mem_size=mem_size,
                     discount=discount,
                     replay_start_size=replay_start_size,
                     training=training,
                     agent_save_filepath=agent_save_filepath)

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0],
                                    best_action[1],
                                    render=render,
                                    render_delay=render_delay)
            if len(current_state) == env.get_state_size() - 1:
                toBeAdded = current_state + [env.next_piece]
            else:
                toBeAdded = current_state
            if training:
                agent.add_to_memory(toBeAdded, next_states[best_action],
                                    reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0 and training:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            std_score = stdev(scores[-log_every:])
            print(f"{episode} Avg: {avg_score}   Min: {min_score}   "
                  f"Max: {max_score}   Std: {round(std_score, 2)}")

        if episode == epsilon_stop_episode:
            agent.save_agent("keras_saved_stopEps.h5")

    if training: agent.save_agent("keras_saved.h5")
    plt.plot(scores)
    plt.show()
Example #10
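This final snippet is truncated: only the tail of a DQNAgent(...) call and the play loop
survive. It assumes env, agent, max_steps, render_delay, actions and scores were defined
earlier, much like the demo loop at the end of Example #1.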
                 replay_start_size=replay_start_size)

while True:
    current_state = env.reset()
    done = False
    steps = 0
    render = True

    # Game
    while not done and (not max_steps or steps < max_steps):
        next_states = env.get_next_states()
        best_state = agent.best_state(next_states.values())

        best_action = None
        for action, state in next_states.items():
            if state == best_state:
                best_action = action
                break

        reward, done = env.play(best_action[0],
                                best_action[1],
                                render=render,
                                render_delay=render_delay)

        agent.add_to_memory(current_state, next_states[best_action], reward,
                            done)
        current_state = next_states[best_action]
        actions.append(best_action)
        steps += 1

    scores.append(env.get_game_score())