def training(**kwargs):
    """Create a DQN agent, train it until its iteration budget is used, save it.

    Keyword Args:
        debug: When truthy the module ``LOGGER`` is set to ``DEBUG``,
            otherwise to ``INFO``.
        NN_arch: Forwarded to ``DQNAgent`` as ``NN_arch``.
        max_iters: Forwarded to ``DQNAgent`` as ``maxIters``; also substituted
            into ``agent.save_path`` to build the final save location.
        weights_dir: Forwarded to ``DQNAgent`` as ``weights_dir``.
        replay: Forwarded to every ``agent.learn`` call.
        frame_skipping: Forwarded to every ``agent.learn`` call.
        batch_size: Forwarded to every ``agent.learn`` call.

    Returns:
        The ``DQNAgent`` instance after training and saving.

    Raises:
        KeyError: If any of the keyword arguments listed above is missing.
    """
    # Set logging level
    LOGGER.setLevel(logging.DEBUG if kwargs['debug'] else logging.INFO)

    # NOTE(review): ``env`` is not defined in this function; it is expected to
    # exist at module scope -- confirm against the rest of the file.
    agent = DQNAgent(environment=env,
                     action_space=[0, 1, 2, 3, 4, 5, 6, 7],
                     NN_arch=kwargs['NN_arch'],
                     maxIters=kwargs['max_iters'],
                     eta=0.00001,
                     epsilon=0.4,
                     discount=0.95,
                     weights_dir=kwargs['weights_dir'],
                     mem_size=10**5)

    # ``learn`` always runs at least once; the loop stops as soon as the
    # agent's own iteration counter has passed its configured maximum.
    while True:
        agent.learn(replay=kwargs['replay'],
                    frame_skipping=kwargs['frame_skipping'],
                    batch_size=kwargs['batch_size'])
        if agent.numIters > agent.maxIters:
            break

    # ``save_path`` is used as a %-format template taking the iteration count.
    agent.save(agent.save_path % kwargs['max_iters'])
    return agent


def dqn_train():
    """Train a ``DQNAgent`` on ``PongNoFrameskip-v4`` and plot the results.

    Runs ``n_games`` episodes. After each episode the mean of the last 100
    episode scores is compared with the best mean seen so far; on improvement
    the models and the new best score are saved (when ``save_checkpoint`` is
    set). When the run started from a loaded checkpoint it stops early once
    18000 total environment steps have been taken. Finally the per-episode
    scores and epsilon values are passed to ``plot_learning_curve``.

    Behaviour is controlled by the local flags at the top of the function
    (``load_checkpoint``, ``save_checkpoint``, ``learning_enabled``,
    ``rendering_enabled``). The function takes no arguments and returns
    ``None``.
    """
    env = make_env('PongNoFrameskip-v4')

    load_checkpoint = False
    save_checkpoint = True
    learning_enabled = True
    rendering_enabled = False

    n_games = 100
    agent = DQNAgent(gamma=0.99,
                     epsilon=1.0,
                     lr=0.0001,
                     input_dims=(env.observation_space.shape),
                     n_actions=env.action_space.n,
                     mem_size=50000,
                     eps_min=0.1,
                     batch_size=32,
                     replace=1000,
                     eps_dec=1e-5,
                     chkpt_dir='models/',
                     algo='DQNAgent',
                     env_name='PongNoFrameskip-v4')

    if load_checkpoint:
        agent.load_models()
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only use a score file this script wrote itself.
        with open('models/best_score.pkl', 'rb') as file:
            best_score = pickle.load(file)
    else:
        # No previous run: any first average counts as an improvement.
        best_score = -np.inf

    fname = agent.algo + '_' + agent.env_name + '_lr' + str(
        agent.lr) + '_' + str(n_games) + 'games'
    figure_file = 'plots/' + fname + '.png'

    # Cumulative step counter across all episodes (not reset per episode).
    n_steps = 0
    scores, eps_history, steps_array = [], [], []

    for i in range(n_games):
        done = False
        observation = env.reset()
        score = 0
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            if learning_enabled:
                agent.store_transition(observation, action, reward,
                                       observation_, int(done))
                agent.learn()
            if rendering_enabled:
                env.render()
            observation = observation_
            n_steps += 1
        scores.append(score)
        steps_array.append(n_steps)

        # Mean over (at most) the 100 most recent episodes.
        avg_score = np.mean(scores[-100:])
        print('episode: ', i, 'score: ', score,
              ' average score %.1f' % avg_score,
              'best score %.2f' % best_score, 'epsilon %.2f' % agent.epsilon,
              'steps', n_steps)

        if avg_score > best_score:
            best_score = avg_score
            if save_checkpoint:
                agent.save_models()
                with open('models/best_score.pkl', 'wb') as file:
                    pickle.dump(best_score, file)

        eps_history.append(agent.epsilon)
        # Runs resumed from a checkpoint are capped at 18000 total steps.
        if load_checkpoint and n_steps >= 18000:
            break

    plot_learning_curve(steps_array, scores, eps_history, figure_file)