agent = Agent(gamma=GAMMA, epsilon=EPSILON, batch_size=BATCH_SIZE, max_mem_size=500000,
                  n_actions=NUMBER_OF_ACTIONS, eps_end=0.01, input_dims=[2], lr=LR)
    # scores: total reward per episode; eps_history: agent's epsilon after each episode.
    scores, eps_history = [], []
    n_runs = 10  # number of training episodes
    # Accumulates one column per episode holding that episode's observation trajectory.
    df = pd.DataFrame()
    for i in range(n_runs):
        score = 0
        done = False
        # NOTE(review): assumes the classic gym API where reset() returns only the
        # observation (gym < 0.26; newer gym/gymnasium returns (obs, info)) — confirm.
        observation = env.reset()
        run = []  # observations visited during this episode
        while not done:
            action = agent.choose_action(observation)
            # NOTE(review): 4-tuple return implies the pre-0.26 gym step API
            # (obs, reward, done, info); gymnasium returns 5 values — confirm.
            observation_, reward, done, info = env.step(action)
            score += reward
            # Record the transition in the replay buffer, then do one learning step
            # per environment step.
            agent.store_transition(observation, action, reward, observation_, done)
            agent.learn()
            observation = observation_
            # Appends the *next* observation (post-transition), so the initial
            # reset observation is never recorded in `run`.
            run.append(observation)

        scores.append(score)
        eps_history.append(agent.epsilon)
        # Rolling mean over the last (up to) 100 episodes; computed here but not
        # used in the visible span — presumably printed/plotted further down.
        avg_score = np.mean(scores[-100:])

        # Store this episode's trajectory as a new column keyed by a random UUID
        # so column names never collide across episodes.
        run_name = str(uuid.uuid4())
        x = pd.DataFrame()
        x[run_name] = run  # pandas copies the list into a Series here
        df = pd.concat([df, x], axis=1)
        # Redundant: `run` is rebound to a fresh list at the top of the loop and
        # its data was already copied into `x` above.
        run.clear()

        print(f"Episode: {i}",
              f"Score: {score}",