# Flat training-script section: run `n_runs` episodes of a DQN-style agent
# against `env`, logging per-episode scores and epsilon values, and storing
# each run's observation trajectory as one DataFrame column.
# NOTE(review): `Agent`, `env`, and the UPPER_CASE hyperparameters are
# defined elsewhere in this file — not visible in this chunk.
agent = Agent(
    gamma=GAMMA,
    epsilon=EPSILON,
    batch_size=BATCH_SIZE,
    max_mem_size=500000,
    n_actions=NUMBER_OF_ACTIONS,
    eps_end=0.01,
    input_dims=[2],
    lr=LR,
)

scores, eps_history = [], []
n_runs = 10
df = pd.DataFrame()

for i in range(n_runs):
    score = 0
    done = False
    # assumes classic gym API (reset -> obs, step -> 4-tuple) — TODO confirm
    observation = env.reset()
    run = []

    while not done:
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        score += reward
        agent.store_transition(observation, action, reward, observation_, done)
        agent.learn()
        observation = observation_
        run.append(observation)

    scores.append(score)
    eps_history.append(agent.epsilon)
    # Trailing-100-episode average (uses fewer episodes while warming up).
    avg_score = np.mean(scores[-100:])

    # One uniquely-named column per run; runs of different lengths are
    # NaN-padded by the axis=1 concat. (Incremental concat is O(n^2)
    # overall but negligible for n_runs=10.)
    run_name = str(uuid.uuid4())
    x = pd.DataFrame()
    x[run_name] = run
    df = pd.concat([df, x], axis=1)

    # NOTE(review): the original print call was truncated in this chunk;
    # completed here with the metrics evidently being reported — confirm
    # against the full file.
    print(
        f"Episode: {i}",
        f"Score: {score}",
        f"Avg score: {avg_score:.2f}",
        f"Epsilon: {agent.epsilon:.3f}",
    )