예제 #1
0
def simulate_random_agent(games):
    """Play NUM_GAMES games with fresh RandomAgent instances and collect stats.

    Args:
        games: Container indexed by integers ``0 .. NUM_GAMES - 1``; each
            element is handed to ``RandomAgent.play``.

    Returns:
        A dict with two entries:
            "actions": one flat list holding the actions of every game, in
                the order the games were played.
            "rewards": the per-game rewards summed into an array that starts
                as ``np.zeros(NUM_STEPS)``, then divided by NUM_GAMES.
    """
    all_actions = []
    reward_totals = np.zeros(NUM_STEPS)

    progress = tqdm(range(NUM_GAMES), desc="Random Agent")
    for game_index in progress:
        # A new agent per game, so nothing carries over between games.
        player = RandomAgent(NUM_ARMS, NUM_STEPS)
        current_game = games[game_index]

        game_actions, game_rewards = player.play(current_game)

        all_actions.extend(game_actions)
        reward_totals += game_rewards

    # Turn the accumulated sum into the mean over all games.
    reward_totals /= NUM_GAMES

    return {
        "actions": all_actions,
        "rewards": reward_totals,
    }
예제 #2
0
def play_random(params_path):
    """Run a RandomAgent in a new Environment and print score statistics.

    Args:
        params_path: Passed straight to ``Parameters.load`` before the
            environment is created (presumably a path to a parameters
            file; confirm against ``Parameters``).

    Returns:
        None. The mean and standard deviation of the values returned by
        ``agent.play()`` are printed to stdout.
    """
    # Parameters must be loaded before the Environment is constructed,
    # mirroring the original call order.
    Parameters.load(params_path)

    random_agent = RandomAgent(Environment())
    scores = random_agent.play()

    score_mean = np.mean(scores)
    score_std = np.std(scores)
    print('mean: ', score_mean, '\tstd: ', score_std)