def simulate_random_agent(games):
    """Run a RandomAgent over ``NUM_GAMES`` games and aggregate the results.

    A new ``RandomAgent(NUM_ARMS, NUM_STEPS)`` is created for every game.
    The actions returned by each ``play`` call are concatenated into one
    list, and the rewards are summed element-wise into an array of length
    ``NUM_STEPS`` which is then divided by ``NUM_GAMES``.

    Args:
        games: Indexable collection of games; entries ``0`` through
            ``NUM_GAMES - 1`` are read.

    Returns:
        A dict with two keys: ``"actions"`` (list of every action taken,
        in game order) and ``"rewards"`` (array of mean reward per step).
    """
    all_actions = []
    # NOTE(review): assumes each game's rewards broadcast against a
    # length-NUM_STEPS array -- confirm against RandomAgent.play.
    reward_totals = np.zeros(NUM_STEPS)

    for game_index in tqdm(range(NUM_GAMES), desc="Random Agent"):
        player = RandomAgent(NUM_ARMS, NUM_STEPS)
        actions, rewards = player.play(games[game_index])
        all_actions.extend(actions)
        reward_totals += rewards

    # Turn the accumulated sums into per-step averages across games.
    reward_totals /= NUM_GAMES

    return {"actions": all_actions, "rewards": reward_totals}
def play_random(params_path):
    """Play with a RandomAgent and print the mean and std of its scores.

    Loads parameters via ``Parameters.load``, builds an ``Environment``
    and a ``RandomAgent`` bound to it, calls ``agent.play()``, and prints
    ``np.mean`` and ``np.std`` of the value it returns.

    Args:
        params_path: Passed unchanged to ``Parameters.load``.

    Returns:
        None. The summary statistics are only printed.
    """
    # Parameters are loaded before the environment is constructed.
    Parameters.load(params_path)

    agent = RandomAgent(Environment())
    scores = agent.play()

    score_mean = np.mean(scores)
    score_std = np.std(scores)
    print('mean: ', score_mean, '\tstd: ', score_std)