def __init__(self):
    self.env = cpommerman.make()
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {self.device}")

    self.net = Net().to(self.device)
    self.net.eval()
    self.optimizer = torch.optim.Adam(self.net.parameters(), lr=3e-4)

    # FDTS oracle with 100 simulations and horizon 20
    self.oracle = FDTS(100, 20)
    # bounded buffer holding the most recent 10**5 entries
    self.buffer = deque(maxlen=10**5)
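# The sketch below is NOT from the repository: it is a minimal, hypothetical
# illustration of how a learner built from the fields above (net, optimizer,
# oracle, buffer) might run one update step. It assumes the buffer stores
# (features, action_probs) pairs produced by the oracle, that Net() returns
# action logits, and that `random` and `numpy` are imported at module level;
# the method name and batch layout are assumptions for illustration only.
def _sketch_train_step(self, batch_size=128):
    if len(self.buffer) < batch_size:
        return None
    batch = random.sample(self.buffer, batch_size)
    features = torch.tensor(np.stack([b[0] for b in batch]),
                            dtype=torch.float32, device=self.device)
    targets = torch.tensor(np.stack([b[1] for b in batch]),
                           dtype=torch.float32, device=self.device)

    self.net.train()
    logits = self.net(features)
    # cross-entropy between the predicted policy and the oracle's action distribution
    loss = -(targets * torch.log_softmax(logits, dim=1)).sum(dim=1).mean()

    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    self.net.eval()
    return loss.item()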
def runner(id, num_episodes, args, fifo):
    env = cpommerman.make()

    if args.planner == 'fdts':
        planner = FDTS(args.n_simulations, args.horizon, args.mab)
    elif args.planner == 'mcts':
        planner = MCTS(args.n_simulations, args.mab)
    elif args.planner == 'mcs':
        planner = MCS(args.n_simulations, args.horizon, args.mab)
    else:
        raise Exception('Unknown planner')

    outcomes = [0, 0, 0]  # wins, draws, losses
    for i in range(num_episodes):
        env.reset()
        planner.reset()

        # the planner controls one randomly chosen agent,
        # JointSimpleAgent produces actions for the other three
        planner_id = np.random.randint(4)
        simple = JointSimpleAgent(planner_id)

        while not env.get_done():
            obses = env.get_observations()
            actions = simple.step(obses)
            planner_actions = planner.step(env, env.get_legal_actions())
            actions[planner_id] = planner_actions[planner_id]
            state_str = planner.get_state_str(env.get_json_info())
            env.step(actions)

        rewards = env.get_rewards()
        if rewards[planner_id] == 1:
            # win
            idx = 0
        elif sum(rewards) == -4:
            # draw: all four agents receive -1
            idx = 1
        else:
            # loss
            idx = 2
        outcomes[idx] += 1
        fifo.put(idx)
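# Hypothetical usage sketch (not from the repository): the (id, num_episodes,
# args, fifo) signature suggests runner() is meant to be spawned as a worker
# process that reports each episode's outcome index through a queue. A minimal
# driver could look like the following; the helper name _sketch_main, the
# number of workers, and the assumption that args carries a num_episodes field
# are all illustrative assumptions.
import multiprocessing as mp

def _sketch_main(args, num_runners=4):
    fifo = mp.Queue()
    episodes_per_runner = args.num_episodes // num_runners
    workers = [
        mp.Process(target=runner, args=(i, episodes_per_runner, args, fifo))
        for i in range(num_runners)
    ]
    for w in workers:
        w.start()

    # aggregate wins/draws/losses as workers push outcome indices
    totals = [0, 0, 0]
    for _ in range(episodes_per_runner * num_runners):
        totals[fifo.get()] += 1
    print("wins:", totals[0], "draws:", totals[1], "losses:", totals[2])

    for w in workers:
        w.join()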
parser = argparse.ArgumentParser()
parser.add_argument('--num_episodes', type=int, default=1)
parser.add_argument('--render', action="store_true", default=False)
args = parser.parse_args()

# set up two environments, agents are only in the old env
agent_list = [
    agents.SimpleAgent(),
    agents.SimpleAgent(),
    agents.SimpleAgent(),
    agents.SimpleAgent(),
]
env_old = pommerman.make('PommeFFACompetition-v0', agent_list)
env_new = cpommerman.make()

total_time_old = 0
total_time_new = 0
total_state_len = 0
max_state_len = 0
n = 0
for i in range(args.num_episodes):
    obs_old = env_old.reset()
    # just to test for errors in the reset code; the observations cannot be
    # compared because random seeding is not implemented in both envs
    obs_new = env_new.reset()

    done_old = False
    t = 0
    while not done_old:
def __init__(self, model_file, agent_id=0):
    super().__init__()
    self.model = load_model(model_file)
    self.agent_id = agent_id
    self.env = cpommerman.make()
    self.reset_tree()
def __init__(self, model, agent_id=0):
    super().__init__()
    self.model = model
    self.agent_id = agent_id
    self.env = cpommerman.make()
    self.reset_tree()
import cpommerman
import numpy as np
import time

env = cpommerman.make()

start_time = time.time()
steps = 0
for i in range(1000):
    env.reset()
    done = False
    while not done:
        #state = env.get_state()
        #obs = env.get_observations()
        features = env.get_features()
        # use features, observations or state to produce actions
        actions = np.random.randint(6, size=4, dtype=np.uint8)
        env.step(actions)
        rewards = env.get_rewards()
        done = env.get_done()
        steps += 1

elapsed = time.time() - start_time
print("Time:", elapsed, "Steps:", steps, "Time per step:", elapsed / steps, "FPS:", steps / elapsed)