Example #1
    def __init__(self):
        # fast Pommerman environment used for simulation
        self.env = cpommerman.make()

        # run the network on GPU when available
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {self.device}")
        self.net = Net().to(self.device)
        self.net.eval()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=3e-4)

        # FDTS planner serves as the expert oracle; the deque is a bounded replay buffer
        self.oracle = FDTS(100, 20)
        self.buffer = deque(maxlen=10**5)
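The fields above suggest an expert-iteration style loop: the FDTS oracle produces targets, they accumulate in the replay buffer, and the network is periodically fit to them. That update code is not part of the excerpt, so the block below is only a minimal sketch, assuming buffer entries are (features, policy target, value target) tuples and that Net returns policy logits plus a value head; train_step and the tuple layout are illustrative, not from the original code.

import random

import numpy as np
import torch
import torch.nn.functional as F

def train_step(self, batch_size=128):
    # minimal sketch of one network update (meant to live on the class above);
    # assumes buffer entries are (features, policy_target, value_target) tuples
    batch = random.sample(self.buffer, min(batch_size, len(self.buffer)))
    features, target_pi, target_v = (np.array(x) for x in zip(*batch))

    features = torch.as_tensor(features, dtype=torch.float32, device=self.device)
    target_pi = torch.as_tensor(target_pi, dtype=torch.float32, device=self.device)
    target_v = torch.as_tensor(target_v, dtype=torch.float32, device=self.device)

    self.net.train()
    logits, value = self.net(features)  # assumes Net returns (policy logits, value)
    loss = (F.cross_entropy(logits, target_pi.argmax(dim=1))
            + F.mse_loss(value.squeeze(-1), target_v))

    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    self.net.eval()
    return loss.item()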
Example #2
def runner(id, num_episodes, args, fifo):
    env = cpommerman.make()

    # choose the planning algorithm under evaluation
    if args.planner == 'fdts':
        planner = FDTS(args.n_simulations, args.horizon, args.mab)
    elif args.planner == 'mcts':
        planner = MCTS(args.n_simulations, args.mab)
    elif args.planner == 'mcs':
        planner = MCS(args.n_simulations, args.horizon, args.mab)
    else:
        raise ValueError(f'Unknown planner: {args.planner}')

    outcomes = [0, 0, 0]  # wins, draws, losses
    for i in range(num_episodes):
        env.reset()
        planner.reset()
        # the planner controls one randomly chosen agent;
        # the remaining three are scripted SimpleAgents
        planner_id = np.random.randint(4)
        simple = JointSimpleAgent(planner_id)

        while not env.get_done():
            obses = env.get_observations()
            actions = simple.step(obses)
            planner_actions = planner.step(env, env.get_legal_actions())
            actions[planner_id] = planner_actions[planner_id]

            # string representation of the full game state (unused in this excerpt)
            state_str = planner.get_state_str(env.get_json_info())

            env.step(actions)

        rewards = env.get_rewards()
        if rewards[planner_id] == 1:  # win
            idx = 0
        elif sum(rewards) == -4:  # draw: all four agents receive -1
            idx = 1
        else:  # loss
            idx = 2
        outcomes[idx] += 1
        fifo.put(idx)
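runner is written to be started in a worker process and to push one outcome index per episode into fifo. The driver that launches it is not part of the excerpt; a minimal sketch, assuming fifo is a multiprocessing.Queue and args is the parsed argparse namespace, could look like the following (evaluate and the worker counts are illustrative):

from multiprocessing import Process, Queue

def evaluate(args, num_workers=4, episodes_per_worker=25):
    # sketch: fan episodes out over worker processes and tally their outcomes
    fifo = Queue()
    workers = [Process(target=runner, args=(i, episodes_per_worker, args, fifo))
               for i in range(num_workers)]
    for w in workers:
        w.start()

    outcomes = [0, 0, 0]  # wins, draws, losses
    for _ in range(num_workers * episodes_per_worker):
        outcomes[fifo.get()] += 1

    for w in workers:
        w.join()
    return outcomes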
Example #3

parser = argparse.ArgumentParser()
parser.add_argument('--num_episodes', type=int, default=1)
parser.add_argument('--render', action="store_true", default=False)
args = parser.parse_args()

# set up two environments; agent objects are only needed by the old env
agent_list = [
    agents.SimpleAgent(),
    agents.SimpleAgent(),
    agents.SimpleAgent(),
    agents.SimpleAgent(),
]
env_old = pommerman.make('PommeFFACompetition-v0', agent_list)
env_new = cpommerman.make()

total_time_old = 0
total_time_new = 0
total_state_len = 0
max_state_len = 0

n = 0
for i in range(args.num_episodes):
    obs_old = env_old.reset()
    # reset the new env as well, mainly to exercise its reset code;
    # the two runs cannot be compared exactly since neither env supports seeding
    obs_new = env_new.reset()
    done_old = False
    t = 0
    while not done_old:
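The excerpt ends inside the episode loop. Judging from the counters initialized above, the missing body most likely times one step of each environment and tracks the size of the packed state. The continuation below is only a guess at that body; it assumes the script has already imported time and numpy, that the old env follows the usual pommerman act/step API, and that cpommerman's get_state() returns a bytes-like packed state.

        # plausible continuation of the loop body (not part of the original excerpt)
        start = time.time()
        actions = env_old.act(obs_old)                   # SimpleAgents pick actions
        obs_old, rew_old, done_old, info_old = env_old.step(actions)
        total_time_old += time.time() - start

        start = time.time()
        env_new.step(np.array(actions, dtype=np.uint8))  # same actions in the fast env
        total_time_new += time.time() - start

        state = env_new.get_state()                      # packed state, size bookkeeping
        total_state_len += len(state)
        max_state_len = max(max_state_len, len(state))
        t += 1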
Example #4
    def __init__(self, model_file, agent_id=0):
        super().__init__()
        # load a trained model from disk
        self.model = load_model(model_file)
        self.agent_id = agent_id
        # private copy of the fast environment, used for look-ahead search
        self.env = cpommerman.make()
        self.reset_tree()
Example #5
    def __init__(self, model, agent_id=0):
        super().__init__()
        # model object is passed in directly instead of being loaded from a file
        self.model = model
        self.agent_id = agent_id
        self.env = cpommerman.make()
        self.reset_tree()
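Both constructors keep a private cpommerman environment and call reset_tree(), which suggests a search-based agent that replays candidate moves in its own copy of the game. Neither reset_tree() nor the search itself is shown, so the helper below is only a sketch; it assumes cpommerman pairs get_state() with a set_state() counterpart, that get_features() returns one feature array per agent, and that the model is a Keras-style object with predict(). None of these assumptions are confirmed by the excerpts.

    def evaluate_action(self, state, action):
        # sketch only: restore a saved state into the private env, apply one
        # joint action, and score the result with the learned model
        self.env.set_state(state)              # assumed counterpart of get_state()
        actions = np.zeros(4, dtype=np.uint8)  # other agents stand still, for illustration
        actions[self.agent_id] = action
        self.env.step(actions)
        features = self.env.get_features()     # assumed: one feature row per agent
        value = self.model.predict(features[self.agent_id][np.newaxis])[0]
        return value, self.env.get_done()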
Example #6
import cpommerman
import numpy as np
import time

env = cpommerman.make()

start_time = time.time()
steps = 0
for i in range(1000):
    env.reset()
    done = False
    while not done:
        #state = env.get_state()
        #obs = env.get_observations()
        features = env.get_features()
        # use features, observations or state to produce action
        # here: uniformly random actions (6 discrete actions, 4 agents)
        actions = np.random.randint(6, size=4, dtype=np.uint8)
        env.step(actions)
        rewards = env.get_rewards()
        done = env.get_done()
        steps += 1

elapsed = time.time() - start_time
print("Time:", elapsed, "Steps:", steps, "Time per step:", elapsed / steps, "FPS:", steps / elapsed)