Example #1
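This test runs 200 steps of random actions across 256 vectorised environments with four snakes each, with boosting enabled (hence actions are drawn from torch.randint(8, ...)), calling env.check_consistency() after every step and printing throughput at the end.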
    def test_random_actions_with_boost(self):
        # num_envs = 1024*8
        num_envs = 256
        num_steps = 200
        num_snakes = 4
        # Create some environments and run random actions for N steps, checking for consistency at each step
        env = MultiSnake(num_envs=num_envs, num_snakes=num_snakes, size=25, manual_setup=False, boost=True, verbose=True,
                         render_args={'num_rows': 1, 'num_cols': 2, 'size': 256},
                         respawn_mode='any', food_mode='random_rate', boost_cost_prob=0.25,
                         observation_mode='partial_5', food_on_death_prob=0.33, food_rate=2.5e-4
                         )
        env.check_consistency()

        all_actions = {
            f'agent_{i}': torch.randint(8, size=(num_steps, num_envs)).long().to(DEFAULT_DEVICE)
            for i in range(num_snakes)
        }

        t0 = time()
        for i in range(all_actions['agent_0'].shape[0]):
            actions = {
                agent: agent_actions[i] for agent, agent_actions in all_actions.items()
            }
            observations, reward, done, info = env.step(actions)

            env.reset(done['__all__'], return_observations=False)
            env.check_consistency()
            print()  # Blank line to separate per-step verbose output

        t = time() - t0
        print(f'Ran {num_envs * num_steps} env steps in {t:.2f}s = {num_envs * num_steps / t:.0f} steps/s')
Example #2
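A simpler variant of the test above: two snakes, movement-only actions (torch.randint(4, ...)), 100 environments stepped for 100 steps, again checking consistency after each step.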
    def test_random_actions(self):
        num_envs = 100
        num_steps = 100
        # Create some environments and run random actions for N steps, checking for consistency at each step
        env = MultiSnake(num_envs=num_envs, num_snakes=2, size=size, manual_setup=False, verbose=True,
                         render_args={'num_rows': 5, 'num_cols': 5, 'size': 128},
                         )
        env.check_consistency()

        all_actions = {
            'agent_0': torch.randint(4, size=(num_steps, num_envs)).long().to(DEFAULT_DEVICE),
            'agent_1': torch.randint(4, size=(num_steps, num_envs)).long().to(DEFAULT_DEVICE),
        }

        t0 = time()
        for i in range(all_actions['agent_0'].shape[0]):
            actions = {
                agent: agent_actions[i] for agent, agent_actions in all_actions.items()
            }
            observations, reward, done, info = env.step(actions)

            env.reset(done['__all__'])
            env.check_consistency()
            print()  # Blank line to separate per-step verbose output

        t = time() - t0
        print(f'Ran {num_envs * num_steps} env steps in {t:.2f}s = {num_envs * num_steps / t:.0f} steps/s')
Example #3
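Creates 512 environments in a single batch and checks that the orientations stored on the environment agree with those recomputed independently by determine_orientations from the concatenated food, head and body channels.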
    def test_create_envs(self):
        # Create a large number of environments and check consistency
        env = MultiSnake(num_envs=512, num_snakes=2, size=size, manual_setup=False)
        env.check_consistency()

        _envs = torch.cat([
            env.foods.repeat_interleave(env.num_snakes, dim=0),
            env.heads,
            env.bodies
        ], dim=1)

        orientations = determine_orientations(_envs)
        self.assertTrue(torch.equal(env.orientations, orientations))
Example #4
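A short smoke test: four snakes per environment with boosting enabled, stepped with random actions for 10 steps while resetting finished environments and checking consistency.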
    def test_many_snakes(self):
        num_envs = 50
        num_steps = 10
        num_snakes = 4
        env = MultiSnake(num_envs=num_envs, num_snakes=num_snakes, size=size, manual_setup=False, boost=True)
        env.check_consistency()

        all_actions = {
            f'agent_{i}': torch.randint(8, size=(num_steps, num_envs)).long().to(DEFAULT_DEVICE) for i in range(num_snakes)
        }

        for i in range(all_actions['agent_0'].shape[0]):
            # env.render()
            actions = {
                agent: agent_actions[i] for agent, agent_actions in all_actions.items()
            }
            observations, reward, done, info = env.step(actions)
            env.reset(done['__all__'])
            env.check_consistency()
Example #5
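Builds an environment in 'partial_5' observation mode, fetches observations via the private _observe method and, when render_envs is enabled, plots each agent's partial view of the first environment with matplotlib.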
    def test_partial_observations(self):
        num_envs = 256
        num_snakes = 4
        observation_mode = 'partial_5'
        env = MultiSnake(num_envs=num_envs, num_snakes=num_snakes, size=25, manual_setup=False, boost=True,
                         observation_mode=observation_mode,
                         render_args={'num_rows': 1, 'num_cols': 2, 'size': 256},
                         )
        env.check_consistency()

        render_envs = False  # Set to True to visualise the observations below
        observations = env._observe(observation_mode)
        if render_envs:
            fig, axes = plt.subplots(2, 2)
            i = 0
            # Show all the observations of the agent in the first env
            for k, v in observations.items():
                axes[i // 2, i % 2].imshow(v[0].permute(1, 2, 0).cpu().numpy())
                i += 1

            plt.show()

            env.render()
            sleep(5)
Example #6
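An excerpt from what appears to be a training script: the tail end of a discriminator and optimiser setup, followed by environment construction driven entirely by command-line arguments.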
        num_residual_convs=2, num_feedforward=1, feedforward_dim=64).to(device=args.device, dtype=dtype)
    discrim_opt = optim.Adam(discriminator.parameters(), lr=args.lr, weight_decay=1e-5)


#################
# Configure Env #
#################
render_args = {
    'size': args.render_window_size,
    'num_rows': args.render_rows,
    'num_cols': args.render_cols,
}
if args.env == 'snake':
    env = MultiSnake(num_envs=args.n_envs, num_snakes=args.n_agents, food_on_death_prob=args.food_on_death,
                     size=args.size, device=args.device, render_args=render_args, boost=args.boost,
                     boost_cost_prob=args.boost_cost, dtype=dtype, food_rate=args.food_rate,
                     respawn_mode=args.respawn_mode, food_mode=args.food_mode, observation_mode=observation_type,
                     reward_on_death=args.reward_on_death, agent_colours=args.colour_mode)
else:
    raise ValueError('Unrecognised environment')


trajectories = TrajectoryStore()
ewm_tracker = ExponentialMovingAverageTracker(alpha=0.025)

episode_length = 0
num_episodes = 0
num_steps = 0
if args.save_logs:
    repo = git.Repo(search_parent_directories=True)
    sha = repo.head.object.hexsha
Example #7
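A behavioural test that a snake below size 4 cannot boost: two size-3 snakes are laid out manually, driven through a fixed action sequence, and agent 0's head position is asserted against hand-computed expectations after each step.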
    def test_cant_boost_until_size_4(self):
        # Create a size 3 snake and try boosting with it
        env = MultiSnake(num_envs=1, num_snakes=2, size=size, manual_setup=True, boost=True)
        env.foods[:, 0, 1, 1] = 1
        # Snake 1
        env.heads[0, 0, 5, 5] = 1
        env.bodies[0, 0, 5, 5] = 3
        env.bodies[0, 0, 4, 5] = 2
        env.bodies[0, 0, 4, 4] = 1
        # Snake 2
        env.heads[1, 0, 8, 7] = 1
        env.bodies[1, 0, 8, 7] = 3
        env.bodies[1, 0, 8, 8] = 2
        env.bodies[1, 0, 8, 9] = 1

        # Get orientations manually
        _envs = torch.cat([
            env.foods.repeat_interleave(env.num_snakes, dim=0),
            env.heads,
            env.bodies
        ], dim=1)

        env.orientations = determine_orientations(_envs)

        expected_head_positions = torch.tensor([
            [6, 5],
            [6, 4],
            [5, 4],
        ])

        all_actions = {
            'agent_0': torch.tensor([4, 1, 2]).unsqueeze(1).long().to(DEFAULT_DEVICE),
            'agent_1': torch.tensor([0, 1, 3]).unsqueeze(1).long().to(DEFAULT_DEVICE),
        }

        print_or_render(env)

        for i in range(all_actions['agent_0'].shape[0]):
            actions = {
                agent: agent_actions[i] for agent, agent_actions in all_actions.items()
            }

            observations, rewards, dones, info = env.step(actions)

            env.reset(dones['__all__'])

            env.check_consistency()

            for i_agent in range(env.num_snakes):
                _env = torch.cat([
                    env.foods,
                    env.heads[i_agent].unsqueeze(0),
                    env.bodies[i_agent].unsqueeze(0)
                ], dim=1)

                flat_argmax = head(_env)[0, 0].flatten().argmax()
                head_position = torch.tensor([flat_argmax // size, flat_argmax % size])

                if i_agent == 0:
                    self.assertTrue(torch.equal(expected_head_positions[i], head_position))

            print_or_render(env)
Example #8
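A test fixture that manually lays out two deterministic snakes in each environment and then derives their orientations from the assembled food, head and body state.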
def get_test_env(num_envs=1):
    env = MultiSnake(num_envs=num_envs, num_snakes=2, size=size, manual_setup=True)

    for i in range(num_envs):
        # Snake 1
        env.heads[2*i, 0, 5, 5] = 1
        env.bodies[2*i, 0, 5, 5] = 4
        env.bodies[2*i, 0, 4, 5] = 3
        env.bodies[2*i, 0, 4, 4] = 2
        env.bodies[2*i, 0, 4, 3] = 1
        # Snake 2
        env.heads[2*i+1, 0, 8, 7] = 1
        env.bodies[2*i+1, 0, 8, 7] = 4
        env.bodies[2*i+1, 0, 8, 8] = 3
        env.bodies[2*i+1, 0, 8, 9] = 2
        env.bodies[2*i+1, 0, 9, 9] = 1

    _envs = torch.cat([
        env.foods.repeat_interleave(env.num_snakes, dim=0),
        env.heads,
        env.bodies
    ], dim=1)

    env.orientations = determine_orientations(_envs)

    return env
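A minimal usage sketch (assumed, not part of the source): stepping the fixture once with one action per agent, in the same per-agent action format as Example #7.

# Hypothetical usage of get_test_env (illustrative, not from the source):
# step the deterministic fixture once and re-check internal consistency.
env = get_test_env(num_envs=1)
actions = {
    'agent_0': torch.tensor([1]).long().to(DEFAULT_DEVICE),  # action indices are illustrative
    'agent_1': torch.tensor([2]).long().to(DEFAULT_DEVICE),
}
observations, rewards, dones, info = env.step(actions)
env.check_consistency()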
Example #9
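A throughput benchmark: sweeps the number of parallel environments in powers of two from 16 to 4096 and times 10 steps of random actions at each scale on the GPU.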
parser = argparse.ArgumentParser()
parser.add_argument('--num-agents', type=int, default=10)
parser.add_argument('--size', type=int, default=36)
args = parser.parse_args()

num_envs = np.logspace(4, 12, 9, base=2).astype(int)  # 16, 32, 64, ..., 4096
num_steps = 10

fps = []

for n in num_envs:
    env = MultiSnake(num_envs=n,
                     num_snakes=args.num_agents,
                     size=args.size,
                     manual_setup=False,
                     boost=True,
                     verbose=False,
                     device='cuda',
                     respawn_mode='any')

    all_actions = {
        f'agent_{i}':
        torch.randint(8, size=(num_steps, n)).long().to(DEFAULT_DEVICE)
        for i in range(args.num_agents)
    }

    env_steps = 0
    t0 = time()
    for i in range(num_steps):
        actions = {
            agent: agent_actions[i] for agent, agent_actions in all_actions.items()
        }
        observations, reward, done, info = env.step(actions)