# Shared imports for the environment configs below. The import paths are
# assumed to follow the MOOG library layout; adjust them to your local
# module structure if it differs.
import collections

import numpy as np

from moog import action_spaces
from moog import game_rules
from moog import maze_lib
from moog import observers
from moog import physics as physics_lib
from moog import shapes
from moog import sprite
from moog import tasks
from moog.observers import polygon_modifiers
from moog.state_initialization import distributions as distribs
from moog.state_initialization import sprite_generators


def get_config(_):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Occluder
    occluder_shape = np.array(
        [[-0.1, 0.2], [1.1, 0.2], [1.1, 0.6], [-0.1, 0.6]])
    occluder = sprite.Sprite(
        x=0., y=0., shape=occluder_shape, scale=1., c0=0.6, c1=1., c2=1.)

    # Prey
    prey_factors = distribs.Product(
        [
            distribs.Continuous('x', 0.1, 0.8),
            distribs.Continuous('x_vel', -0.01, 0.01),
        ],
        y=1.2,
        y_vel=-0.007,
        shape='circle',
        scale=0.07,
        c0=0.2,
        c1=1.,
        c2=1.,
    )

    # Walls
    left_wall = [[0.05, -0.2], [0.05, 2], [-1, 2], [-1, -0.2]]
    right_wall = [[0.95, -0.2], [0.95, 2], [2, 2], [2, -0.2]]
    walls = [
        sprite.Sprite(shape=np.array(v), x=0, y=0, c0=0., c1=0., c2=0.5)
        for v in [left_wall, right_wall]
    ]

    def state_initializer():
        agent = sprite.Sprite(
            x=0.5, y=0.1, shape='square', aspect_ratio=0.2, scale=0.1,
            c0=0.33, c1=1., c2=0.66)
        state = collections.OrderedDict([
            ('walls', walls),
            ('prey', [sprite.Sprite(**prey_factors.sample())]),
            ('agent', [agent]),
            ('occluder', [occluder]),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    asymmetric_collision = physics_lib.Collision(
        elasticity=1., symmetric=False, update_angle_vel=False)
    inelastic_collision = physics_lib.Collision(
        elasticity=0., symmetric=False, update_angle_vel=False)
    physics = physics_lib.Physics(
        (agent_friction_force, 'agent'),
        (inelastic_collision, 'agent', 'walls'),
        (asymmetric_collision, 'prey', 'walls'),
        updates_per_env_step=10,
    )

    ############################################################################
    # Task
    ############################################################################

    contact_task = tasks.ContactReward(1., layers_0='agent', layers_1='prey')
    reset_task = tasks.Reset(
        condition=lambda state: all([s.y < 0. for s in state['prey']]),
        steps_after_condition=15,
    )
    task = tasks.CompositeTask(contact_task, reset_task)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.002, action_layers='agent', constrained_lr=True)

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(
        image_size=(64, 64), anti_aliasing=1, color_to_rgb='hsv_to_rgb')

    ############################################################################
    # Game rules
    ############################################################################

    prey_vanish = game_rules.VanishOnContact(
        vanishing_layer='prey',
        contacting_layer='agent',
    )
    rules = (prey_vanish,)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': rules,
    }
    return config
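# A minimal usage sketch for the config above. It assumes MOOG's
# dm_env-style `Environment` wrapper, which the library's demo scripts
# construct by unpacking the config dict; the import path and the action
# format (a joystick vector in [-1, 1]^2) are assumptions, not part of this
# file.
def _demo_occluded_prey(num_steps=100):
    from moog import environment  # Assumed import path.

    env = environment.Environment(**get_config(None))
    timestep = env.reset()
    for _ in range(num_steps):
        # With constrained_lr=True, only the horizontal joystick component
        # moves the paddle-like agent.
        timestep = env.step(np.array([1., 0.]))
    return timestep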
def _get_config(num_ghosts, maze_size):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agent
    agent_factors = dict(shape='circle', scale=0.05, c0=0.33, c1=1., c2=0.66)

    # Prey
    prey_factors = dict(shape='circle', scale=0.025, c0=0.2, c1=1., c2=1.)

    # Ghosts. Infinite mass glues the ghosts in place until the un-glue rule
    # below sets their mass to 1 once the agent starts moving.
    ghost_factors = dict(
        shape='circle', scale=0.05, mass=np.inf, c0=0., c1=1., c2=0.8)

    def state_initializer():
        maze = maze_lib.generate_random_maze_matrix(
            size=maze_size, ambient_size=12)
        maze = maze_lib.Maze(np.flip(maze, axis=0))
        walls = maze.to_sprites(c0=0., c1=0., c2=0.8)

        # Sample positions in maze grid of agent and ghosts
        n_ghosts = num_ghosts()
        points = maze.sample_distinct_open_points(1 + n_ghosts)
        positions = [maze.grid_side * (0.5 + np.array(x)) for x in points]

        # Agent
        agent_position = positions[0]
        agent = [
            sprite.Sprite(
                x=agent_position[1], y=agent_position[0], **agent_factors)
        ]

        # Ghosts
        ghosts = []
        for i in range(n_ghosts):
            position = positions[i + 1]
            ghosts.append(
                sprite.Sprite(x=position[1], y=position[0], **ghost_factors))

        # Place prey at every open maze location
        prey = []
        open_maze_points = np.argwhere(maze.maze == 0)
        for p in open_maze_points:
            pos = maze.grid_side * (0.5 + np.array(p))
            prey.append(sprite.Sprite(x=pos[1], y=pos[0], **prey_factors))

        state = collections.OrderedDict([
            ('walls', walls),
            ('prey', prey),
            ('ghosts', ghosts),
            ('agent', agent),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    maze_physics = physics_lib.MazePhysics(
        maze_layer='walls',
        avatar_layers=('agent', 'prey', 'ghosts'),
        constant_speed=0.015,
    )

    physics = physics_lib.Physics(
        (physics_lib.RandomMazeWalk(speed=0.015), ['ghosts']),
        updates_per_env_step=1,
        corrective_physics=[maze_physics],
    )

    ############################################################################
    # Task
    ############################################################################

    ghost_task = tasks.ContactReward(
        -5, layers_0='agent', layers_1='ghosts', reset_steps_after_contact=0)
    prey_task = tasks.ContactReward(1, layers_0='agent', layers_1='prey')
    reset_task = tasks.Reset(
        condition=lambda state: len(state['prey']) == 0,
        steps_after_condition=5,
    )
    task = tasks.CompositeTask(
        ghost_task, prey_task, reset_task, timeout_steps=1000)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Grid(
        scaling_factor=0.015,
        action_layers='agent',
        control_velocity=True,
        momentum=0.5,  # Value irrelevant, since maze_physics has constant speed
    )

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(
        image_size=(256, 256),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
    )

    ############################################################################
    # Game rules
    ############################################################################

    def _unglue(s):
        s.mass = 1.

    def _unglue_condition(state):
        return not np.all(state['agent'][0].velocity == 0)

    unglue = game_rules.ConditionalRule(
        condition=_unglue_condition,
        rules=game_rules.ModifySprites(('prey', 'ghosts'), _unglue),
    )

    vanish_on_contact = game_rules.VanishOnContact(
        vanishing_layer='prey', contacting_layer='agent')

    rules = (vanish_on_contact, unglue)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': rules,
    }
    return config
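# `_get_config` takes a *callable* for the ghost count: it is invoked inside
# `state_initializer`, so the number of ghosts is re-sampled at every episode
# reset. A sketch of a level wrapper; the particular values here are
# illustrative only, not the module's actual levels:
def get_config(_):
    """Hypothetical level: 8x8 maze with 2-4 ghosts per episode."""
    return _get_config(num_ghosts=lambda: np.random.randint(2, 5),
                       maze_size=8)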
def _get_config(num_prey, num_predators):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agent
    agent_factors = distribs.Product(
        [distribs.Continuous('x', 0., 1.),
         distribs.Continuous('y', 0., 1.)],
        scale=0.08,
        c0=0,
        c1=255,
        c2=0,
    )

    # Predators
    predator_factors = distribs.Product(
        [distribs.Continuous('x', 0., 1.),
         distribs.Continuous('y', 0., 1.),
         distribs.Continuous('x_vel', -0.02, 0.02),
         distribs.Continuous('y_vel', -0.02, 0.02)],
        scale=0.08,
        shape='circle',
        opacity=192,
        c0=255,
        c1=0,
        c2=0,
    )

    # Prey
    prey_factors = distribs.Product(
        [distribs.Continuous('x', 0., 1.),
         distribs.Continuous('y', 0., 1.),
         distribs.Continuous('x_vel', -0.02, 0.02),
         distribs.Continuous('y_vel', -0.02, 0.02)],
        scale=0.08,
        shape='circle',
        opacity=192,
        c0=255,
        c1=255,
        c2=0,
    )

    # Create callable initializer returning entire state
    predator_generator = sprite_generators.generate_sprites(
        predator_factors, num_sprites=num_predators)
    prey_generator = sprite_generators.generate_sprites(
        prey_factors, num_sprites=num_prey)

    def state_initializer():
        """Callable returning state at every episode reset."""
        agent = sprite.Sprite(**agent_factors.sample())
        predators = predator_generator(without_overlapping=(agent,))
        prey = prey_generator(without_overlapping=(agent,))

        state = collections.OrderedDict([
            ('prey', prey),
            ('predators', predators),
            ('agent', [agent]),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    random_force = physics_lib.RandomForce(max_force_magnitude=0.01)
    predator_attraction = physics_lib.DistanceForce(
        physics_lib.linear_force_fn(zero_intercept=-0.001, slope=0.0005))
    prey_avoid = physics_lib.DistanceForce(
        physics_lib.linear_force_fn(zero_intercept=0.001, slope=-0.0005))

    forces = (
        (agent_friction_force, 'agent'),
        (random_force, ['predators', 'prey']),
        (predator_attraction, 'agent', 'predators'),
        (prey_avoid, 'agent', 'prey'),
    )

    constant_speed = physics_lib.ConstantSpeed(
        layer_names=['prey', 'predators'], speed=0.015)

    physics = physics_lib.Physics(
        *forces,
        updates_per_env_step=10,
        corrective_physics=[constant_speed],
    )

    ############################################################################
    # Task
    ############################################################################

    predator_task = tasks.ContactReward(
        -5, layers_0='agent', layers_1='predators',
        reset_steps_after_contact=0)
    prey_task = tasks.ContactReward(1, layers_0='agent', layers_1='prey')
    reset_task = tasks.Reset(
        condition=lambda state: len(state['prey']) == 0,
        steps_after_condition=5,
    )
    task = tasks.CompositeTask(
        reset_task, predator_task, prey_task, timeout_steps=300)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.025, action_layers='agent', control_velocity=True)

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        polygon_modifier=polygon_modifiers.TorusGeometry(
            ['agent', 'predators', 'prey']),
    )

    ############################################################################
    # Game rules
    ############################################################################

    prey_vanish = game_rules.VanishOnContact(
        vanishing_layer='prey', contacting_layer='agent')

    def _torus_position_wrap(s):
        s.position = np.remainder(s.position, 1)

    torus_position_wrap = game_rules.ModifySprites(
        ('agent', 'predators', 'prey'), _torus_position_wrap)

    rules = (prey_vanish, torus_position_wrap)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': rules,
    }
    return config
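# The torus wrap rule above uses `np.remainder`, which (unlike `np.fmod`)
# returns non-negative values for negative inputs, so sprites exiting either
# edge of the unit square re-enter from the opposite side. A quick check:
def _torus_wrap_check():
    wrapped = np.remainder(np.array([1.03, -0.02]), 1)
    assert np.allclose(wrapped, [0.03, 0.98])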
def get_config(_):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agent
    agent_factors = distribs.Product(
        [distribs.Continuous('x', 0.1, 0.9),
         distribs.Continuous('y', 0.1, 0.9)],
        shape='circle',
        scale=0.1,
        c0=0.33,
        c1=1.,
        c2=0.7,
    )

    # Predators
    predator_factors = distribs.Product(
        [distribs.Continuous('x', 0.1, 0.9),
         distribs.Continuous('y', 0.1, 0.9)],
        shape='circle',
        scale=0.1,
        c0=0.,
        c1=1.,
        c2=0.8,
    )

    # Prey
    prey_factors = distribs.Product(
        [distribs.Continuous('x', 0.1, 0.9),
         distribs.Continuous('y', 0.1, 0.9)],
        shape='circle',
        scale=0.1,
        c0=0.2,
        c1=1.,
        c2=1.,
    )

    # Boosters
    booster_factors = distribs.Product(
        [distribs.Continuous('x', 0.1, 0.9),
         distribs.Continuous('y', 0.1, 0.9)],
        shape='triangle',
        scale=0.1,
        c0=0.6,
        c1=1.,
        c2=1.,
    )

    # Portals
    portal_factors = dict(shape='square', scale=0.1, c0=0., c1=0., c2=0.95)
    portal_sprites = [
        sprite.Sprite(x=0.125, y=0.125, **portal_factors),
        sprite.Sprite(x=0.875, y=0.875, **portal_factors),
    ]

    # Walls
    wall_color = dict(c0=0., c1=0., c2=0.5)
    island_wall_shape_0 = np.array(
        [[0.2, 0.2], [0.4, 0.2], [0.4, 0.4], [0.2, 0.4]])
    island_wall_shapes = [
        island_wall_shape_0,
        island_wall_shape_0 + np.array([[0., 0.4]]),
        island_wall_shape_0 + np.array([[0.4, 0.4]]),
        island_wall_shape_0 + np.array([[0.4, 0.]]),
    ]
    island_walls = [
        sprite.Sprite(shape=shape, x=0., y=0., **wall_color)
        for shape in island_wall_shapes
    ]
    boundary_walls = shapes.border_walls(visible_thickness=0.05, **wall_color)
    walls = boundary_walls + island_walls

    # Callable sprite generators
    agent_generator = sprite_generators.generate_sprites(
        agent_factors, num_sprites=1)
    predator_generator = sprite_generators.generate_sprites(
        predator_factors, num_sprites=1)
    prey_generator = sprite_generators.generate_sprites(
        prey_factors, num_sprites=lambda: np.random.randint(2, 5))
    booster_generator = sprite_generators.generate_sprites(
        booster_factors, num_sprites=2)

    # Create callable initializer returning entire state
    def state_initializer():
        portals = portal_sprites
        agent = agent_generator(without_overlapping=walls)
        predators = predator_generator(without_overlapping=walls + agent)
        boosters = booster_generator(without_overlapping=walls + agent)
        prey = prey_generator(without_overlapping=walls)
        state = collections.OrderedDict([
            ('walls', walls),
            ('portals', portals),
            ('boosters', boosters),
            ('prey', prey),
            ('predators', predators),
            ('agent', agent),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    predator_friction_force = physics_lib.Drag(coeff_friction=0.05)
    predator_random_force = physics_lib.RandomForce(max_force_magnitude=0.02)
    prey_friction_force = physics_lib.Drag(coeff_friction=0.02)
    prey_random_force = physics_lib.RandomForce(max_force_magnitude=0.02)
    predator_attraction = physics_lib.DistanceForce(
        force_fn=physics_lib.linear_force_fn(zero_intercept=-0.002,
                                             slope=0.001))
    asymmetric_collision = physics_lib.Collision(
        elasticity=0.25, symmetric=False, update_angle_vel=False)

    forces = (
        (agent_friction_force, 'agent'),
        (predator_friction_force, 'predators'),
        (predator_random_force, 'predators'),
        (prey_friction_force, 'prey'),
        (prey_random_force, 'prey'),
        (predator_attraction, 'agent', 'predators'),
        (asymmetric_collision, ['agent', 'predators', 'prey'], 'walls'),
    )

    physics = physics_lib.Physics(*forces, updates_per_env_step=5)

    ############################################################################
    # Task
    ############################################################################

    predator_task = tasks.ContactReward(
        -5, layers_0='agent', layers_1='predators',
        reset_steps_after_contact=0)
    prey_task = tasks.ContactReward(1, layers_0='agent', layers_1='prey')
    reset_task = tasks.Reset(
        condition=lambda state: len(state['prey']) == 0,
        steps_after_condition=5,
    )
    task = tasks.CompositeTask(
        predator_task, prey_task, reset_task, timeout_steps=400)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.01,
        action_layers='agent',
    )

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
    )

    ############################################################################
    # Game rules
    ############################################################################

    disappear_rule = game_rules.VanishOnContact(
        vanishing_layer='prey', contacting_layer='agent')
    portal_rule = game_rules.Portal(
        teleporting_layer='agent', portal_layer='portals')
    rules = (disappear_rule, portal_rule, Booster())

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': rules,
    }
    return config
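# `Booster()` in the rules above is a custom game rule defined elsewhere in
# this module, not part of the excerpt. A rough sketch of the core effect it
# presumably applies on agent/booster contact; the MOOG rule interface is not
# reproduced here, and `boost_factor` is a hypothetical parameter:
def _apply_boost(agent_sprite, boost_factor=2.):
    # Scale the agent's velocity, giving a momentary speed boost.
    agent_sprite.velocity = boost_factor * np.asarray(agent_sprite.velocity)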
def get_config(_):
    """Get environment config."""
    # NOTE: `_FIELD_BUFFER`, `_GRID_SIZE`, `_VANISH_DIST`,
    # `_get_boundary_pos_distribution`, and `_get_vel_distribution` are
    # module-level constants/helpers defined elsewhere in this file.

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agent
    agent = sprite.Sprite(
        x=0.5, y=0.5, shape='circle', scale=0.04, c0=0.33, c1=1., c2=0.66)
    annulus_vertices = shapes.annulus_vertices(
        inner_radius=0.08, outer_radius=0.3)
    agent_annulus = sprite.Sprite(
        x=0.5, y=0.5, shape=annulus_vertices, scale=1., c0=0.6, c1=1., c2=1.)

    # Predator generator
    max_predator_vel = 0.02
    predator_pos = _get_boundary_pos_distribution(_FIELD_BUFFER)
    predator_vel = _get_vel_distribution(
        0.5 * max_predator_vel, max_predator_vel)
    predator_factors = distribs.Product(
        [predator_pos, predator_vel,
         distribs.Continuous('scale', 0.07, 0.13)],
        shape='circle',
        c0=0.,
        c1=1.,
        c2=0.8,
    )

    # Prey generator
    max_prey_vel = 0.01
    prey_pos = _get_boundary_pos_distribution(_FIELD_BUFFER)
    prey_vel = _get_vel_distribution(0.5 * max_prey_vel, max_prey_vel)
    prey_factors = distribs.Product(
        [prey_pos, prey_vel, distribs.Continuous('scale', 0.07, 0.13)],
        shape='circle',
        c0=0.2,
        c1=1.,
        c2=1.,
    )

    # Grid
    grid = shapes.grid_lines(
        grid_x=_GRID_SIZE, grid_y=_GRID_SIZE, buffer_border=1.,
        c0=0., c1=0., c2=0.5)

    def state_initializer():
        state = collections.OrderedDict([
            ('grid', grid),
            ('prey', []),
            ('agent', [agent]),
            ('predators', []),
            ('agent_annulus', [agent_annulus]),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    physics = physics_lib.Physics(
        (agent_friction_force, ['agent', 'agent_annulus']),
        updates_per_env_step=10,
    )

    ############################################################################
    # Task
    ############################################################################

    # Rewards scale with sprite size: contacting a predator of scale 0.1
    # costs -2 * 0.1 = -0.2, while a prey of scale 0.1 pays +0.1.
    def _predator_reward_fn(_, predator_sprite):
        return -2. * predator_sprite.scale

    predator_task = tasks.ContactReward(
        reward_fn=_predator_reward_fn,
        layers_0='agent',
        layers_1='predators',
        reset_steps_after_contact=0,
    )

    def _prey_reward_fn(_, prey_sprite):
        return prey_sprite.scale

    prey_task = tasks.ContactReward(
        reward_fn=_prey_reward_fn,
        layers_0='agent',
        layers_1='prey',
    )
    task = tasks.CompositeTask(predator_task, prey_task)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.003,
        action_layers=('agent', 'agent_annulus'),
        constrained_lr=False,
    )

    ############################################################################
    # Observer
    ############################################################################

    _polygon_modifier = observers.polygon_modifiers.FirstPersonAgent(
        agent_layer='agent')
    observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
        polygon_modifier=_polygon_modifier,
    )

    ############################################################################
    # Game rules
    ############################################################################

    # Make predators appear randomly
    predator_appear_generator = sprite_generators.generate_sprites(
        predator_factors, num_sprites=1)
    predator_appear = game_rules.ConditionalRule(
        condition=lambda state: np.random.binomial(1, p=0.5),
        rules=game_rules.CreateSprites('predators', predator_appear_generator),
    )

    # Make prey appear randomly
    prey_appear_generator = sprite_generators.generate_sprites(
        prey_factors, num_sprites=1)
    prey_appear = game_rules.ConditionalRule(
        condition=lambda state: np.random.binomial(1, p=0.2),
        rules=game_rules.CreateSprites('prey', prey_appear_generator),
    )

    # Make predators and prey vanish when they are distant enough and moving
    # away.
    vanish_range = [-1. * _VANISH_DIST, 1. + _VANISH_DIST]

    def _should_vanish(s):
        pos_too_small = (s.position < vanish_range[0]) * (s.velocity < 0.)
        pos_too_large = (s.position > vanish_range[1]) * (s.velocity > 0.)
        return any(pos_too_small) or any(pos_too_large)

    predator_vanish = game_rules.VanishByFilter('predators', _should_vanish)
    prey_vanish = game_rules.VanishByFilter('prey', _should_vanish)

    # Keep agent near center
    keep_near_center = game_rules.KeepNearCenter(
        agent_layer='agent',
        layers_to_center=['agent_annulus', 'predators', 'prey'],
        grid_x=_GRID_SIZE,
    )

    # Make prey vanish when contacted by agent
    prey_caught = game_rules.VanishOnContact(
        vanishing_layer='prey', contacting_layer='agent')

    rules = (predator_appear, prey_appear, prey_vanish, predator_vanish,
             keep_near_center, prey_caught)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': rules,
    }
    return config
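# Usage sketch (same assumptions as the earlier demo: the `moog.environment`
# import path and joystick action format are assumptions). Both 'agent' and
# 'agent_annulus' receive the joystick action, so the annulus stays centered
# on the agent while the FirstPersonAgent polygon modifier renders the scene
# with the agent fixed at the image center.
def _demo_first_person(num_steps=50):
    from moog import environment  # Assumed import path.

    env = environment.Environment(**get_config(None))
    timestep = env.reset()
    for _ in range(num_steps):
        timestep = env.step(np.array([0.3, -0.2]))  # Joystick in [-1, 1]^2.
    # Observations are keyed by observer name, 'image' in this config.
    return timestep.observation['image']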