def get_env():
    """Get simple environment, wrapped in a simulation wrapper."""

    def _state_initializer():
        agent = sprite.Sprite(x=0.5, y=0.5, scale=0.1, c0=128)
        target = sprite.Sprite(x=0.75, y=0.5, scale=0.1, c1=128)
        state = collections.OrderedDict([
            ('agent', [agent]),
            ('target', [target]),
        ])
        return state

    task = tasks.ContactReward(
        1., 'agent', 'target', reset_steps_after_contact=2)
    action_space = action_spaces.Grid(
        0.1, action_layers='agent', control_velocity=True)

    def _modify_meta_state(meta_state):
        meta_state['key'] = meta_state['key'] + 1

    update_meta_state = game_rules.ModifyMetaState(_modify_meta_state)

    env = environment.Environment(
        state_initializer=_state_initializer,
        physics=physics_lib.Physics(),
        task=task,
        action_space=action_space,
        observers={'image': observers.PILRenderer(image_size=(64, 64))},
        meta_state_initializer=lambda: {'key': 0},
        game_rules=(update_meta_state,),
    )
    sim_env = simulation.SimulationEnvironment(env)
    return sim_env
def testGridActions(self):

    def _state_initializer():
        agent = sprite.Sprite(x=0.5, y=0.5, scale=0.1, c0=128)
        return collections.OrderedDict([('agent', [agent])])

    max_episode_length = 5
    task = tasks.Reset(
        lambda _: True, steps_after_condition=max_episode_length - 1)
    env = environment.Environment(
        state_initializer=_state_initializer,
        physics=physics_lib.Physics(),
        task=task,
        action_space=action_spaces.Grid(),
        observers={'image': observers.PILRenderer(image_size=(64, 64))})
    gym_env = gym_wrapper.GymWrapper(env)

    assert gym_env.observation_space == spaces.Dict({
        'image': spaces.Box(
            -np.inf, np.inf, shape=(64, 64, 3), dtype=np.uint8)
    })
    assert gym_env.action_space == spaces.Discrete(5)

    for _ in range(3):
        gym_env.reset()
        for _ in range(max_episode_length - 1):
            action = gym_env.action_space.sample()
            obs, reward, done, _ = gym_env.step(action)
            assert obs['image'].dtype == np.uint8
            assert not done
            assert reward == 0.
        action = gym_env.action_space.sample()
        _, _, done, _ = gym_env.step(action)
        assert done
        _, _, done, _ = gym_env.step(action)
        assert not done
def _run_test(self, tether, step_1_state, final_state, plot=False):
    """Run test.

    Set plot = True to display videos of the test conditions.

    Args:
        tether: Tether force.
        step_1_state: Iterable of lists, one for each sprite. Each element is a
            list of [position, velocity, angle_vel] for the sprite after the
            first physics step.
        final_state: Same as step_1_state, except for the final step.
        plot: Bool. Whether to display video or run test.
    """
    state = get_state()
    collision = physics_lib.Collision(
        elasticity=0., symmetric=False, update_angle_vel=True)
    physics = physics_lib.Physics(
        (collision, 'sprites', 'walls'),
        corrective_physics=[tether],
        updates_per_env_step=10,
    )
    steps = 45

    if plot:
        MatplotlibUI()._simulate_video(physics, state, steps=steps)
    else:
        physics.step(state)
        for s, pred in zip(state['sprites'], step_1_state):
            assert np.allclose(s.position, pred[0], atol=_ATOL)
            assert np.allclose(s.velocity, pred[1], atol=_ATOL)
            assert np.allclose(s.angle_vel, pred[2], atol=_ATOL)

        for _ in range(steps - 1):
            physics.step(state)
        for s, pred in zip(state['sprites'], final_state):
            assert np.allclose(s.position, pred[0], atol=_ATOL)
            assert np.allclose(s.velocity, pred[1], atol=_ATOL)
            assert np.allclose(s.angle_vel, pred[2], atol=_ATOL)
def _get_config(max_vel):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Grid
    grid = shapes.grid_lines(
        grid_x=_GRID_SIZE, grid_y=_GRID_SIZE, buffer_border=1.,
        c0=0., c1=0., c2=0.5)

    def state_initializer():
        agent = sprite.Sprite(
            x=0.5, y=0.5, shape='circle', scale=0.04, c0=0.33, c1=1., c2=0.66)
        annulus_shape = shapes.annulus_vertices(0.15, 2.)
        agent_annulus = sprite.Sprite(
            x=0.5, y=0.5, shape=annulus_shape, scale=1., c0=0.6, c1=1., c2=1.)
        prey = get_prey(
            get_parallelogram(min_axis_ratio=0.5),
            scale=0.4,
            max_vel=max_vel,
            sprite_scale=0.075,
        )
        state = collections.OrderedDict([
            ('grid', grid),
            ('prey', prey),
            ('agent', [agent]),
            ('agent_annulus', [agent_annulus]),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    force = (physics_lib.Drag(coeff_friction=0.25), ['agent', 'agent_annulus'])
    corrective_physics = physics_lib.Tether(('prey',), update_angle_vel=True)
    physics = physics_lib.Physics(
        force,
        updates_per_env_step=10,
        corrective_physics=corrective_physics,
    )

    ############################################################################
    # Task
    ############################################################################

    prey_task = tasks.ContactReward(
        1,
        layers_0='agent',
        layers_1='prey',
        condition=lambda s_agent, s_prey: s_prey.c1 > 0.5,
    )
    reset_trial_task = tasks.Reset(
        condition=lambda state: all([s.c1 < 0.5 for s in state['prey']]),
        steps_after_condition=10,
    )
    task = tasks.CompositeTask(prey_task, reset_trial_task, timeout_steps=500)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.01, action_layers=('agent', 'agent_annulus'))

    ############################################################################
    # Observer
    ############################################################################

    _polygon_modifier = observers.polygon_modifiers.FirstPersonAgent(
        agent_layer='agent')
    observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
        polygon_modifier=_polygon_modifier,
    )

    ############################################################################
    # Game rules
    ############################################################################

    # Make prey gray upon contact
    def _make_prey_gray(prey):
        prey.c1 = 0.
        prey.c2 = 0.6

    make_prey_gray = game_rules.ModifyOnContact(
        layers_0='agent',
        layers_1='prey',
        modifier_1=_make_prey_gray,
    )

    # Keep agent near center
    keep_near_center = game_rules.KeepNearCenter(
        agent_layer='agent',
        layers_to_center=['agent_annulus', 'prey'],
        grid_x=_GRID_SIZE,
    )

    rules = (make_prey_gray, keep_near_center)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': rules,
    }
    return config
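def _example_rollout():
    """Usage sketch (illustrative; not part of the config).

    Assumes the config dict returned above maps directly onto
    environment.Environment's keyword arguments, as in the other configs, and
    that the GymWrapper interface shown in the tests is available in this
    module. The function name, max_vel value, and step count are hypothetical.
    """
    env = environment.Environment(**_get_config(max_vel=0.02))
    gym_env = gym_wrapper.GymWrapper(env)
    gym_env.reset()
    for _ in range(100):
        # Random actions; obs['image'] is the rendered 64x64 frame.
        obs, _, done, _ = gym_env.step(gym_env.action_space.sample())
        if done:
            gym_env.reset()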
def _get_config(num_ghosts, maze_size):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agent
    agent_factors = dict(shape='circle', scale=0.05, c0=0.33, c1=1., c2=0.66)

    # Prey
    prey_factors = dict(shape='circle', scale=0.025, c0=0.2, c1=1., c2=1.)

    # Ghosts
    ghost_factors = dict(
        shape='circle', scale=0.05, mass=np.inf, c0=0., c1=1., c2=0.8)

    def state_initializer():
        maze = maze_lib.generate_random_maze_matrix(
            size=maze_size, ambient_size=12)
        maze = maze_lib.Maze(np.flip(maze, axis=0))
        walls = maze.to_sprites(c0=0., c1=0., c2=0.8)

        # Sample positions in maze grid of agent and ghosts
        n_ghosts = num_ghosts()
        points = maze.sample_distinct_open_points(1 + n_ghosts)
        positions = [maze.grid_side * (0.5 + np.array(x)) for x in points]

        # Agent
        agent_position = positions[0]
        agent = [
            sprite.Sprite(
                x=agent_position[1], y=agent_position[0], **agent_factors)
        ]

        # Ghosts
        ghosts = []
        for i in range(n_ghosts):
            position = positions[i + 1]
            ghosts.append(
                sprite.Sprite(x=position[1], y=position[0], **ghost_factors))

        # Place prey at every open maze location
        prey = []
        open_maze_points = np.argwhere(maze.maze == 0)
        for p in open_maze_points:
            pos = maze.grid_side * (0.5 + np.array(p))
            prey.append(sprite.Sprite(x=pos[1], y=pos[0], **prey_factors))

        state = collections.OrderedDict([
            ('walls', walls),
            ('prey', prey),
            ('ghosts', ghosts),
            ('agent', agent),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    maze_physics = physics_lib.MazePhysics(
        maze_layer='walls',
        avatar_layers=('agent', 'prey', 'ghosts'),
        constant_speed=0.015,
    )
    physics = physics_lib.Physics(
        (physics_lib.RandomMazeWalk(speed=0.015), ['ghosts']),
        updates_per_env_step=1,
        corrective_physics=[maze_physics],
    )

    ############################################################################
    # Task
    ############################################################################

    ghost_task = tasks.ContactReward(
        -5, layers_0='agent', layers_1='ghosts', reset_steps_after_contact=0)
    prey_task = tasks.ContactReward(1, layers_0='agent', layers_1='prey')
    reset_task = tasks.Reset(
        condition=lambda state: len(state['prey']) == 0,
        steps_after_condition=5,
    )
    task = tasks.CompositeTask(
        ghost_task, prey_task, reset_task, timeout_steps=1000)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Grid(
        scaling_factor=0.015,
        action_layers='agent',
        control_velocity=True,
        momentum=0.5,  # Value irrelevant, since maze_physics has constant speed
    )

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(
        image_size=(256, 256),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
    )

    ############################################################################
    # Game rules
    ############################################################################

    def _unglue(s):
        s.mass = 1.

    def _unglue_condition(state):
        return not np.all(state['agent'][0].velocity == 0)

    unglue = game_rules.ConditionalRule(
        condition=_unglue_condition,
        rules=game_rules.ModifySprites(('prey', 'ghosts'), _unglue),
    )

    vanish_on_contact = game_rules.VanishOnContact(
        vanishing_layer='prey', contacting_layer='agent')

    rules = (vanish_on_contact, unglue)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': rules,
    }
    return config
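def _example_construction():
    """Construction sketch (illustrative; not part of the config).

    num_ghosts is called inside state_initializer, so it should be a
    zero-argument callable. The ghost-count range and maze_size value below
    are hypothetical, as is the assumption that the config dict maps directly
    onto environment.Environment's keyword arguments.
    """
    config = _get_config(num_ghosts=lambda: np.random.randint(2, 5),
                         maze_size=10)
    return environment.Environment(**config)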
def get_config(num_targets):
    """Get environment config.

    Args:
        num_targets: Int. Number of targets.
    """
    if not isinstance(num_targets, int) or num_targets <= 0:
        raise ValueError(
            f'num_targets is {num_targets}, but must be a positive integer')

    ############################################################################
    # State initialization
    ############################################################################

    screen = sprite.Sprite(
        x=0.5, y=0.5, shape='square', scale=2., c0=0.6, c1=0.7, c2=0.7)
    target_factor_distrib = distribs.Product(
        [distribs.Continuous('c0', 0., 1.)],
        shape='circle',
        scale=0.085,
        c1=1.,
        c2=1.,
    )
    cover_factors = dict(
        mass=0., shape='circle', scale=0.1, c0=0., c1=0., c2=0.5, opacity=0)

    def state_initializer():
        """State initializer method to be fed into environment."""
        # Get targets and covers
        sprite_positions = 0.5 + 0.35 * _get_polygon(num_targets, 0.7)
        target_factors = [
            target_factor_distrib.sample() for _ in range(num_targets)
        ]
        targets = [
            sprite.Sprite(x=pos[0], y=pos[1], **factors)
            for pos, factors in zip(sprite_positions, target_factors)
        ]
        covers = [
            sprite.Sprite(x=pos[0], y=pos[1], **cover_factors)
            for pos in sprite_positions
        ]

        # Tag the cover metadata based on whether they are prey or not
        for i, s in enumerate(covers):
            if i == 0:
                s.metadata = {'prey': True}
            else:
                s.metadata = {'prey': False}

        # Make cue have the same factors as the first target, except slightly
        # smaller
        cue_factors = copy.deepcopy(target_factors[0])
        cue_factors['scale'] = 0.7 * target_factors[0]['scale']
        cue = sprite.Sprite(
            x=0.5, y=0.501, opacity=0, mass=np.inf, **cue_factors)

        agent = sprite.Sprite(
            x=0.5, y=0.5, shape='circle', scale=0.1, c0=0.4, c1=0., c2=1.,
            mass=np.inf)
        annulus_verts = shapes.annulus_vertices(0.34, 0.36)
        annulus = sprite.Sprite(
            x=0.5, y=0.5, shape=annulus_verts, scale=1., c0=0., c1=0., c2=0.3)

        state = collections.OrderedDict([
            ('annulus', [annulus]),
            ('targets', targets),
            ('covers', covers),
            ('agent', [agent]),
            ('cue', [cue]),
            ('screen', [screen]),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    drag = (physics_lib.Drag(coeff_friction=0.25), ['agent', 'cue'])
    tether_covers = physics_lib.TetherZippedLayers(
        ('targets', 'covers'), anchor=np.array([0.5, 0.5]))
    physics = physics_lib.Physics(
        drag,
        updates_per_env_step=1,
        corrective_physics=[tether_covers],
    )

    ############################################################################
    # Task
    ############################################################################

    contact_task = tasks.ContactReward(
        reward_fn=lambda _, s: 1 if s.metadata['prey'] else -1,
        layers_0='agent',
        layers_1='covers',
    )

    def _should_reset(state, meta_state):
        should_reset = (state['covers'][0].opacity == 0 and
                        meta_state['phase'] == 'response')
        return should_reset

    reset_task = tasks.Reset(
        condition=_should_reset,
        steps_after_condition=15,
    )
    task = tasks.CompositeTask(contact_task, reset_task, timeout_steps=800)

    ############################################################################
    # Action Space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.01, action_layers=['agent', 'cue'])

    ############################################################################
    # Observer
    ############################################################################

    _polygon_modifier = observers.polygon_modifiers.FirstPersonAgent(
        agent_layer='agent')
    observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
        polygon_modifier=_polygon_modifier,
    )

    ############################################################################
    # Game rules
    ############################################################################

    def _make_opaque(s):
        s.opacity = 255

    def _make_transparent(s):
        s.opacity = 0

    # Screen Phase
    screen_phase = gr.Phase(duration=1, name='screen')

    # Visible Phase
    disappear_screen = gr.ModifySprites('screen', _make_transparent)
    visible_phase = gr.Phase(
        one_time_rules=disappear_screen, duration=2, name='visible')

    # Motion Phase
    def _move(s):
        s.velocity = np.random.uniform(-0.25, 0.25, size=(2,))

    cover_targets = gr.ModifySprites('covers', _make_opaque)
    begin_motion = BeginMotion(angle_vel_range=(0.1, 0.3))
    motion_phase = gr.Phase(
        one_time_rules=[cover_targets, begin_motion],
        duration=100,
        name='motion',
    )

    # Response Phase
    def _stop(s):
        s.angle_vel = 0.
        s.velocity = np.zeros(2)

    def _unglue(s):
        s.mass = 1.

    appear_cue = gr.ModifySprites('cue', _make_opaque)
    stop_targets = gr.ModifySprites(('targets', 'covers'), _stop)
    unglue_agent = gr.ModifySprites(('agent', 'cue'), _unglue)
    make_targets_discoverable = gr.ModifyOnContact(
        layers_0='agent', layers_1='covers', modifier_1=_make_transparent)
    response_phase = gr.Phase(
        one_time_rules=[appear_cue, stop_targets, unglue_agent],
        continual_rules=make_targets_discoverable,
        name='response',
    )

    phase_sequence = gr.PhaseSequence(
        screen_phase,
        visible_phase,
        motion_phase,
        response_phase,
        meta_state_phase_name_key='phase',
    )

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': (phase_sequence,),
        'meta_state_initializer': lambda: {'phase': ''},
    }
    return config
def get_config(_):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agent
    agent_factors = distribs.Product(
        [distribs.Continuous('x', 0.1, 0.9),
         distribs.Continuous('y', 0.1, 0.9)],
        shape='circle',
        scale=0.1,
        c0=0.33,
        c1=1.,
        c2=0.66,
    )

    # Predators
    shape_0 = 1.8 * np.array(
        [[-0.3, -0.3], [0.1, -0.7], [0.4, 0.6], [-0.1, 0.25]])
    shape_1 = 1.5 * np.array(
        [[-0.5, -0.3], [-0.1, -0.7], [0.7, 0.1], [0., -0.1], [-0.3, 0.25]])
    predator_factors = distribs.Product(
        [distribs.Continuous('x', 0.2, 0.8),
         distribs.Continuous('y', 0.2, 0.8),
         distribs.Discrete(
             'shape', [shape_0, shape_1, 'star_5', 'triangle', 'spoke_5']),
         distribs.Continuous('angle', 0., 2 * np.pi),
         distribs.Continuous('aspect_ratio', 0.75, 1.25),
         distribs.Continuous('scale', 0.1, 0.15),
         distribs.Continuous('x_vel', -0.03, 0.03),
         distribs.Continuous('y_vel', -0.03, 0.03),
         distribs.Continuous('angle_vel', -0.05, 0.05)],
        c0=0.,
        c1=1.,
        c2=0.8,
    )

    # Walls
    walls = shapes.border_walls(visible_thickness=0.05, c0=0., c1=0., c2=0.5)

    # Create callable initializer returning entire state
    agent_generator = sprite_generators.generate_sprites(
        agent_factors, num_sprites=1)
    predator_generator = sprite_generators.generate_sprites(
        predator_factors, num_sprites=5)

    def state_initializer():
        predators = predator_generator(
            disjoint=True, without_overlapping=walls)
        agent = agent_generator(without_overlapping=walls + predators)
        state = collections.OrderedDict([
            ('walls', walls),
            ('predators', predators),
            ('agent', agent),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    asymmetric_collision = physics_lib.Collision(
        elasticity=1., symmetric=False, update_angle_vel=True)
    symmetric_collision = physics_lib.Collision(
        elasticity=1., symmetric=True, update_angle_vel=True)
    agent_wall_collision = physics_lib.Collision(
        elasticity=0., symmetric=False, update_angle_vel=False)
    forces = (
        (agent_friction_force, 'agent'),
        (symmetric_collision, 'predators', 'predators'),
        (asymmetric_collision, 'predators', 'walls'),
        (agent_wall_collision, 'agent', 'walls'),
    )
    physics = physics_lib.Physics(*forces, updates_per_env_step=10)

    ############################################################################
    # Task
    ############################################################################

    predator_task = tasks.ContactReward(
        -5, layers_0='agent', layers_1='predators')
    stay_alive_task = tasks.StayAlive(
        reward_period=20,
        reward_value=0.2,
    )
    task = tasks.CompositeTask(
        predator_task, stay_alive_task, timeout_steps=200)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.01, action_layers='agent')

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(
        image_size=(64, 64), anti_aliasing=1, color_to_rgb='hsv_to_rgb')

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
    }
    return config
def _get_config(translucent_occluder):
    """Get environment config."""

    ############################################################################
    # Physics
    ############################################################################

    elastic_collision = physics_lib.Collision(
        elasticity=1., symmetric=False, update_angle_vel=False)
    physics = physics_lib.Physics(
        (elastic_collision, 'targets', 'walls'),
        updates_per_env_step=10,
    )

    def _predict_contact(state):
        """Predict whether targets will contact."""
        while True:
            if state['targets'][0].overlaps_sprite(state['targets'][1]):
                return True
            if all(s.y > 1.1 and s.y_vel > 0 for s in state['targets']):
                # Both targets above screen and moving up
                break
            physics.step(state)
        return False

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Targets
    target_y_speed = 0.02
    target_factors = distribs.Product(
        [distribs.Continuous('x', 0.15, 0.85),
         distribs.Continuous('x_vel', -target_y_speed, target_y_speed)],
        y_vel=-target_y_speed,
        scale=0.16,
        shape='circle',
        opacity=192,
        c0=255,
        c1=0,
        c2=0,
    )

    # Occluder
    occluder = sprite.Sprite(
        x=0.5, y=0.2, shape='square', scale=1., c0=192, c1=192, c2=128,
        opacity=128 if translucent_occluder else 255)

    # Walls
    bottom_wall = [[-1, 0.1], [2, 0.1], [2, -1], [-1, -1]]
    left_wall = [[0.05, -1], [0.05, 4], [-1, 4], [-1, -1]]
    right_wall = [[0.95, -1], [0.95, 4], [2, 4], [2, -1]]
    walls = [
        sprite.Sprite(shape=np.array(v), x=0, y=0, c0=128, c1=128, c2=128)
        for v in [bottom_wall, left_wall, right_wall]
    ]

    # Make response boxes and tokens
    response_box_factors = dict(
        y=0.05, scale=0.12, shape='square', aspect_ratio=0.5, c0=0, c1=0, c2=0)
    response_boxes = [
        sprite.Sprite(x=0.4, **response_box_factors),
        sprite.Sprite(x=0.6, **response_box_factors),
    ]
    response_token_factors = dict(
        y=0.05, scale=0.03, shape='circle', c0=255, c1=0, c2=0, opacity=192)
    response_tokens = [
        sprite.Sprite(x=x, **response_token_factors)
        for x in [0.37, 0.43, 0.59, 0.61]
    ]

    def state_initializer():
        """Callable returning state ordereddict each episode reset."""
        agent = sprite.Sprite(
            x=0.5, y=0.05, scale=0.03, shape='spoke_4', c0=255, c1=255, c2=255)
        target_0 = sprite.Sprite(y=1.4, **target_factors.sample())
        target_1 = sprite.Sprite(
            y=np.random.uniform(1.7, 2.4), **target_factors.sample())
        screen = sprite.Sprite(
            x=0.5, y=0.5, shape='square', c0=128, c1=128, c2=128)
        state = collections.OrderedDict([
            ('targets', [target_0, target_1]),
            ('occluders', [occluder]),
            ('walls', walls),
            ('response_boxes', response_boxes),
            ('response_tokens', response_tokens),
            ('agent', [agent]),
            ('screen', [screen]),
        ])

        # Predict whether targets will contact, putting this information in
        # agent metadata
        orig_pos = [np.copy(s.position) for s in state['targets']]
        orig_vel = [np.copy(s.velocity) for s in state['targets']]
        agent.metadata = {'will_contact': _predict_contact(state)}
        for s, pos, vel in zip(state['targets'], orig_pos, orig_vel):
            s.position = pos
            s.velocity = vel

        return state

    ############################################################################
    # Task
    ############################################################################

    def _reward_fn(state):
        agent = state['agent'][0]
        if agent.overlaps_sprite(state['response_boxes'][0]):
            # Collision response
            return -1 if agent.metadata['will_contact'] else 1
        elif agent.overlaps_sprite(state['response_boxes'][1]):
            # No collision response
            return 1 if agent.metadata['will_contact'] else -1
        else:
            return 0

    conditional_task = tasks.Reset(
        condition=lambda state: _reward_fn(state) != 0,
        reward_fn=_reward_fn,
        steps_after_condition=5,
    )
    task = tasks.CompositeTask(conditional_task, timeout_steps=1000)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Grid(
        scaling_factor=0.015,
        action_layers='agent',
        control_velocity=True,
    )

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(image_size=(64, 64), anti_aliasing=1)

    ############################################################################
    # Game rules
    ############################################################################

    screen_vanish = game_rules.VanishByFilter('screen')
    screen_vanish = game_rules.TimedRule(
        step_interval=(15, 16), rules=(screen_vanish,))
    rules = (screen_vanish,)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': rules,
    }
    return config
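def _example_ground_truth_label():
    """Inspection sketch (illustrative; not part of the config).

    The ground-truth label computed by _predict_contact is stored in the
    agent's metadata, so it can be read directly from a sampled initial state.
    The function name is hypothetical.
    """
    config = _get_config(translucent_occluder=True)
    state = config['state_initializer']()
    return state['agent'][0].metadata['will_contact']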
def get_config(_):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agent
    agent = sprite.Sprite(
        x=0.5, y=0.5, shape='circle', scale=0.04, c0=0.33, c1=1., c2=0.66)
    annulus_vertices = shapes.annulus_vertices(
        inner_radius=0.08, outer_radius=0.3)
    agent_annulus = sprite.Sprite(
        x=0.5, y=0.5, shape=annulus_vertices, scale=1., c0=0.6, c1=1., c2=1.)

    # Predator generator
    max_predator_vel = 0.02
    predator_pos = _get_boundary_pos_distribution(_FIELD_BUFFER)
    predator_vel = _get_vel_distribution(
        0.5 * max_predator_vel, max_predator_vel)
    predator_factors = distribs.Product(
        [predator_pos, predator_vel, distribs.Continuous('scale', 0.07, 0.13)],
        shape='circle',
        c0=0.,
        c1=1.,
        c2=0.8,
    )

    # Prey generator
    max_prey_vel = 0.01
    prey_pos = _get_boundary_pos_distribution(_FIELD_BUFFER)
    prey_vel = _get_vel_distribution(0.5 * max_prey_vel, max_prey_vel)
    prey_factors = distribs.Product(
        [prey_pos, prey_vel, distribs.Continuous('scale', 0.07, 0.13)],
        shape='circle',
        c0=0.2,
        c1=1.,
        c2=1.,
    )

    # Grid
    grid = shapes.grid_lines(
        grid_x=_GRID_SIZE, grid_y=_GRID_SIZE, buffer_border=1.,
        c0=0., c1=0., c2=0.5)

    def state_initializer():
        state = collections.OrderedDict([
            ('grid', grid),
            ('prey', []),
            ('agent', [agent]),
            ('predators', []),
            ('agent_annulus', [agent_annulus]),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    physics = physics_lib.Physics(
        (agent_friction_force, ['agent', 'agent_annulus']),
        updates_per_env_step=10,
    )

    ############################################################################
    # Task
    ############################################################################

    def _predator_reward_fn(_, predator_sprite):
        return -2. * predator_sprite.scale

    predator_task = tasks.ContactReward(
        reward_fn=_predator_reward_fn,
        layers_0='agent',
        layers_1='predators',
        reset_steps_after_contact=0,
    )

    def _prey_reward_fn(_, prey_sprite):
        return prey_sprite.scale

    prey_task = tasks.ContactReward(
        reward_fn=_prey_reward_fn,
        layers_0='agent',
        layers_1='prey',
    )
    task = tasks.CompositeTask(predator_task, prey_task)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.003,
        action_layers=('agent', 'agent_annulus'),
        constrained_lr=False,
    )

    ############################################################################
    # Observer
    ############################################################################

    _polygon_modifier = observers.polygon_modifiers.FirstPersonAgent(
        agent_layer='agent')
    observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
        polygon_modifier=_polygon_modifier,
    )

    ############################################################################
    # Game rules
    ############################################################################

    # Make predators appear randomly
    predator_appear_generator = sprite_generators.generate_sprites(
        predator_factors, num_sprites=1)
    predator_appear = game_rules.ConditionalRule(
        condition=lambda state: np.random.binomial(1, p=0.5),
        rules=game_rules.CreateSprites('predators', predator_appear_generator),
    )

    # Make prey appear randomly
    prey_appear_generator = sprite_generators.generate_sprites(
        prey_factors, num_sprites=1)
    prey_appear = game_rules.ConditionalRule(
        condition=lambda state: np.random.binomial(1, p=0.2),
        rules=game_rules.CreateSprites('prey', prey_appear_generator),
    )

    # Make predators and prey vanish when they are distant enough and moving
    # away.
    vanish_range = [-1. * _VANISH_DIST, 1. + _VANISH_DIST]

    def _should_vanish(s):
        pos_too_small = (s.position < vanish_range[0]) * (s.velocity < 0.)
        pos_too_large = (s.position > vanish_range[1]) * (s.velocity > 0.)
        return any(pos_too_small) or any(pos_too_large)

    predator_vanish = game_rules.VanishByFilter('predators', _should_vanish)
    prey_vanish = game_rules.VanishByFilter('prey', _should_vanish)

    # Keep agent near center
    keep_near_center = game_rules.KeepNearCenter(
        agent_layer='agent',
        layers_to_center=['agent_annulus', 'predators', 'prey'],
        grid_x=_GRID_SIZE,
    )

    # Make prey vanish when contacted by agent
    prey_caught = game_rules.VanishOnContact(
        vanishing_layer='prey', contacting_layer='agent')

    rules = (predator_appear, prey_appear, prey_vanish, predator_vanish,
             keep_near_center, prey_caught)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': rules,
    }
    return config
def get_config(_):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agents
    agent_factors = distribs.Product(
        [distribs.Continuous('x', 0., 1.),
         distribs.Continuous('y', 0.35, 0.65)],
        shape='circle',
        scale=0.1,
        c1=1.,
        c2=0.7,
    )
    agent_0_factors = distribs.Product([agent_factors], c0=0.2)
    agent_1_factors = distribs.Product([agent_factors], c0=0.1)
    agent_2_factors = distribs.Product([agent_factors], c0=0.)

    # Walls
    walls = shapes.border_walls(visible_thickness=0.05, c0=0., c1=0., c2=0.5)

    # Fountains
    fountain_factors = {
        'shape': 'circle', 'scale': 0.05, 'c0': 0.6, 'c1': 1.,
        'c2': _BAD_VALUE}
    fountains_across = np.linspace(0.1, 0.9, 6)
    fountains_up = np.linspace(0.75, 0.9, 2)
    fountains_grid_x, fountains_grid_y = np.meshgrid(
        fountains_across, fountains_up)
    fountains_positions = zip(
        np.ravel(fountains_grid_x), np.ravel(fountains_grid_y))
    fountain_sprites = [
        sprite.Sprite(x=x, y=y, **fountain_factors)
        for (x, y) in fountains_positions
    ]

    # Fruits
    fruit_factors = {
        'shape': 'circle', 'scale': 0.05, 'c0': 0.3, 'c1': 1.,
        'c2': _BAD_VALUE}
    fruits_across = np.linspace(0.1, 0.9, 6)
    fruits_up = np.linspace(0.1, 0.25, 2)
    fruits_grid_x, fruits_grid_y = np.meshgrid(fruits_across, fruits_up)
    fruits_positions = zip(np.ravel(fruits_grid_x), np.ravel(fruits_grid_y))
    fruit_sprites = [
        sprite.Sprite(x=x, y=y, **fruit_factors)
        for (x, y) in fruits_positions
    ]

    # Create callable initializer returning entire state
    agent_0_generator = sprite_generators.generate_sprites(
        agent_0_factors, num_sprites=1)
    agent_1_generator = sprite_generators.generate_sprites(
        agent_1_factors, num_sprites=1)
    agent_2_generator = sprite_generators.generate_sprites(
        agent_2_factors, num_sprites=1)

    def state_initializer():
        agent_0 = agent_0_generator(without_overlapping=walls)
        agent_1 = agent_1_generator(without_overlapping=walls)
        agent_2 = agent_2_generator(without_overlapping=walls)
        state = collections.OrderedDict([
            ('walls', walls),
            ('fountains', fountain_sprites),
            ('fruits', fruit_sprites),
            ('agent_2', agent_2),
            ('agent_1', agent_1),
            ('agent_0', agent_0),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    asymmetric_collision = physics_lib.Collision(
        elasticity=0.25, symmetric=False)
    forces = (
        (agent_friction_force, ['agent_0', 'agent_1', 'agent_2']),
        (asymmetric_collision, ['agent_0', 'agent_1', 'agent_2'], 'walls'),
    )
    physics = physics_lib.Physics(*forces, updates_per_env_step=5)

    ############################################################################
    # Task
    ############################################################################

    task = tasks.ContactReward(
        1,
        layers_0='agent_0',
        layers_1='fruits',
        condition=lambda s_0, s_1: s_1.c2 > _VALUE_THRESHOLD,
    )

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Composite(
        agent_0=action_spaces.Joystick(
            scaling_factor=0.005, action_layers='agent_0'),
        agent_1=action_spaces.Joystick(
            scaling_factor=0.005, action_layers='agent_1'),
        agent_2=action_spaces.Joystick(
            scaling_factor=0.005, action_layers='agent_2'),
    )

    ############################################################################
    # Observer
    ############################################################################

    image_observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
    )
    raw_state_observer = observers.RawState()  # Needed by hand-crafted agents

    ############################################################################
    # Game rules
    ############################################################################

    def _spoil_fruit(s):
        s.c2 = _BAD_VALUE

    def _ripen_fruit(s):
        s.c2 = _GOOD_VALUE

    def _poison_fountain(s):
        s.c2 = _BAD_VALUE

    def _clean_fountain(s):
        s.c2 = _GOOD_VALUE

    def agents_contacting_layer(state, layer, value):
        n_contact = 0
        for s in state[layer]:
            if s.c2 != value:
                continue
            n_contact += (
                s.overlaps_sprite(state['agent_0'][0]) or
                s.overlaps_sprite(state['agent_1'][0]) or
                s.overlaps_sprite(state['agent_2'][0])
            )
        return n_contact

    poison_fountains = game_rules.ModifySprites(
        layers='fountains',
        modifier=_poison_fountain,
        sample_one=True,
        filter_fn=lambda s: s.c2 > _VALUE_THRESHOLD)
    poison_fountains = game_rules.ConditionalRule(
        condition=lambda s: agents_contacting_layer(s, 'fruits', _GOOD_VALUE),
        rules=poison_fountains,
    )

    ripen_fruits = game_rules.ModifySprites(
        layers='fruits',
        modifier=_ripen_fruit,
        sample_one=True,
        filter_fn=lambda s: s.c2 < _VALUE_THRESHOLD)
    ripen_fruits = game_rules.ConditionalRule(
        condition=lambda s: agents_contacting_layer(s, 'fountains', _BAD_VALUE),
        rules=ripen_fruits,
    )

    spoil_fruits = game_rules.ModifyOnContact(
        layers_0='fruits',
        layers_1=('agent_0', 'agent_1', 'agent_2'),
        modifier_0=_spoil_fruit,
        filter_0=lambda s: s.c2 > _VALUE_THRESHOLD)

    clean_fountains = game_rules.ModifyOnContact(
        layers_0='fountains',
        layers_1=('agent_0', 'agent_1', 'agent_2'),
        modifier_0=_clean_fountain,
        filter_0=lambda s: s.c2 < _VALUE_THRESHOLD)

    rules = (poison_fountains, spoil_fruits, ripen_fruits, clean_fountains)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': image_observer, 'state': raw_state_observer},
        'game_rules': rules,
    }
    return config
def _get_config(num_prey, num_predators):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agent
    agent_factors = distribs.Product(
        [distribs.Continuous('x', 0., 1.),
         distribs.Continuous('y', 0., 1.)],
        scale=0.08,
        c0=0,
        c1=255,
        c2=0,
    )

    # Predators
    predator_factors = distribs.Product(
        [distribs.Continuous('x', 0., 1.),
         distribs.Continuous('y', 0., 1.),
         distribs.Continuous('x_vel', -0.02, 0.02),
         distribs.Continuous('y_vel', -0.02, 0.02)],
        scale=0.08,
        shape='circle',
        opacity=192,
        c0=255,
        c1=0,
        c2=0,
    )

    # Prey
    prey_factors = distribs.Product(
        [distribs.Continuous('x', 0., 1.),
         distribs.Continuous('y', 0., 1.),
         distribs.Continuous('x_vel', -0.02, 0.02),
         distribs.Continuous('y_vel', -0.02, 0.02)],
        scale=0.08,
        shape='circle',
        opacity=192,
        c0=255,
        c1=255,
        c2=0,
    )

    # Create callable initializer returning entire state
    predator_generator = sprite_generators.generate_sprites(
        predator_factors, num_sprites=num_predators)
    prey_generator = sprite_generators.generate_sprites(
        prey_factors, num_sprites=num_prey)

    def state_initializer():
        """Callable returning state at every episode reset."""
        agent = sprite.Sprite(**agent_factors.sample())
        predators = predator_generator(without_overlapping=(agent,))
        prey = prey_generator(without_overlapping=(agent,))
        state = collections.OrderedDict([
            ('prey', prey),
            ('predators', predators),
            ('agent', [agent]),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    random_force = physics_lib.RandomForce(max_force_magnitude=0.01)
    predator_attraction = physics_lib.DistanceForce(
        physics_lib.linear_force_fn(zero_intercept=-0.001, slope=0.0005))
    prey_avoid = physics_lib.DistanceForce(
        physics_lib.linear_force_fn(zero_intercept=0.001, slope=-0.0005))
    forces = (
        (agent_friction_force, 'agent'),
        (random_force, ['predators', 'prey']),
        (predator_attraction, 'agent', 'predators'),
        (prey_avoid, 'agent', 'prey'),
    )
    constant_speed = physics_lib.ConstantSpeed(
        layer_names=['prey', 'predators'], speed=0.015)
    physics = physics_lib.Physics(
        *forces,
        updates_per_env_step=10,
        corrective_physics=[constant_speed],
    )

    ############################################################################
    # Task
    ############################################################################

    predator_task = tasks.ContactReward(
        -5, layers_0='agent', layers_1='predators',
        reset_steps_after_contact=0)
    prey_task = tasks.ContactReward(1, layers_0='agent', layers_1='prey')
    reset_task = tasks.Reset(
        condition=lambda state: len(state['prey']) == 0,
        steps_after_condition=5,
    )
    task = tasks.CompositeTask(
        reset_task, predator_task, prey_task, timeout_steps=300)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.025, action_layers='agent', control_velocity=True)

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        polygon_modifier=polygon_modifiers.TorusGeometry(
            ['agent', 'predators', 'prey']),
    )

    ############################################################################
    # Game rules
    ############################################################################

    prey_vanish = game_rules.VanishOnContact(
        vanishing_layer='prey', contacting_layer='agent')

    def _torus_position_wrap(s):
        s.position = np.remainder(s.position, 1)

    torus_position_wrap = game_rules.ModifySprites(
        ('agent', 'predators', 'prey'), _torus_position_wrap)

    rules = (prey_vanish, torus_position_wrap)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': rules,
    }
    return config
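def _example_torus_wrap():
    """Sanity-check sketch for the wrap rule above (illustrative).

    np.remainder maps positions back into the unit square, so a sprite leaving
    one edge re-enters from the opposite edge. The function name is
    hypothetical.
    """
    wrapped = np.remainder(np.array([1.03, -0.02]), 1)
    return np.allclose(wrapped, [0.03, 0.98])  # True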
def get_config(num_predators):
    """Get config dictionary of kwargs for environment constructor.

    Args:
        num_predators: Int. Number of predators.
    """

    ############################################################################
    # Sprite initialization
    ############################################################################

    state_initialization = StateInitialization(
        num_predators=num_predators,
        step_scaling_factor=0.1,
        threshold_trial_len=200,
    )

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    predator_friction_force = physics_lib.Drag(coeff_friction=0.04)
    predator_random_force = physics_lib.RandomForce(max_force_magnitude=0.03)
    predator_attraction = physics_lib.DistanceForce(
        physics_lib.linear_force_fn(zero_intercept=-0.0025, slope=0.0001))
    elastic_asymmetric_collision = physics_lib.Collision(
        elasticity=1., symmetric=False)
    inelastic_asymmetric_collision = physics_lib.Collision(
        elasticity=0., symmetric=False)
    forces = (
        (agent_friction_force, 'agent'),
        (predator_friction_force, 'predators'),
        (predator_random_force, 'predators'),
        (predator_attraction, 'agent', 'predators'),
        (elastic_asymmetric_collision, 'predators', 'walls'),
        (inelastic_asymmetric_collision, 'agent', 'walls'),
    )
    physics = physics_lib.Physics(*forces, updates_per_env_step=10)

    ############################################################################
    # Task
    ############################################################################

    task = tasks.ContactReward(
        -1, layers_0='agent', layers_1='predators',
        reset_steps_after_contact=0)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.01, action_layers='agent')

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(
        image_size=(64, 64), anti_aliasing=1, color_to_rgb='hsv_to_rgb')

    ############################################################################
    # Game rules
    ############################################################################

    def _increment_count(meta_state):
        meta_state['count'] += 1

    rules = game_rules.ModifyMetaState(_increment_count)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initialization.state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': (rules,),  # Install the meta-state step counter
        'meta_state_initializer': state_initialization.meta_state_initializer,
    }
    return config
def _get_config(num_obstacles, valid_step_range):
    """Get environment config.

    Args:
        num_obstacles: Int. Number of obstacles.
        valid_step_range: 2-iterable of ints. (min_num_steps, max_num_steps).
            All trials must have duration in this step range.

    Returns:
        config: Config dictionary to pass to environment constructor.
    """

    ############################################################################
    # Physics
    ############################################################################

    elastic_collision = physics_lib.Collision(
        elasticity=1., symmetric=False, update_angle_vel=False)
    physics = physics_lib.Physics(
        (elastic_collision, 'ball', 'walls'),
        updates_per_env_step=10,
    )

    def _predict_trial_end(state):
        """Predict whether a trial will end in step range and true response.

        Args:
            state: OrderedDict of sprite layers. Initial state of environment.

        Returns:
            valid_trial: Bool. Whether trial will end with number of steps in
                valid_step_range.
            contact_color: Binary. 0 if ball will contact red first, 1 if it
                will contact green first.
        """
        for step in range(valid_step_range[1]):
            red_overlap = state['ball'][0].overlaps_sprite(state['red'][0])
            green_overlap = state['ball'][0].overlaps_sprite(state['green'][0])
            if red_overlap or green_overlap:
                if step < valid_step_range[0]:
                    return False, None
                else:
                    contact_color = 0 if red_overlap else 1
                    return True, contact_color
            physics.step(state)
        return False, None

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Ball generator
    ball_factors = distribs.Product(
        [distribs.Continuous('x', 0.15, 0.85),
         distribs.Continuous('y', 0.15, 0.85),
         RadialVelocity(speed=0.03)],
        scale=0.05,
        shape='circle',
        c0=64,
        c1=64,
        c2=255,
    )
    ball_generator = sprite_generators.generate_sprites(
        ball_factors, num_sprites=1, max_recursion_depth=100,
        fail_gracefully=True)

    # Obstacle generator
    obstacle_factors = distribs.Product(
        [distribs.Continuous('x', 0.2, 0.8),
         distribs.Continuous('y', 0.2, 0.8)],
        scale=0.2,
        shape='square',
        c0=128,
        c1=128,
        c2=128,
    )
    obstacle_generator = sprite_generators.generate_sprites(
        obstacle_factors, num_sprites=2 + num_obstacles,
        max_recursion_depth=100, fail_gracefully=True)

    # Walls
    bottom_wall = [[-1, 0.1], [2, 0.1], [2, -1], [-1, -1]]
    top_wall = [[-1, 0.95], [2, 0.95], [2, 2], [-1, 2]]
    left_wall = [[0.05, -1], [0.05, 4], [-1, 4], [-1, -1]]
    right_wall = [[0.95, -1], [0.95, 4], [2, 4], [2, -1]]
    walls = [
        sprite.Sprite(shape=np.array(v), x=0, y=0, c0=128, c1=128, c2=128)
        for v in [bottom_wall, top_wall, left_wall, right_wall]
    ]

    def state_initializer():
        """Callable returning new state at each episode reset."""
        obstacles = obstacle_generator(disjoint=True)
        ball = ball_generator(without_overlapping=obstacles)
        if len(obstacles) < num_obstacles + 2 or not ball:
            # Max recursion depth failed trying to generate without overlapping
            return state_initializer()

        red = obstacles[0]
        green = obstacles[1]
        obstacles = obstacles[2:]

        # Set the colors of the red and green boxes
        red.c0 = 255
        red.c1 = 0
        red.c2 = 0
        green.c0 = 0
        green.c1 = 255
        green.c2 = 0

        # Create agent and response tokens at the bottom of the screen
        agent = sprite.Sprite(
            x=0.5, y=0.06, shape='spoke_4', scale=0.03, c0=255, c1=255, c2=255)
        responses = [
            sprite.Sprite(
                x=0.6, y=0.06, shape='square', scale=0.03, c0=255, c1=0, c2=0),
            sprite.Sprite(
                x=0.4, y=0.06, shape='square', scale=0.03, c0=0, c1=255, c2=0),
        ]

        state = collections.OrderedDict([
            ('walls', walls + obstacles),
            ('red', [red]),
            ('green', [green]),
            ('ball', ball),
            ('responses', responses),
            ('agent', [agent]),
        ])

        # Rejection sampling if trial won't finish in valid step range
        original_ball_position = np.copy(ball[0].position)
        original_ball_velocity = np.copy(ball[0].velocity)
        valid_trial, contact_color = _predict_trial_end(state)
        if valid_trial:
            ball[0].position = original_ball_position
            ball[0].velocity = original_ball_velocity
            agent.metadata = {'true_contact_color': contact_color}
        else:
            return state_initializer()

        return state

    ############################################################################
    # Task
    ############################################################################

    def _reward_fn(sprite_agent, sprite_response):
        response_green = sprite_response.c0 < 128
        if sprite_agent.metadata['true_contact_color'] == response_green:
            return 1.
        else:
            return -1.

    contact_reward = tasks.ContactReward(
        reward_fn=_reward_fn,
        layers_0='agent',
        layers_1='responses',
        reset_steps_after_contact=10,
    )
    task = tasks.CompositeTask(contact_reward, timeout_steps=400)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Grid(
        scaling_factor=0.015,
        action_layers='agent',
        control_velocity=True,
    )

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(image_size=(64, 64), anti_aliasing=1)

    ############################################################################
    # Game rules
    ############################################################################

    # Stop ball on contact with red or green box
    def _stop_ball(s):
        s.velocity = np.zeros(2)

    stop_ball = game_rules.ModifyOnContact(
        layers_0='ball', layers_1=('red', 'green'), modifier_0=_stop_ball)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': (stop_ball,),
    }
    return config
def get_config(_):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Ball generator
    ball_factors = distribs.Product(
        [distribs.Continuous('x', 0.25, 0.75),
         distribs.Continuous('y', 0.5, 0.9),
         distribs.Continuous('x_vel', -0.01, 0.01)],
        scale=0.1,
        shape='circle',
        c0=0,
        c1=0,
        c2=255,
        mass=1.,
    )
    ball_generator = sprite_generators.generate_sprites(
        ball_factors, num_sprites=4)

    # Walls
    bottom_wall = [[-1, 0.1], [2, 0.1], [2, -1], [-1, -1]]
    left_wall = [[0.05, -0.1], [0.05, 1.1], [-1, 1.1], [-1, -0.1]]
    right_wall = [[0.95, -0.1], [0.95, 1.1], [2, 1.1], [2, -0.1]]
    divider = [[0.45, -1], [0.45, 0.3], [0.55, 0.3], [0.55, -1]]
    walls = [
        sprite_lib.Sprite(shape=np.array(v), x=0, y=0, c0=128, c1=128, c2=128)
        for v in [bottom_wall, left_wall, right_wall, divider]
    ]

    def state_initializer():
        """Callable returning new state at each episode reset."""
        state = collections.OrderedDict([
            ('walls', walls),
            ('balls', ball_generator(disjoint=True)),
            ('agent', []),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    # Setting max_recursion_depth > 0 can increase stability
    # Setting update_angle_vel = False is recommended for stability
    collision = physics_lib.Collision(
        elasticity=0.6,
        symmetric=False,
        update_angle_vel=False,
        max_recursion_depth=2,
    )
    physics = physics_lib.Physics(
        (collision, 'balls', ['balls', 'walls']),
        (physics_lib.DownGravity(g=-0.001), 'balls'),
        updates_per_env_step=20,
    )

    ############################################################################
    # Task
    ############################################################################

    task = tasks.CompositeTask(timeout_steps=100)

    ############################################################################
    # Action space
    ############################################################################

    # Need an action space, so let it control an empty agent layer
    action_space = action_spaces.Grid(action_layers='agent')

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(image_size=(64, 64), anti_aliasing=1)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': (),
    }
    return config
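def _example_collect_frames(num_steps=100):
    """Data-collection sketch (illustrative; not part of the config).

    The task above carries no reward, so this config is mainly useful for
    rendering passive videos of the falling-ball dynamics. Assumes the config
    dict maps onto environment.Environment's keyword arguments and that the
    GymWrapper interface from the tests is available here; the function name
    and num_steps default are hypothetical.
    """
    env = environment.Environment(**get_config(None))
    gym_env = gym_wrapper.GymWrapper(env)
    gym_env.reset()
    frames = []
    for _ in range(num_steps):
        obs, _, done, _ = gym_env.step(gym_env.action_space.sample())
        frames.append(obs['image'])
        if done:
            gym_env.reset()
    return np.stack(frames)  # Shape [num_steps, 64, 64, 3], dtype uint8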
def get_config(_):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agent
    agent_factors = distribs.Product(
        [distribs.Continuous('x', 0.1, 0.9),
         distribs.Continuous('y', 0.1, 0.9)],
        shape='circle',
        scale=0.1,
        c0=0.33,
        c1=1.,
        c2=0.7,
    )

    # Predators
    predator_factors = distribs.Product(
        [distribs.Continuous('x', 0.1, 0.9),
         distribs.Continuous('y', 0.1, 0.9)],
        shape='circle',
        scale=0.1,
        c0=0.,
        c1=1.,
        c2=0.8,
    )

    # Prey
    prey_factors = distribs.Product(
        [distribs.Continuous('x', 0.1, 0.9),
         distribs.Continuous('y', 0.1, 0.9)],
        shape='circle',
        scale=0.1,
        c0=0.2,
        c1=1.,
        c2=1.,
    )

    # Boosters
    booster_factors = distribs.Product(
        [distribs.Continuous('x', 0.1, 0.9),
         distribs.Continuous('y', 0.1, 0.9)],
        shape='triangle',
        scale=0.1,
        c0=0.6,
        c1=1.,
        c2=1.,
    )

    # Portals
    portal_factors = dict(shape='square', scale=0.1, c0=0., c1=0., c2=0.95)
    portal_sprites = [
        sprite.Sprite(x=0.125, y=0.125, **portal_factors),
        sprite.Sprite(x=0.875, y=0.875, **portal_factors),
    ]

    # Walls
    wall_color = dict(c0=0., c1=0., c2=0.5)
    island_wall_shape_0 = np.array(
        [[0.2, 0.2], [0.4, 0.2], [0.4, 0.4], [0.2, 0.4]])
    island_wall_shapes = [
        island_wall_shape_0,
        island_wall_shape_0 + np.array([[0., 0.4]]),
        island_wall_shape_0 + np.array([[0.4, 0.4]]),
        island_wall_shape_0 + np.array([[0.4, 0.]]),
    ]
    island_walls = [
        sprite.Sprite(shape=shape, x=0., y=0., **wall_color)
        for shape in island_wall_shapes
    ]
    boundary_walls = shapes.border_walls(visible_thickness=0.05, **wall_color)
    walls = boundary_walls + island_walls

    # Callable sprite generators
    agent_generator = sprite_generators.generate_sprites(
        agent_factors, num_sprites=1)
    predator_generator = sprite_generators.generate_sprites(
        predator_factors, num_sprites=1)
    prey_generator = sprite_generators.generate_sprites(
        prey_factors, num_sprites=lambda: np.random.randint(2, 5))
    booster_generator = sprite_generators.generate_sprites(
        booster_factors, num_sprites=2)

    # Create callable initializer returning entire state
    def state_initializer():
        portals = portal_sprites
        agent = agent_generator(without_overlapping=walls)
        predators = predator_generator(without_overlapping=walls + agent)
        boosters = booster_generator(without_overlapping=walls + agent)
        prey = prey_generator(without_overlapping=walls)
        state = collections.OrderedDict([
            ('walls', walls),
            ('portals', portals),
            ('boosters', boosters),
            ('prey', prey),
            ('predators', predators),
            ('agent', agent),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    predator_friction_force = physics_lib.Drag(coeff_friction=0.05)
    predator_random_force = physics_lib.RandomForce(max_force_magnitude=0.02)
    prey_friction_force = physics_lib.Drag(coeff_friction=0.02)
    prey_random_force = physics_lib.RandomForce(max_force_magnitude=0.02)
    predator_attraction = physics_lib.DistanceForce(
        force_fn=physics_lib.linear_force_fn(
            zero_intercept=-0.002, slope=0.001))
    asymmetric_collision = physics_lib.Collision(
        elasticity=0.25, symmetric=False, update_angle_vel=False)
    forces = (
        (agent_friction_force, 'agent'),
        (predator_friction_force, 'predators'),
        (predator_random_force, 'predators'),
        (prey_friction_force, 'prey'),
        (prey_random_force, 'prey'),
        (predator_attraction, 'agent', 'predators'),
        (asymmetric_collision, ['agent', 'predators', 'prey'], 'walls'),
    )
    physics = physics_lib.Physics(*forces, updates_per_env_step=5)

    ############################################################################
    # Task
    ############################################################################

    predator_task = tasks.ContactReward(
        -5, layers_0='agent', layers_1='predators',
        reset_steps_after_contact=0)
    prey_task = tasks.ContactReward(1, layers_0='agent', layers_1='prey')
    reset_task = tasks.Reset(
        condition=lambda state: len(state['prey']) == 0,
        steps_after_condition=5,
    )
    task = tasks.CompositeTask(
        predator_task, prey_task, reset_task, timeout_steps=400)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.01,
        action_layers='agent',
    )

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
    )

    ############################################################################
    # Game rules
    ############################################################################

    disappear_rule = game_rules.VanishOnContact(
        vanishing_layer='prey', contacting_layer='agent')
    portal_rule = game_rules.Portal(
        teleporting_layer='agent', portal_layer='portals')
    rules = (disappear_rule, portal_rule, Booster())

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': rules,
    }
    return config
def get_config(_):
    """Get environment config."""

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Occluder
    occluder_shape = np.array(
        [[-0.1, 0.2], [1.1, 0.2], [1.1, 0.6], [-0.1, 0.6]])
    occluder = sprite.Sprite(
        x=0., y=0., shape=occluder_shape, scale=1., c0=0.6, c1=1., c2=1.)

    # Prey
    prey_factors = distribs.Product(
        [distribs.Continuous('x', 0.1, 0.8),
         distribs.Continuous('x_vel', -0.01, 0.01)],
        y=1.2, y_vel=-0.007, shape='circle', scale=0.07, c0=0.2, c1=1., c2=1.,
    )

    # Walls
    left_wall = [[0.05, -0.2], [0.05, 2], [-1, 2], [-1, -0.2]]
    right_wall = [[0.95, -0.2], [0.95, 2], [2, 2], [2, -0.2]]
    walls = [
        sprite.Sprite(shape=np.array(v), x=0, y=0, c0=0., c1=0., c2=0.5)
        for v in [left_wall, right_wall]
    ]

    def state_initializer():
        agent = sprite.Sprite(
            x=0.5, y=0.1, shape='square', aspect_ratio=0.2, scale=0.1,
            c0=0.33, c1=1., c2=0.66)
        state = collections.OrderedDict([
            ('walls', walls),
            ('prey', [sprite.Sprite(**prey_factors.sample())]),
            ('agent', [agent]),
            ('occluder', [occluder]),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    asymmetric_collision = physics_lib.Collision(
        elasticity=1., symmetric=False, update_angle_vel=False)
    inelastic_collision = physics_lib.Collision(
        elasticity=0., symmetric=False, update_angle_vel=False)
    physics = physics_lib.Physics(
        (agent_friction_force, 'agent'),
        (inelastic_collision, 'agent', 'walls'),
        (asymmetric_collision, 'prey', 'walls'),
        updates_per_env_step=10,
    )

    ############################################################################
    # Task
    ############################################################################

    contact_task = tasks.ContactReward(1., layers_0='agent', layers_1='prey')
    reset_task = tasks.Reset(
        condition=lambda state: all([s.y < 0. for s in state['prey']]),
        steps_after_condition=15,
    )
    task = tasks.CompositeTask(contact_task, reset_task)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.002, action_layers='agent', constrained_lr=True)

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(
        image_size=(64, 64), anti_aliasing=1, color_to_rgb='hsv_to_rgb')

    ############################################################################
    # Game rules
    ############################################################################

    prey_vanish = game_rules.VanishOnContact(
        vanishing_layer='prey', contacting_layer='agent')
    rules = (prey_vanish,)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
        'game_rules': rules,
    }
    return config
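

# Illustrative sketch (hypothetical, not part of the config above): the
# reset_task ends a trial 15 steps after every prey sprite has fallen below
# the bottom of the frame. The toy state below makes that condition concrete;
# `_example_reset_condition` and the toy sprite values are assumed names, not
# part of the library.
def _example_reset_condition():
    toy_prey = [sprite.Sprite(x=0.5, y=-0.05, shape='circle', scale=0.07,
                              c0=0.2, c1=1., c2=1.)]
    toy_state = collections.OrderedDict([('prey', toy_prey)])
    # Same predicate as the reset_task condition above; True for this state.
    return all([s.y < 0. for s in toy_state['prey']])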
def get_config(num_targets):
    """Get environment config.

    Args:
        num_targets: Int. Number of targets.
    """

    ############################################################################
    # Sprite initialization
    ############################################################################

    # Target circles
    target_factors = distribs.Product(
        [distribs.Continuous('x', 0.1, 0.9),
         distribs.Continuous('y', 0.1, 0.9),
         RadialVelocity(speed=0.01)],
        scale=0.1, shape='circle', c0=0., c1=0., c2=0.9,
    )

    # Target bars
    bar_factors = dict(
        scale=0.1, shape='square', aspect_ratio=0.3, c0=0., c1=0., c2=0.2)

    # Walls
    bottom_wall = [[-1, 0], [2, 0], [2, -1], [-1, -1]]
    top_wall = [[-1, 1], [2, 1], [2, 2], [-1, 2]]
    left_wall = [[0, -1], [0, 4], [-1, 4], [-1, -1]]
    right_wall = [[1, -1], [1, 4], [2, 4], [2, -1]]
    walls = [
        sprite.Sprite(shape=np.array(v), x=0, y=0, c0=0., c1=0., c2=0.5)
        for v in [bottom_wall, top_wall, left_wall, right_wall]
    ]

    # Occluder
    occluder_factors = dict(x=0.5, y=0.5, c0=0.6, c1=0.25, c2=0.5, opacity=0)

    # Cross shape for agent and fixation cross
    cross_shape = 0.1 * np.array(
        [[-5, 1], [-1, 1], [-1, 5], [1, 5], [1, 1], [5, 1],
         [5, -1], [1, -1], [1, -5], [-1, -5], [-1, -1], [-5, -1]])

    def state_initializer():
        fixation = sprite.Sprite(
            x=0.5, y=0.5, shape=cross_shape, scale=0.1, c0=0., c1=0., c2=0.)
        screen = sprite.Sprite(
            x=0.5, y=0.5, shape='square', scale=2., c0=0., c1=0., c2=1.)
        agent = sprite.Sprite(
            x=0.5, y=0.5, scale=0.04, shape=cross_shape, c0=0.33, c1=1., c2=1.)
        occluder_shape = shapes.annulus_vertices(0.13, 2.)
        occluder = sprite.Sprite(shape=occluder_shape, **occluder_factors)
        targets = [
            sprite.Sprite(**target_factors.sample()) for _ in range(num_targets)
        ]
        bar_angles = 0.5 * np.pi * np.random.binomial(1, 0.5, (num_targets))
        bars = [
            sprite.Sprite(x=s.x, y=s.y, x_vel=s.x_vel, y_vel=s.y_vel,
                          angle=angle, **bar_factors)
            for s, angle in zip(targets, bar_angles)
        ]
        state = collections.OrderedDict([
            ('walls', walls),
            ('targets', targets),
            ('bars', bars),
            ('occluder', [occluder]),
            ('screen', [screen]),
            ('fixation', [fixation]),
            ('agent', [agent]),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    elastic_collision = physics_lib.Collision(
        elasticity=1., symmetric=False, update_angle_vel=False)
    tether = physics_lib.TetherZippedLayers(
        layer_names=('targets', 'bars'), update_angle_vel=False)
    physics = physics_lib.Physics(
        (elastic_collision, 'targets', 'walls'),
        updates_per_env_step=10,
        corrective_physics=[tether],
    )

    ############################################################################
    # Task
    ############################################################################

    def _reward_condition(_, meta_state):
        return meta_state['phase'] == 'reward'

    task = tasks.Reset(
        condition=_reward_condition,
        reward_fn=lambda _: 1,
        steps_after_condition=10,
    )

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.SetPosition(
        action_layers=('agent', 'occluder'))

    ############################################################################
    # Observer
    ############################################################################

    observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        color_to_rgb=observers.color_maps.hsv_to_rgb,
    )

    ############################################################################
    # Game rules
    ############################################################################

    # Fixation phase
    fixation_rule = gr.Fixation(
        'agent', 'fixation', _FIXATION_THRESHOLD, 'fixation_duration')

    def _should_end_fixation(_, meta_state):
        return meta_state['fixation_duration'] >= 15

    fixation_phase = gr.Phase(
        continual_rules=fixation_rule,
        end_condition=_should_end_fixation,
        name='fixation',
    )

    # Visible phase
    vanish_fixation = gr.VanishByFilter('fixation', lambda _: True)
    vanish_screen = gr.VanishByFilter('screen', lambda _: True)
    visible_phase = gr.Phase(
        one_time_rules=[vanish_fixation, vanish_screen],
        duration=5,
        name='visible',
    )

    # Tracking phase
    def _make_opaque(s):
        s.opacity = 255

    appear_occluder = gr.ModifySprites('occluder', _make_opaque)
    tracking_phase = gr.Phase(
        one_time_rules=appear_occluder,
        duration=lambda: np.random.randint(40, 80),
        name='tracking',
    )

    # Change phase
    fixation_response_rule = gr.Fixation(
        'agent', 'targets', _FIXATION_THRESHOLD, 'response_duration')

    def _should_end_change(_, meta_state):
        return meta_state['response_duration'] >= 30

    change_phase = gr.Phase(
        one_time_rules=ChangeTargetFeature(),
        continual_rules=fixation_response_rule,
        name='change',
        end_condition=_should_end_change,
    )

    # Reward phase
    def _make_transparent(s):
        s.opacity = 0

    disappear_occluder = gr.ModifySprites('occluder', _make_transparent)

    def _glue(s):
        s.velocity = np.zeros(2)

    glue_targets = gr.ModifySprites(('targets', 'bars'), _glue)
    reward_phase = gr.Phase(
        one_time_rules=(disappear_occluder, glue_targets),
        name='reward',
    )

    phase_sequence = gr.PhaseSequence(
        fixation_phase,
        visible_phase,
        tracking_phase,
        change_phase,
        reward_phase,
        meta_state_phase_name_key='phase',
    )

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer, 'state': observers.RawState()},
        'game_rules': (phase_sequence,),
        'meta_state_initializer': lambda: {'phase': ''},
    }
    return config
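

# Extension sketch (hypothetical, not part of the config above): additional
# trial phases can be composed in the same style, pairing a one-time
# gr.ModifySprites rule with a gr.Phase. For example, a brief "cue" phase
# that recolors the fixation cross could be defined as below and passed to
# gr.PhaseSequence alongside the phases above; `_highlight` and `cue_phase`
# are assumed names.
def _highlight(s):
    s.c2 = 1.

highlight_fixation = gr.ModifySprites('fixation', _highlight)
cue_phase = gr.Phase(
    one_time_rules=highlight_fixation, duration=10, name='cue')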