def get_state():
    """Build and return the initial environment state.

    The state is an ordered mapping with two layers: 'walls' (a grey
    border) listed first, followed by 'sprites' (three triangles, one of
    which starts with nonzero velocity).
    """
    moving_triangle = sprite.Sprite(
        x=0.5, y=0.7, scale=0.1, shape='triangle',
        x_vel=0.04, y_vel=-0.02, c0=255, angle=2.)
    static_triangle_a = sprite.Sprite(
        x=0.2, y=0.6, scale=0.1, shape='triangle',
        x_vel=0., y_vel=0., c1=255, angle=1.)
    static_triangle_b = sprite.Sprite(
        x=0.6, y=0.3, scale=0.1, shape='triangle',
        x_vel=0., y_vel=0., c2=255)
    border = shapes.border_walls(
        visible_thickness=0.05, c0=128, c1=128, c2=128)
    return collections.OrderedDict([
        ('walls', border),
        ('sprites',
         [moving_triangle, static_triangle_a, static_triangle_b]),
    ])
def __init__(self, num_predators, step_scaling_factor, threshold_trial_len):
    """Constructor.

    This class uses the meta-state to keep track of the number of steps
    before the agent is caught. See the game rules section near the bottom
    of this file for the counter incrementer.

    Args:
        num_predators: Int. Number of predator sprites generated each
            trial.
        step_scaling_factor: Float. Fractional decrease of predator mass
            after a trial longer than threshold_trial_len. Also used as
            fractional increase of predator mass after a trial shorter than
            threshold_trial_len. Should be small and positive.
        threshold_trial_len: Length of a trial above which the predator
            mass is decreased and below which the predator mass is
            increased.
    """
    # Predator mass starts at 1 and is adapted between trials based on
    # trial length (see docstring above).
    self._mass = 1.
    self._step_scaling_factor = step_scaling_factor
    self._threshold_trial_len = threshold_trial_len

    # Agent: a single circle sampled uniformly over the unit square.
    agent_factors = distribs.Product(
        [
            distribs.Continuous('x', 0., 1.),
            distribs.Continuous('y', 0., 1.)
        ],
        shape='circle',
        scale=0.1,
        c0=0.33,
        c1=1.,
        c2=0.66,
    )
    self._agent_generator = sprite_generators.generate_sprites(
        agent_factors, num_sprites=1)

    # Predators: num_predators circles, also uniform over the unit square,
    # distinguished from the agent only by color.
    predator_factors = distribs.Product(
        [
            distribs.Continuous('x', 0., 1.),
            distribs.Continuous('y', 0., 1.)
        ],
        shape='circle',
        scale=0.1,
        c0=0.,
        c1=1.,
        c2=0.8,
    )
    self._predator_generator = sprite_generators.generate_sprites(
        predator_factors, num_sprites=num_predators)

    # Walls: border walls with zero visible thickness (presumably an
    # invisible arena boundary — confirm against shapes.border_walls).
    self._walls = shapes.border_walls(visible_thickness=0.,
                                      c0=0.,
                                      c1=0.,
                                      c2=0.5)

    # Meta-state is populated elsewhere (e.g. at trial reset); see the
    # game rules section mentioned in the docstring.
    self._meta_state = None
def get_config(_):
    """Get environment config.

    Builds a predator-avoidance environment: a joystick-controlled agent
    is penalized for contacting moving polygonal predators and receives a
    small periodic reward for staying alive, with a 200-step timeout.
    """
    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agent: single circle sampled away from the walls.
    agent_factors = distribs.Product(
        [distribs.Continuous('x', 0.1, 0.9),
         distribs.Continuous('y', 0.1, 0.9)],
        shape='circle', scale=0.1, c0=0.33, c1=1., c2=0.66,
    )

    # Predators: two custom polygon outlines (vertex arrays, scaled) plus
    # three named shapes, with randomized pose, size, and initial motion.
    shape_0 = 1.8 * np.array(
        [[-0.3, -0.3], [0.1, -0.7], [0.4, 0.6], [-0.1, 0.25]])
    shape_1 = 1.5 * np.array(
        [[-0.5, -0.3], [-0.1, -0.7], [0.7, 0.1], [0., -0.1], [-0.3, 0.25]])
    predator_factors = distribs.Product(
        [distribs.Continuous('x', 0.2, 0.8),
         distribs.Continuous('y', 0.2, 0.8),
         distribs.Discrete(
             'shape', [shape_0, shape_1, 'star_5', 'triangle', 'spoke_5']),
         distribs.Continuous('angle', 0., 2 * np.pi),
         distribs.Continuous('aspect_ratio', 0.75, 1.25),
         distribs.Continuous('scale', 0.1, 0.15),
         distribs.Continuous('x_vel', -0.03, 0.03),
         distribs.Continuous('y_vel', -0.03, 0.03),
         distribs.Continuous('angle_vel', -0.05, 0.05)],
        c0=0., c1=1., c2=0.8,
    )

    # Walls
    walls = shapes.border_walls(visible_thickness=0.05, c0=0., c1=0., c2=0.5)

    # Create callable initializer returning entire state
    agent_generator = sprite_generators.generate_sprites(
        agent_factors, num_sprites=1)
    predator_generator = sprite_generators.generate_sprites(
        predator_factors, num_sprites=5)

    def state_initializer():
        # Predators are placed first (mutually disjoint, off the walls);
        # the agent is then placed so it does not start in contact with
        # walls or predators.
        predators = predator_generator(
            disjoint=True, without_overlapping=walls)
        agent = agent_generator(without_overlapping=walls + predators)
        state = collections.OrderedDict([
            ('walls', walls),
            ('predators', predators),
            ('agent', agent),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    # Predators bounce elastically off walls (asymmetric: walls unmoved)
    # and off each other (symmetric), spinning on impact.
    asymmetric_collision = physics_lib.Collision(
        elasticity=1., symmetric=False, update_angle_vel=True)
    symmetric_collision = physics_lib.Collision(
        elasticity=1., symmetric=True, update_angle_vel=True)
    # The agent stops dead on walls (zero elasticity, no spin).
    agent_wall_collision = physics_lib.Collision(
        elasticity=0., symmetric=False, update_angle_vel=False)
    forces = (
        (agent_friction_force, 'agent'),
        (symmetric_collision, 'predators', 'predators'),
        (asymmetric_collision, 'predators', 'walls'),
        (agent_wall_collision, 'agent', 'walls'),
    )
    physics = physics_lib.Physics(*forces, updates_per_env_step=10)

    ############################################################################
    # Task
    ############################################################################

    # -5 on predator contact; +0.2 every 20 steps survived; trial ends
    # after 200 steps.
    predator_task = tasks.ContactReward(
        -5, layers_0='agent', layers_1='predators')
    stay_alive_task = tasks.StayAlive(
        reward_period=20,
        reward_value=0.2,
    )
    task = tasks.CompositeTask(
        predator_task, stay_alive_task, timeout_steps=200)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.01, action_layers='agent')

    ############################################################################
    # Observer
    ############################################################################

    # Colors above are HSV; the renderer converts to RGB.
    observer = observers.PILRenderer(
        image_size=(64, 64), anti_aliasing=1, color_to_rgb='hsv_to_rgb')

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': observer},
    }
    return config
def get_config(_):
    """Get environment config.

    Builds a predator/prey foraging environment with island walls, portal
    squares that teleport the agent, boosters (handled by the externally
    defined `Booster` rule), prey that vanish (+1) on agent contact, and a
    predator attracted to the agent (-5 on contact). The trial resets
    shortly after all prey are consumed, with a 400-step timeout.
    """
    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agent
    agent_factors = distribs.Product(
        [
            distribs.Continuous('x', 0.1, 0.9),
            distribs.Continuous('y', 0.1, 0.9)
        ],
        shape='circle',
        scale=0.1,
        c0=0.33,
        c1=1.,
        c2=0.7,
    )

    # Predators
    predator_factors = distribs.Product(
        [
            distribs.Continuous('x', 0.1, 0.9),
            distribs.Continuous('y', 0.1, 0.9)
        ],
        shape='circle',
        scale=0.1,
        c0=0.,
        c1=1.,
        c2=0.8,
    )

    # Prey
    prey_factors = distribs.Product(
        [
            distribs.Continuous('x', 0.1, 0.9),
            distribs.Continuous('y', 0.1, 0.9)
        ],
        shape='circle',
        scale=0.1,
        c0=0.2,
        c1=1.,
        c2=1.,
    )

    # Boosters
    booster_factors = distribs.Product(
        [
            distribs.Continuous('x', 0.1, 0.9),
            distribs.Continuous('y', 0.1, 0.9)
        ],
        shape='triangle',
        scale=0.1,
        c0=0.6,
        c1=1.,
        c2=1.,
    )

    # Portals: a fixed pair in opposite corners, linked by the Portal rule
    # below.
    portal_factors = dict(shape='square', scale=0.1, c0=0., c1=0., c2=0.95)
    portal_sprites = [
        sprite.Sprite(x=0.125, y=0.125, **portal_factors),
        sprite.Sprite(x=0.875, y=0.875, **portal_factors),
    ]

    # Walls: four square "island" obstacles (one base square translated to
    # four positions) plus the arena border.
    wall_color = dict(c0=0., c1=0., c2=0.5)
    island_wall_shape_0 = np.array([[0.2, 0.2], [0.4, 0.2], [0.4, 0.4],
                                    [0.2, 0.4]])
    island_wall_shapes = [
        island_wall_shape_0,
        island_wall_shape_0 + np.array([[0., 0.4]]),
        island_wall_shape_0 + np.array([[0.4, 0.4]]),
        island_wall_shape_0 + np.array([[0.4, 0.]]),
    ]
    island_walls = [
        sprite.Sprite(shape=shape, x=0., y=0., **wall_color)
        for shape in island_wall_shapes
    ]
    boundary_walls = shapes.border_walls(visible_thickness=0.05, **wall_color)
    walls = boundary_walls + island_walls

    # Callable sprite generators; prey count is resampled per trial in
    # {2, 3, 4}.
    agent_generator = sprite_generators.generate_sprites(agent_factors,
                                                         num_sprites=1)
    predator_generator = sprite_generators.generate_sprites(predator_factors,
                                                            num_sprites=1)
    prey_generator = sprite_generators.generate_sprites(
        prey_factors, num_sprites=lambda: np.random.randint(2, 5))
    booster_generator = sprite_generators.generate_sprites(booster_factors,
                                                           num_sprites=2)

    # Create callable initializer returning entire state
    def state_initializer():
        portals = portal_sprites
        # Predators and boosters avoid the agent's spawn; prey only avoid
        # walls (they may spawn on the agent or each other).
        agent = agent_generator(without_overlapping=walls)
        predators = predator_generator(without_overlapping=walls + agent)
        boosters = booster_generator(without_overlapping=walls + agent)
        prey = prey_generator(without_overlapping=walls)
        state = collections.OrderedDict([
            ('walls', walls),
            ('portals', portals),
            ('boosters', boosters),
            ('prey', prey),
            ('predators', predators),
            ('agent', agent),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    # Predators and prey drift under random forces with light drag; the
    # predator is additionally pulled toward the agent by a
    # distance-dependent force.
    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    predator_friction_force = physics_lib.Drag(coeff_friction=0.05)
    predator_random_force = physics_lib.RandomForce(max_force_magnitude=0.02)
    prey_friction_force = physics_lib.Drag(coeff_friction=0.02)
    prey_random_force = physics_lib.RandomForce(max_force_magnitude=0.02)
    predator_attraction = physics_lib.DistanceForce(
        force_fn=physics_lib.linear_force_fn(zero_intercept=-0.002,
                                             slope=0.001))
    asymmetric_collision = physics_lib.Collision(elasticity=0.25,
                                                 symmetric=False,
                                                 update_angle_vel=False)
    forces = (
        (agent_friction_force, 'agent'),
        (predator_friction_force, 'predators'),
        (predator_random_force, 'predators'),
        (prey_friction_force, 'prey'),
        (prey_random_force, 'prey'),
        (predator_attraction, 'agent', 'predators'),
        (asymmetric_collision, ['agent', 'predators', 'prey'], 'walls'),
    )
    physics = physics_lib.Physics(*forces, updates_per_env_step=5)

    ############################################################################
    # Task
    ############################################################################

    # Predator contact: -5 and immediate reset. Prey contact: +1.
    predator_task = tasks.ContactReward(-5,
                                        layers_0='agent',
                                        layers_1='predators',
                                        reset_steps_after_contact=0)
    prey_task = tasks.ContactReward(1, layers_0='agent', layers_1='prey')
    # Reset 5 steps after the last prey vanishes.
    reset_task = tasks.Reset(
        condition=lambda state: len(state['prey']) == 0,
        steps_after_condition=5,
    )
    task = tasks.CompositeTask(predator_task,
                               prey_task,
                               reset_task,
                               timeout_steps=400)

    ############################################################################
    # Action space
    ############################################################################

    action_space = action_spaces.Joystick(
        scaling_factor=0.01,
        action_layers='agent',
    )

    ############################################################################
    # Observer
    ############################################################################

    # Colors above are HSV; the renderer converts to RGB.
    observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
    )

    ############################################################################
    # Game rules
    ############################################################################

    disappear_rule = game_rules.VanishOnContact(vanishing_layer='prey',
                                                contacting_layer='agent')
    portal_rule = game_rules.Portal(teleporting_layer='agent',
                                    portal_layer='portals')
    # `Booster` is defined elsewhere in this file.
    rules = (disappear_rule, portal_rule, Booster())

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {
            'image': observer
        },
        'game_rules': rules,
    }
    return config
def get_config(_):
    """Get environment config.

    Builds a three-agent environment with two grids of sprites: fountains
    (top) and fruits (bottom). Fruit/fountain "quality" is encoded in the
    c2 color channel (_GOOD_VALUE vs _BAD_VALUE, thresholded by
    _VALUE_THRESHOLD, all defined elsewhere in this file). Only agent_0 is
    rewarded, for touching good fruit; game rules ripen/spoil fruits and
    poison/clean fountains in response to agent contacts.
    """
    ############################################################################
    # Sprite initialization
    ############################################################################

    # Agents: three circles sharing position/shape factors, distinguished
    # only by hue (c0).
    agent_factors = distribs.Product(
        [distribs.Continuous('x', 0., 1.),
         distribs.Continuous('y', 0.35, 0.65)],
        shape='circle', scale=0.1, c1=1., c2=0.7,
    )
    agent_0_factors = distribs.Product([agent_factors], c0=0.2)
    agent_1_factors = distribs.Product([agent_factors], c0=0.1)
    agent_2_factors = distribs.Product([agent_factors], c0=0.)

    # Walls
    walls = shapes.border_walls(visible_thickness=0.05, c0=0., c1=0., c2=0.5)

    # Fountains: a fixed 6x2 grid near the top, starting in the bad state.
    fountain_factors = {
        'shape': 'circle', 'scale': 0.05, 'c0': 0.6, 'c1': 1.,
        'c2': _BAD_VALUE}
    fountains_across = np.linspace(0.1, 0.9, 6)
    fountains_up = np.linspace(0.75, 0.9, 2)
    fountains_grid_x, fountains_grid_y = np.meshgrid(fountains_across,
                                                     fountains_up)
    fountains_positions = zip(np.ravel(fountains_grid_x),
                              np.ravel(fountains_grid_y))
    fountain_sprites = [
        sprite.Sprite(x=x, y=y, **fountain_factors)
        for (x, y) in fountains_positions
    ]

    # Fruits: a fixed 6x2 grid near the bottom, starting in the bad state.
    fruit_factors = {
        'shape': 'circle', 'scale': 0.05, 'c0': 0.3, 'c1': 1.,
        'c2': _BAD_VALUE}
    fruits_across = np.linspace(0.1, 0.9, 6)
    fruits_up = np.linspace(0.1, 0.25, 2)
    fruits_grid_x, fruits_grid_y = np.meshgrid(fruits_across, fruits_up)
    fruits_positions = zip(np.ravel(fruits_grid_x), np.ravel(fruits_grid_y))
    fruit_sprites = [
        sprite.Sprite(x=x, y=y, **fruit_factors)
        for (x, y) in fruits_positions
    ]

    # Create callable initializer returning entire state
    agent_0_generator = sprite_generators.generate_sprites(
        agent_0_factors, num_sprites=1)
    agent_1_generator = sprite_generators.generate_sprites(
        agent_1_factors, num_sprites=1)
    agent_2_generator = sprite_generators.generate_sprites(
        agent_2_factors, num_sprites=1)

    def state_initializer():
        agent_0 = agent_0_generator(without_overlapping=walls)
        agent_1 = agent_1_generator(without_overlapping=walls)
        agent_2 = agent_2_generator(without_overlapping=walls)
        state = collections.OrderedDict([
            ('walls', walls),
            ('fountains', fountain_sprites),
            ('fruits', fruit_sprites),
            ('agent_2', agent_2),
            ('agent_1', agent_1),
            ('agent_0', agent_0),
        ])
        return state

    ############################################################################
    # Physics
    ############################################################################

    agent_friction_force = physics_lib.Drag(coeff_friction=0.25)
    asymmetric_collision = physics_lib.Collision(
        elasticity=0.25, symmetric=False)
    forces = (
        (agent_friction_force, ['agent_0', 'agent_1', 'agent_2']),
        (asymmetric_collision, ['agent_0', 'agent_1', 'agent_2'], 'walls'),
    )
    physics = physics_lib.Physics(*forces, updates_per_env_step=5)

    ############################################################################
    # Task
    ############################################################################

    # Only agent_0 is rewarded, and only for contacting good fruit
    # (c2 above threshold).
    task = tasks.ContactReward(
        1,
        layers_0='agent_0',
        layers_1='fruits',
        condition=lambda s_0, s_1: s_1.c2 > _VALUE_THRESHOLD,
    )

    ############################################################################
    # Action space
    ############################################################################

    # Independent joystick per agent.
    action_space = action_spaces.Composite(
        agent_0=action_spaces.Joystick(
            scaling_factor=0.005, action_layers='agent_0'),
        agent_1=action_spaces.Joystick(
            scaling_factor=0.005, action_layers='agent_1'),
        agent_2=action_spaces.Joystick(
            scaling_factor=0.005, action_layers='agent_2'),
    )

    ############################################################################
    # Observer
    ############################################################################

    image_observer = observers.PILRenderer(
        image_size=(64, 64),
        anti_aliasing=1,
        color_to_rgb='hsv_to_rgb',
    )
    raw_state_observer = observers.RawState()  # needed by hand-crafted agents

    ############################################################################
    # Game rules
    ############################################################################

    # Sprite modifiers toggling the c2 "quality" channel.
    # NOTE(review): the `sprite` parameter shadows the module name
    # `sprite` used above; consider renaming in a future change.
    def _spoil_fruit(sprite):
        sprite.c2 = _BAD_VALUE

    def _ripen_fruit(sprite):
        sprite.c2 = _GOOD_VALUE

    def _poison_fountain(sprite):
        sprite.c2 = _BAD_VALUE

    def _clean_fountain(sprite):
        sprite.c2 = _GOOD_VALUE

    def agents_contacting_layer(state, layer, value):
        # Count sprites in `layer` with c2 == value that overlap any of
        # the three agents (at most one count per sprite, since `or`
        # yields a single boolean).
        n_contact = 0
        for s in state[layer]:
            if s.c2 != value:
                continue
            n_contact += (
                s.overlaps_sprite(state['agent_0'][0]) or
                s.overlaps_sprite(state['agent_1'][0]) or
                s.overlaps_sprite(state['agent_2'][0])
            )
        return n_contact

    # When an agent touches a good fruit, one random good fountain is
    # poisoned.
    poison_fountains = game_rules.ModifySprites(
        layers='fountains', modifier=_poison_fountain, sample_one=True,
        filter_fn=lambda s: s.c2 > _VALUE_THRESHOLD)
    poison_fountains = game_rules.ConditionalRule(
        condition=lambda s: agents_contacting_layer(s, 'fruits', _GOOD_VALUE),
        rules=poison_fountains,
    )

    # When an agent touches a bad fountain, one random bad fruit ripens.
    ripen_fruits = game_rules.ModifySprites(
        layers='fruits', modifier=_ripen_fruit, sample_one=True,
        filter_fn=lambda s: s.c2 < _VALUE_THRESHOLD)
    ripen_fruits = game_rules.ConditionalRule(
        condition=lambda s: agents_contacting_layer(s, 'fountains',
                                                    _BAD_VALUE),
        rules=ripen_fruits,
    )

    # Direct contact effects: good fruits spoil and bad fountains are
    # cleaned when any agent touches them.
    spoil_fruits = game_rules.ModifyOnContact(
        layers_0='fruits', layers_1=('agent_0', 'agent_1', 'agent_2'),
        modifier_0=_spoil_fruit,
        filter_0=lambda s: s.c2 > _VALUE_THRESHOLD)
    clean_fountains = game_rules.ModifyOnContact(
        layers_0='fountains', layers_1=('agent_0', 'agent_1', 'agent_2'),
        modifier_0=_clean_fountain,
        filter_0=lambda s: s.c2 < _VALUE_THRESHOLD)

    rules = (poison_fountains, spoil_fruits, ripen_fruits, clean_fountains)

    ############################################################################
    # Final config
    ############################################################################

    config = {
        'state_initializer': state_initializer,
        'physics': physics,
        'task': task,
        'action_space': action_space,
        'observers': {'image': image_observer, 'state': raw_state_observer},
        'game_rules': rules,
    }
    return config