def get_config(mode='train'):
  """Generate environment config.

  Args:
    mode: 'train' or 'test'.

  Returns:
    config: Dictionary defining task/environment configuration. Can be fed
      as kwargs to environment.Environment.
  """
  # Hue (c0) distributions, one per cluster active in this mode.
  hue_clusters = [CLUSTERS_DISTS[name] for name in MODES[mode]]
  print('Clustering task: {}, #sprites: {}'.format(
      MODES[mode], NUM_SPRITES_PER_CLUSTER))

  # Factors shared by every sprite regardless of its cluster.
  non_hue_factors = distribs.Product([
      distribs.Continuous('x', 0.1, 0.9),
      distribs.Continuous('y', 0.1, 0.9),
      distribs.Discrete('shape', ['square', 'triangle', 'circle']),
      distribs.Discrete('scale', [0.13]),
      distribs.Continuous('c1', 0.3, 1.),
      distribs.Continuous('c2', 0.9, 1.),
  ])

  # Combine each cluster's hue with the shared factors, then build one
  # sprite generator per cluster with the appropriate sprite count.
  cluster_generators = []
  for hue in hue_clusters:
    cluster_factors = distribs.Product((non_hue_factors, hue))
    cluster_generators.append(
        sprite_generators.generate_sprites(
            cluster_factors, num_sprites=NUM_SPRITES_PER_CLUSTER))

  # Concatenate all clusters into a single scene to generate.
  sprite_gen = sprite_generators.chain_generators(*cluster_generators)
  # Randomize sprite ordering to eliminate any task information from
  # occlusions.
  sprite_gen = sprite_generators.shuffle(sprite_gen)

  # The clustering task defines the rewards.
  task = tasks.Clustering(
      hue_clusters, terminate_bonus=0., reward_range=10., sparse_reward=True)

  return {
      'task': task,
      'action_space': common.noisy_action_space(),
      'renderers': common.renderers(),
      'init_sprites': sprite_gen,
      'max_episode_length': MAX_EPISODE_LENGTH,
      'metadata': {
          'name': os.path.basename(__file__),
          'mode': mode
      }
  }
def get_config(mode='train'):
  """Generate environment config.

  Args:
    mode: 'train' or 'test'.

  Returns:
    config: Dictionary defining task/environment configuration. Can be fed
      as kwargs to environment.Environment.
  """
  # Factors common to both target and distractor sprites.
  base_factors = distribs.Product([
      distribs.Discrete('shape', ['square', 'triangle', 'circle']),
      distribs.Discrete('scale', [0.13]),
      distribs.Continuous('c1', 0.3, 1.),
      distribs.Continuous('c2', 0.9, 1.),
  ])

  # Hue (c0) separates targets (low band) from distractors (high band).
  target_hue = distribs.Continuous('c0', 0., 0.4)
  distractor_hue = distribs.Continuous('c0', 0.5, 0.9)

  # Targets get mode-dependent positions; distractors are placed uniformly.
  target_factors = distribs.Product([
      MODES_TARGET_POSITIONS[mode],
      target_hue,
      base_factors,
  ])
  distractor_factors = distribs.Product([
      distribs.Continuous('x', 0.1, 0.9),
      distribs.Continuous('y', 0.1, 0.9),
      distractor_hue,
      base_factors,
  ])

  gen_targets = sprite_generators.generate_sprites(
      target_factors, num_sprites=NUM_TARGETS)
  gen_distractors = sprite_generators.generate_sprites(
      distractor_factors, num_sprites=NUM_DISTRACTORS)
  sprite_gen = sprite_generators.chain_generators(gen_targets, gen_distractors)
  # Randomize sprite ordering to eliminate any task information from
  # occlusions.
  sprite_gen = sprite_generators.shuffle(sprite_gen)

  # Only sprites drawn from the target hue band are goal-relevant.
  task = tasks.FindGoalPosition(
      filter_distrib=target_hue, terminate_distance=TERMINATE_DISTANCE)

  return {
      'task': task,
      'action_space': common.action_space(),
      'renderers': common.renderers(),
      'init_sprites': sprite_gen,
      'max_episode_length': 20,
      'metadata': {
          'name': os.path.basename(__file__),
          'mode': mode
      }
  }
def get_config(mode='train'):
  """Generate environment config.

  Args:
    mode: 'train' or 'test'.

  Returns:
    config: Dictionary defining task/environment configuration. Can be fed
      as kwargs to environment.Environment.
  """
  # Sprite factor distribution; shapes vary with the train/test mode.
  sprite_factors = distribs.Product([
      MODES_SHAPES[mode],
      distribs.Continuous('x', 0.1, 0.9),
      distribs.Continuous('y', 0.1, 0.9),
      distribs.Discrete('scale', [0.2]),
      distribs.Discrete('c0', [0.9, 0.55, 0.27], probs=[0.333, 0.334, 0.333]),
      distribs.Discrete('c1', [0.6]),
      distribs.Continuous('c2', 0.9, 1.),
  ])

  sprite_gen = sprite_generators.generate_sprites(
      sprite_factors, num_sprites=NUM_TARGETS, fix_colors=True)
  # Randomize sprite ordering to eliminate any task information from
  # occlusions.
  sprite_gen = sprite_generators.shuffle(sprite_gen)

  task = tasks.FindGoalPosition(
      terminate_distance=TERMINATE_DISTANCE, sparse_reward=True)

  # Actions are perturbed with proportional motion noise.
  action_space = common.noisy_action_space(
      MOTION_STD_DEV, PROPORTIONAL_MOTION_NOISE, None)

  return {
      'task': task,
      'action_space': action_space,
      'renderers': common.renderers(),
      'init_sprites': sprite_gen,
      'max_episode_length': 60,
      'metadata': {
          'name': os.path.basename(__file__),
          'mode': mode
      }
  }
def get_config(mode=None):
  """Generate environment config.

  Args:
    mode: Unused.

  Returns:
    config: Dictionary defining task/environment configuration. Can be fed
      as kwargs to environment.Environment.
  """
  del mode  # No train/test split for pure exploration

  sprite_factors = distribs.Product([
      distribs.Continuous('x', 0.1, 0.9),
      distribs.Continuous('y', 0.1, 0.9),
      distribs.Discrete('shape', ['square', 'triangle', 'circle']),
      distribs.Discrete('scale', [0.13]),
      distribs.Continuous('c0', 0., 1.),
      distribs.Continuous('c1', 0.3, 1.),
      distribs.Continuous('c2', 0.9, 1.),
  ])

  def sample_num_sprites():
    # Scenes contain between 1 and 6 sprites, sampled per episode.
    return np.random.randint(1, 7)

  sprite_gen = sprite_generators.generate_sprites(
      sprite_factors, num_sprites=sample_num_sprites)

  # Pure exploration: no reward signal at all.
  task = tasks.NoReward()

  return {
      'task': task,
      'action_space': common.action_space(),
      'renderers': common.renderers(),
      'init_sprites': sprite_gen,
      'max_episode_length': 10,
      'metadata': {
          'name': os.path.basename(__file__),
          'mode': None
      }
  }
def get_config(mode='train'):
  """Generate environment config.

  Args:
    mode: 'train' or 'test'.

  Returns:
    config: Dictionary defining task/environment configuration. Can be fed
      as kwargs to environment.Environment.

  Raises:
    ValueError: If `mode` is neither 'train' nor 'test'.
  """
  # Create the subtasks and their corresponding sprite generators.
  subtasks = []
  sprite_gen_per_subtask = []
  for subtask in SUBTASKS:
    subtasks.append(
        tasks.FindGoalPosition(
            filter_distrib=subtask['distrib'],
            goal_position=subtask['goal_position'],
            terminate_distance=TERMINATE_DISTANCE,
            raw_reward_multiplier=RAW_REWARD_MULTIPLIER))
    factors = distribs.Product((
        subtask['distrib'],
        distribs.Continuous('x', 0.1, 0.9),
        distribs.Continuous('y', 0.1, 0.9),
        distribs.Discrete('shape', ['square', 'triangle', 'circle']),
        distribs.Discrete('scale', [0.13]),
        distribs.Continuous('c1', 0.3, 1.),
        distribs.Continuous('c2', 0.9, 1.),
    ))
    sprite_gen_per_subtask.append(
        sprite_generators.generate_sprites(factors, num_sprites=1))

  # Consider all combinations of subtasks.
  subtask_combos = list(
      itertools.combinations(np.arange(len(SUBTASKS)), NUM_TARGETS))

  if mode == 'train':
    # Randomly sample a combination of subtasks, holding one combination out.
    sprite_gen = sprite_generators.sample_generator([
        sprite_generators.chain_generators(
            *[sprite_gen_per_subtask[i] for i in c])
        for c in subtask_combos[1:]
    ])
  elif mode == 'test':
    # Use the held-out subtask combination for testing.
    sprite_gen = sprite_generators.chain_generators(
        *[sprite_gen_per_subtask[i] for i in subtask_combos[0]])
  else:
    # Fixed typo in the error message ('Invalide' -> 'Invalid').
    raise ValueError('Invalid mode {}.'.format(mode))

  # Randomize sprite ordering to eliminate any task information from
  # occlusions.
  sprite_gen = sprite_generators.shuffle(sprite_gen)

  # Episode reward is the sum over subtasks; it terminates only when all
  # subtasks are complete.
  task = tasks.MetaAggregated(
      subtasks, reward_aggregator='sum', termination_criterion='all')

  return {
      'task': task,
      'action_space': common.action_space(),
      'renderers': common.renderers(),
      'init_sprites': sprite_gen,
      'max_episode_length': MAX_EPISODE_LENGTH,
      'metadata': {
          'name': os.path.basename(__file__),
          'mode': mode
      }
  }