예제 #1
0
def get_config(mode='train'):
    """Build the environment config for the clustering task.

    Args:
        mode: 'train' or 'test'. Used as the key into MODES to pick which
            clusters take part in the task.

    Returns:
        config: Dictionary defining task/environment configuration. Can be fed
            as kwargs to environment.Environment.
    """
    # Clusters active in this mode and the c0 distribution of each of them.
    cluster_names = MODES[mode]
    cluster_hues = [CLUSTERS_DISTS[name] for name in cluster_names]
    print('Clustering task: {}, #sprites: {}'.format(cluster_names,
                                                     NUM_SPRITES_PER_CLUSTER))

    # Every factor except c0 is drawn from the same distribution regardless
    # of the cluster a sprite belongs to.
    shared_factors = distribs.Product([
        distribs.Continuous('x', 0.1, 0.9),
        distribs.Continuous('y', 0.1, 0.9),
        distribs.Discrete('shape', ['square', 'triangle', 'circle']),
        distribs.Discrete('scale', [0.13]),
        distribs.Continuous('c1', 0.3, 1.),
        distribs.Continuous('c2', 0.9, 1.),
    ])

    # Full factor distribution of each cluster: the shared factors combined
    # with that cluster's own c0 (hue) distribution.
    factors_per_cluster = []
    for hue in cluster_hues:
        factors_per_cluster.append(distribs.Product((shared_factors, hue)))

    # One generator per cluster, each asked for NUM_SPRITES_PER_CLUSTER
    # sprites.
    generators = []
    for cluster_factors in factors_per_cluster:
        generators.append(
            sprite_generators.generate_sprites(
                cluster_factors, num_sprites=NUM_SPRITES_PER_CLUSTER))

    # Merge all clusters into one scene, then shuffle the sprite ordering so
    # that occlusions carry no task information.
    init_sprites = sprite_generators.shuffle(
        sprite_generators.chain_generators(*generators))

    # The clustering task defines the rewards.
    clustering_task = tasks.Clustering(cluster_hues,
                                       terminate_bonus=0.,
                                       reward_range=10.,
                                       sparse_reward=True)

    return {
        'task': clustering_task,
        'action_space': common.noisy_action_space(),
        'renderers': common.renderers(),
        'init_sprites': init_sprites,
        'max_episode_length': MAX_EPISODE_LENGTH,
        'metadata': {
            'name': os.path.basename(__file__),
            'mode': mode,
        },
    }
예제 #2
0
def get_config(mode='train'):
    """Build the environment config for the goal-finding task.

    Target sprites take their position distribution from
    MODES_TARGET_POSITIONS[mode]; distractor sprites are positioned
    independently of the mode.

    Args:
        mode: 'train' or 'test'.

    Returns:
        config: Dictionary defining task/environment configuration. Can be fed
            as kwargs to environment.Environment.
    """
    # Factors that targets and distractors have in common.
    appearance = distribs.Product([
        distribs.Discrete('shape', ['square', 'triangle', 'circle']),
        distribs.Discrete('scale', [0.13]),
        distribs.Continuous('c1', 0.3, 1.),
        distribs.Continuous('c2', 0.9, 1.),
    ])

    # Targets and distractors draw c0 from disjoint intervals.
    hue_of_targets = distribs.Continuous('c0', 0., 0.4)
    hue_of_distractors = distribs.Continuous('c0', 0.5, 0.9)

    factors_of_targets = distribs.Product([
        MODES_TARGET_POSITIONS[mode],
        hue_of_targets,
        appearance,
    ])
    factors_of_distractors = distribs.Product([
        distribs.Continuous('x', 0.1, 0.9),
        distribs.Continuous('y', 0.1, 0.9),
        hue_of_distractors,
        appearance,
    ])

    targets = sprite_generators.generate_sprites(factors_of_targets,
                                                 num_sprites=NUM_TARGETS)
    distractors = sprite_generators.generate_sprites(
        factors_of_distractors, num_sprites=NUM_DISTRACTORS)

    # Combine both groups, then shuffle the sprite ordering so that
    # occlusions carry no task information.
    init_sprites = sprite_generators.shuffle(
        sprite_generators.chain_generators(targets, distractors))

    # The task filters sprites with the target c0 distribution.
    goal_task = tasks.FindGoalPosition(filter_distrib=hue_of_targets,
                                       terminate_distance=TERMINATE_DISTANCE)

    return {
        'task': goal_task,
        'action_space': common.action_space(),
        'renderers': common.renderers(),
        'init_sprites': init_sprites,
        'max_episode_length': 20,
        'metadata': {
            'name': os.path.basename(__file__),
            'mode': mode,
        },
    }
def get_config(mode='train'):
    """Build the environment config for a sparse-reward goal-finding task.

    The shape distribution of the sprites is taken from MODES_SHAPES[mode];
    all other factors are the same for every mode.

    Args:
        mode: 'train' or 'test'.

    Returns:
        config: Dictionary defining task/environment configuration. Can be fed
            as kwargs to environment.Environment.
    """
    # c0 takes one of three fixed values with (almost) equal probability.
    hue = distribs.Discrete('c0', [0.9, 0.55, 0.27],
                            probs=[0.333, 0.334, 0.333])
    sprite_factors = distribs.Product([
        MODES_SHAPES[mode],
        distribs.Continuous('x', 0.1, 0.9),
        distribs.Continuous('y', 0.1, 0.9),
        distribs.Discrete('scale', [0.2]),
        hue,
        distribs.Discrete('c1', [0.6]),
        distribs.Continuous('c2', 0.9, 1.),
    ])

    targets = sprite_generators.generate_sprites(sprite_factors,
                                                 num_sprites=NUM_TARGETS,
                                                 fix_colors=True)
    # Shuffle the sprite ordering so that occlusions carry no task
    # information.
    init_sprites = sprite_generators.shuffle(targets)

    goal_task = tasks.FindGoalPosition(terminate_distance=TERMINATE_DISTANCE,
                                       sparse_reward=True)

    noisy_actions = common.noisy_action_space(MOTION_STD_DEV,
                                              PROPORTIONAL_MOTION_NOISE, None)

    return {
        'task': goal_task,
        'action_space': noisy_actions,
        'renderers': common.renderers(),
        'init_sprites': init_sprites,
        'max_episode_length': 60,
        'metadata': {
            'name': os.path.basename(__file__),
            'mode': mode,
        },
    }
def get_config(mode=None):
    """Generate environment config for the embodied goal-finding task.

    Args:
        mode: Task mode. It does not influence the generated task or sprites;
            it is only recorded under config['metadata']['mode'].

    Returns:
        config: Dictionary defining task/environment configuration. Can be fed
            as kwargs to environment.Environment.
    """
    # NOTE: `mode` must stay bound for the whole function because it is stored
    # in the metadata below. Deleting it here would make that later read raise
    # UnboundLocalError.

    shared_factors = distribs.Product([
        distribs.Continuous('x', 0.1, 0.9),
        distribs.Continuous('y', 0.1, 0.9),
        distribs.Discrete('shape', ['square', 'triangle', 'circle']),
        distribs.Discrete('scale', [0.13]),
        distribs.Continuous('c1', 0.3, 1.),
        distribs.Continuous('c2', 0.9, 1.),
    ])
    # Targets and distractors draw c0 from disjoint intervals.
    target_hue = distribs.Continuous('c0', 0., 0.4)
    distractor_hue = distribs.Continuous('c0', 0.5, 0.9)
    target_factors = distribs.Product([
        target_hue,
        shared_factors,
    ])
    distractor_factors = distribs.Product([
        distractor_hue,
        shared_factors,
    ])

    target_sprite_gen = sprite_generators.generate_sprites(
        target_factors, num_sprites=NUM_TARGETS)
    distractor_sprite_gen = sprite_generators.generate_sprites(
        distractor_factors, num_sprites=NUM_DISTRACTORS)
    sprite_gen = sprite_generators.chain_generators(target_sprite_gen,
                                                    distractor_sprite_gen)
    # Randomize sprite ordering to eliminate any task information from
    # occlusions.
    sprite_gen = sprite_generators.shuffle(sprite_gen)

    # Create the agent body: a single small circle with fixed color factors.
    agent_body_factors = distribs.Product([
        distribs.Continuous('x', 0.1, 0.9),
        distribs.Continuous('y', 0.1, 0.9),
        distribs.Discrete('shape', ['circle']),
        distribs.Discrete('scale', [0.07]),
        distribs.Discrete('c0', [1.]),
        distribs.Discrete('c1', [0.]),
        distribs.Discrete('c2', [1.]),
    ])
    agent_body_gen = sprite_generators.generate_sprites(agent_body_factors,
                                                        num_sprites=1)
    # The body generator is chained after the shuffle, so it is not part of
    # the shuffled group of target/distractor sprites.
    sprite_gen = sprite_generators.chain_generators(sprite_gen, agent_body_gen)

    # The task filters sprites with the target c0 distribution.
    task = tasks.FindGoalPosition(filter_distrib=target_hue,
                                  terminate_distance=TERMINATE_DISTANCE)

    renderers = {
        'image':
        spriteworld_renderers.PILRenderer(image_size=(64, 64),
                                          anti_aliasing=5,
                                          color_to_rgb=color_maps.hsv_to_rgb)
    }

    config = {
        'task': task,
        'action_space': action_spaces.Embodied(step_size=0.05),
        'renderers': renderers,
        'init_sprites': sprite_gen,
        'max_episode_length': 50,
        'metadata': {
            'name': os.path.basename(__file__),
            'mode': mode
        }
    }
    return config
예제 #5
0
def get_config(mode='train'):
    """Build the config for the combined goal-finding and clustering task.

    Args:
        mode: 'train' or 'test'. Selects the scale distribution of the
            goal-finding sprites and the c0 range of the clustering sprites.

    Returns:
        config: Dictionary defining task/environment configuration. Can be fed
            as kwargs to environment.Environment.

    Raises:
        ValueError: If mode is neither 'train' nor 'test'.
    """
    # Position and angle distributions shared by every sprite.
    pose_factors = distribs.Product([
        distribs.Continuous('x', 0.1, 0.9),
        distribs.Continuous('y', 0.1, 0.9),
        distribs.Continuous('angle', 0, 360, dtype='int32'),
    ])

    # Train/test split: goal-finding sprites differ in scale and clustering
    # sprites differ in their c0 range.
    held_out_scale = distribs.Continuous('scale', 0.08, 0.12)
    c1_c2_colors = distribs.Product([
        distribs.Continuous('c1', 64, 256, dtype='int32'),
        distribs.Continuous('c2', 64, 256, dtype='int32'),
    ])
    if mode == 'train':
        # Training scales are the full interval minus the held-out one.
        goal_scale = distribs.SetMinus(
            distribs.Continuous('scale', 0.05, 0.15),
            held_out_scale,
        )
        c0_bounds = (128, 256)
    elif mode == 'test':
        goal_scale = held_out_scale
        c0_bounds = (0, 128)
    else:
        raise ValueError(
            'Invalid mode {}. Mode must be "train" or "test".'.format(mode))
    cluster_colors = distribs.Product([
        distribs.Continuous('c0', c0_bounds[0], c0_bounds[1], dtype='int32'),
        c1_c2_colors,
    ])

    # Clustering sprites: one single-shape distribution per cluster and two
    # sprites generated for each of them.
    cluster_shapes = [
        distribs.Discrete('shape', [shape_name])
        for shape_name in ('triangle', 'square', 'pentagon')
    ]
    generators = [
        sprite_generators.generate_sprites(
            distribs.Product([
                pose_factors,
                cluster_colors,
                shape_distrib,
                distribs.Continuous('scale', 0.08, 0.12),
            ]),
            num_sprites=2) for shape_distrib in cluster_shapes
    ]

    # Goal-finding sprites: two color families, each paired (by index) with
    # the goal position in goal_positions.
    goal_colors = [
        distribs.Product([
            distribs.Continuous('c0', 192, 256, dtype='int32'),
            distribs.Continuous('c1', 0, 128, dtype='int32'),
            distribs.Continuous('c2', 64, 128, dtype='int32'),
        ]),
        distribs.Product([
            distribs.Continuous('c0', 0, 128, dtype='int32'),
            distribs.Continuous('c1', 192, 256, dtype='int32'),
            distribs.Continuous('c2', 64, 128, dtype='int32'),
        ]),
    ]
    goal_positions = [(0., 0.5), (1., 0.5)]
    goal_shapes = distribs.Discrete('shape', ['spoke_4', 'star_4'])
    # randint's upper bound is exclusive: 1 or 2 sprites per color family.
    generators.extend(
        sprite_generators.generate_sprites(
            distribs.Product([
                pose_factors,
                goal_scale,
                goal_shapes,
                color_distrib,
            ]),
            num_sprites=lambda: np.random.randint(1, 3))
        for color_distrib in goal_colors)

    # Distractor sprites: circles, between 0 and 2 of them.
    factors_of_distractors = distribs.Product([
        pose_factors,
        distribs.Discrete('shape', ['circle']),
        distribs.Continuous('c0', 64, 256, dtype='uint8'),
        distribs.Continuous('c1', 64, 256, dtype='uint8'),
        distribs.Continuous('c2', 64, 256, dtype='uint8'),
        distribs.Continuous('scale', 0.08, 0.12),
    ])
    generators.append(
        sprite_generators.generate_sprites(
            factors_of_distractors,
            num_sprites=lambda: np.random.randint(0, 3)))

    # Merge everything into one scene, then shuffle the sprite ordering so
    # that occlusions carry no task information.
    init_sprites = sprite_generators.shuffle(
        sprite_generators.chain_generators(*generators))

    # Combined task: clustering by shape plus one goal-finding task per color
    # family.
    subtasks = [
        tasks.Clustering(cluster_shapes, terminate_bonus=0., reward_range=10.)
    ]
    for color_distrib, target_position in zip(goal_colors, goal_positions):
        subtasks.append(
            tasks.FindGoalPosition(
                distribs.Product([color_distrib, goal_shapes]),
                goal_position=target_position,
                weights_dimensions=(1, 0),
                terminate_distance=0.15,
                raw_reward_multiplier=30))
    combined_task = tasks.MetaAggregated(subtasks,
                                         reward_aggregator='sum',
                                         termination_criterion='all')

    image_renderer = spriteworld_renderers.PILRenderer(image_size=(64, 64),
                                                       anti_aliasing=5)

    return {
        'task': combined_task,
        'action_space': action_spaces.SelectMove(scale=0.5),
        'renderers': {'image': image_renderer},
        'init_sprites': init_sprites,
        'max_episode_length': 50,
        'metadata': {
            'name': os.path.basename(__file__),
            'mode': mode,
        },
    }
예제 #6
0
 def testOutput(self):
     """Calling a shuffled 5-sprite generator returns a list of length 5."""
     shuffled_gen = sprite_generators.shuffle(
         sprite_generators.generate_sprites(_distrib_0, num_sprites=5))
     sprites = shuffled_gen()
     self.assertIsInstance(sprites, list)
     self.assertLen(sprites, 5)
예제 #7
0
def get_config(mode='train'):
    """Generate environment config for the multi-target goal-finding task.

    One FindGoalPosition subtask and one single-sprite generator are created
    for every entry of SUBTASKS. A scene contains the sprites of NUM_TARGETS
    of these subtasks; the first combination of subtasks is held out for
    testing and all remaining combinations are used for training.

    Args:
        mode: 'train' or 'test'.

    Returns:
        config: Dictionary defining task/environment configuration. Can be fed
            as kwargs to environment.Environment.

    Raises:
        ValueError: If mode is neither 'train' nor 'test'.
    """

    # Create the subtasks and their corresponding sprite generators
    subtasks = []
    sprite_gen_per_subtask = []
    for subtask in SUBTASKS:
        subtasks.append(
            tasks.FindGoalPosition(
                filter_distrib=subtask['distrib'],
                goal_position=subtask['goal_position'],
                terminate_distance=TERMINATE_DISTANCE,
                raw_reward_multiplier=RAW_REWARD_MULTIPLIER))
        # The subtask's own distribution is combined with the factors that
        # are common to all subtasks.
        factors = distribs.Product((
            subtask['distrib'],
            distribs.Continuous('x', 0.1, 0.9),
            distribs.Continuous('y', 0.1, 0.9),
            distribs.Discrete('shape', ['square', 'triangle', 'circle']),
            distribs.Discrete('scale', [0.13]),
            distribs.Continuous('c1', 0.3, 1.),
            distribs.Continuous('c2', 0.9, 1.),
        ))
        sprite_gen_per_subtask.append(
            sprite_generators.generate_sprites(factors, num_sprites=1))

    # Consider all combinations of subtasks (as tuples of indices into
    # SUBTASKS / sprite_gen_per_subtask).
    subtask_combos = list(
        itertools.combinations(range(len(SUBTASKS)), NUM_TARGETS))
    if mode == 'train':
        # Randomly sample a combination of subtasks, holding one combination out
        sprite_gen = sprite_generators.sample_generator([
            sprite_generators.chain_generators(
                *[sprite_gen_per_subtask[i] for i in c])
            for c in subtask_combos[1:]
        ])

    elif mode == 'test':
        # Use the held-out subtask combination for testing
        sprite_gen = sprite_generators.chain_generators(
            *[sprite_gen_per_subtask[i] for i in subtask_combos[0]])
    else:
        raise ValueError(
            'Invalid mode {}. Mode must be "train" or "test".'.format(mode))

    # Randomize sprite ordering to eliminate any task information from occlusions
    sprite_gen = sprite_generators.shuffle(sprite_gen)

    # All subtasks are always part of the task, whichever combination of
    # sprites ends up in the scene.
    task = tasks.MetaAggregated(subtasks,
                                reward_aggregator='sum',
                                termination_criterion='all')

    config = {
        'task': task,
        'action_space': common.action_space(),
        'renderers': common.renderers(),
        'init_sprites': sprite_gen,
        'max_episode_length': MAX_EPISODE_LENGTH,
        'metadata': {
            'name': os.path.basename(__file__),
            'mode': mode
        }
    }

    return config