Example #1
    def test_image_scale_transformer(self):
        spec = alf.TensorSpec((3, 16, 16), dtype=torch.uint8)
        transformer = ImageScaleTransformer(spec, min=0.)
        new_spec = transformer.transformed_observation_spec
        self.assertEqual(new_spec.dtype, torch.float32)
        self.assertEqual(new_spec.minimum, 0.)
        self.assertEqual(new_spec.maximum, 1.)
        timestep = DataItem(
            observation=torch.randint(256, (3, 16, 16)).to(torch.uint8))
        transformed = transformer.transform_timestep(timestep, ())[0]
        self.assertLess(
            (transformed.observation * 255 - timestep.observation).abs().max(),
            1e-4)
        transformed = transformer.transform_experience(timestep)
        self.assertLess(
            (transformed.observation * 255 - timestep.observation).abs().max(),
            1e-4)

        spec = dict(img=alf.TensorSpec((3, 16, 16), dtype=torch.uint8),
                    other=alf.TensorSpec(()))
        self.assertRaises(AssertionError, ImageScaleTransformer, spec, min=0.)
        self.assertRaises(AssertionError,
                          ImageScaleTransformer,
                          spec,
                          min=0.,
                          fields=['other'])
        transformer = ImageScaleTransformer(spec, min=0., fields=['img'])
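
For reference, the scaling applied by ImageScaleTransformer is easy to reproduce by hand. A minimal sketch, assuming the default max=1. and uint8 inputs in [0, 255] (scale_image is a hypothetical helper, not part of alf):

import torch

def scale_image(img_uint8, min_val=0., max_val=1.):
    # Map uint8 pixels in [0, 255] linearly onto floats in [min_val, max_val].
    return img_uint8.to(torch.float32) * ((max_val - min_val) / 255.) + min_val

img = torch.randint(256, (3, 16, 16)).to(torch.uint8)
scaled = scale_image(img)
# matches the tolerance checked in the test above
assert (scaled * 255 - img).abs().max() < 1e-4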
Example #2
 def env_info_spec(self):
     return {
         "play0_win": alf.TensorSpec(()),
         "play1_win": alf.TensorSpec(()),
         "draw": alf.TensorSpec(()),
         "invalid_move": alf.TensorSpec(()),
     }
Example #3
    def __init__(self,
                 batch_size,
                 height=19,
                 width=19,
                 winning_thresh=7.5,
                 allow_suicidal_move=False,
                 reward_shaping=False,
                 human_player=None):
        """
        Args:
            batch_size (int): the number of parallel boards
            height (int): height of each board
            width (int): width of each board
            winning_thresh (float): player 0 wins if area0 - area1 > winning_thresh,
                loses if area0 - area1 < winning_thresh, and otherwise it is a
                draw.
            allow_suicidal_move (bool): whether a suicidal move is allowed.
            reward_shaping (bool): if True, instead of using +1/-1 as the reward,
                use ``alf.math.softsign(area0 - area1 - winning_thresh)`` as the
                reward to encourage capturing more area.
            human_player (int|None): 0, 1 or None
        """
        self._batch_size = batch_size
        self._width = width
        self._height = height
        self._max_num_moves = 2 * height * width
        self._winning_thresh = float(winning_thresh)
        self._allow_suicidal_move = allow_suicidal_move
        self._reward_shaping = reward_shaping
        self._human_player = human_player

        # width*height for pass
        # otherwise it is a move at (y=action // width, x=action % width)
        self._action_spec = alf.BoundedTensorSpec((),
                                                  minimum=0,
                                                  maximum=width * height,
                                                  dtype=torch.int64)
        self._observation_spec = OrderedDict(
            board=alf.TensorSpec((1, height, width), torch.int8),
            prev_action=self._action_spec,
            valid_action_mask=alf.TensorSpec([width * height + 1], torch.bool),
            steps=alf.TensorSpec((), torch.int32),
            to_play=alf.TensorSpec((), torch.int8))

        self._B = torch.arange(self._batch_size)
        self._env_ids = torch.arange(batch_size)
        self._pass_action = width * height
        self._board = GoBoard(batch_size, height, width, self._max_num_moves)
        self._previous_board = self._board.get_board()
        self._num_moves = torch.zeros((batch_size, ), dtype=torch.int32)
        self._game_over = torch.zeros((batch_size, ), dtype=torch.bool)
        self._prev_action = torch.full((batch_size, ),
                                       self._pass_action,
                                       dtype=torch.int64)
        self._surface = None
        if human_player is not None:
            logging.info("Use mouse click to place a stone")
            logging.info("Kayboard control:")
            logging.info("P     : pass")
            logging.info("SPACE : refresh display")
Example #4
    def __init__(self,
                 env,
                 progress_favor=10.0,
                 current_score_update_rate=1e-3,
                 past_score_update_rate=5e-4,
                 warmup_period=100):
        """
        env (AlfEnvironment): environment to be wrapped. It needs to be batched.
        progress_favor (float): how much more likely to choose the environment with the
            fastest progress than the ones with no progress. If ``progress_favor``
            is 1, all tasks are sampled uniformly.
        current_score_update_rate (float): the rate for updating the current score
        past_score_update_rate (float): the rate for updating the past score
        warmup_period (int): gradually increase ``progress_favor`` from 1 to
            ``progress_favor`` during the first ``num_tasks * warmup_period``
            episodes
        """
        self._env = env
        assert env.batched, "Only batched env is supported"
        num_tasks = env.num_tasks
        task_names = env.task_names
        batch_size = env.batch_size
        self._episode_rewards = torch.zeros(batch_size)
        assert (
            len(env.action_spec()) == 2 and 'action' in env.action_spec()
            and 'task_id' in env.action_spec()
        ), ("The action_spec in the wrapped "
            "environment should have exactly two keys: 'task_id' and 'action'")
        self._action_spec = env.action_spec()['action']
        self._num_tasks = num_tasks
        self._task_names = task_names
        self._env_info_spec = copy.copy(env.env_info_spec())
        self._env_info_spec.update(
            self._add_task_names({
                'curriculum_task_count': [alf.TensorSpec(())] * num_tasks,
                'curriculum_task_score': [alf.TensorSpec(())] * num_tasks,
                'curriculum_task_prob': [alf.TensorSpec(())] * num_tasks
            }))
        self._zero_curriculum_info = self._add_task_names({
            'curriculum_task_count':
            [torch.zeros(batch_size, device='cpu')] * num_tasks,
            'curriculum_task_score':
            [torch.zeros(batch_size, device='cpu')] * num_tasks,
            'curriculum_task_prob':
            [torch.zeros(batch_size, device='cpu')] * num_tasks
        })
        self._progress_favor = progress_favor
        self._current_score_update_rate = current_score_update_rate
        self._past_score_update_rate = past_score_update_rate
        self._warmup_period = warmup_period * num_tasks
        self._scale = math.log(progress_favor)
        self._total_count = 0
        self._current_scores = torch.zeros(num_tasks, device='cpu')
        self._past_scores = torch.zeros(num_tasks, device='cpu')
        self._task_probs = torch.ones(num_tasks, device='cpu') / num_tasks
        self._task_counts = torch.zeros(num_tasks, device='cpu')

        self._current_task_ids = self._sample_tasks(batch_size)
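
The warmup described in the docstring can be read as ramping the effective progress_favor from 1 up to its final value. A rough sketch under the assumption that task probabilities come from a softmax over normalized progress scores; the wrapper's actual update rules live in its private methods and may differ:

import math
import torch

def effective_scale(total_count, warmup_period, progress_favor):
    # grows from log(1) = 0 to log(progress_favor) over the warmup period
    w = min(1., total_count / warmup_period)
    return w * math.log(progress_favor)

def task_probs(progress, scale):
    # the fastest-progressing task becomes about exp(scale) = progress_favor
    # times more likely than a task with no progress
    normalized = progress / progress.abs().max().clamp(min=1e-30)
    return torch.softmax(scale * normalized, dim=0)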
Example #5
    def test_curriculum_wrapper(self):
        task_names = ['CartPole-v0', 'CartPole-v1']
        env = create_environment(
            env_name=task_names,
            env_load_fn=suite_gym.load,
            num_parallel_environments=4,
            batched_wrappers=(alf_wrappers.CurriculumWrapper, ))

        self.assertTrue(type(env.action_spec()) == alf.BoundedTensorSpec)

        self.assertEqual(env.num_tasks, 2)
        self.assertEqual(len(env.env_info_spec()['curriculum_task_count']), 2)
        self.assertEqual(len(env.env_info_spec()['curriculum_task_score']), 2)
        self.assertEqual(len(env.env_info_spec()['curriculum_task_prob']), 2)
        for i in task_names:
            self.assertEqual(env.env_info_spec()['curriculum_task_count'][i],
                             alf.TensorSpec(()))
            self.assertEqual(env.env_info_spec()['curriculum_task_score'][i],
                             alf.TensorSpec(()))
            self.assertEqual(env.env_info_spec()['curriculum_task_prob'][i],
                             alf.TensorSpec(()))

        time_step = env.reset()
        self.assertEqual(len(env.env_info_spec()['curriculum_task_count']), 2)
        self.assertEqual(len(env.env_info_spec()['curriculum_task_score']), 2)
        self.assertEqual(len(env.env_info_spec()['curriculum_task_prob']), 2)
        for i in task_names:
            self.assertEqual(
                time_step.env_info['curriculum_task_count'][i].shape, (4, ))
            self.assertEqual(
                time_step.env_info['curriculum_task_score'][i].shape, (4, ))
            self.assertEqual(
                time_step.env_info['curriculum_task_prob'][i].shape, (4, ))

        for j in range(500):
            time_step = env.step(time_step.prev_action)
            self.assertEqual(time_step.env_id, torch.arange(4))
            self.assertEqual(len(env.env_info_spec()['curriculum_task_count']),
                             2)
            self.assertEqual(len(env.env_info_spec()['curriculum_task_score']),
                             2)
            self.assertEqual(len(env.env_info_spec()['curriculum_task_prob']),
                             2)
            for i in task_names:
                self.assertEqual(
                    time_step.env_info['curriculum_task_count'][i].shape,
                    (4, ))
                self.assertEqual(
                    time_step.env_info['curriculum_task_score'][i].shape,
                    (4, ))
                self.assertEqual(
                    time_step.env_info['curriculum_task_prob'][i].shape, (4, ))
            sum_probs = sum(
                time_step.env_info['curriculum_task_prob'].values())
            self.assertTrue(
                torch.all((sum_probs == 0.) | ((sum_probs - 1.).abs() < 1e-3)))
Example #6
 def __init__(self, dim, size, name="FIFOMemory"):
     """
     Args:
         dim (int): dimension of memory content
         size (int): number of memory slots
     """
     self._built = False
     state_spec = (alf.TensorSpec((size, dim), dtype=torch.float32),
                   alf.TensorSpec((), dtype=torch.int64))
     self._range = torch.arange(size).unsqueeze(0)
     super().__init__(dim, size, state_spec=state_spec, name=name)
Example #7
    def __init__(self, network, n, name=None):
        """
        A parallel network has ``n`` copies of a network with the same structure
        but different, independently initialized parameters.

        ``NaiveParallelNetwork`` creates ``n`` independent networks with the same
        structure as ``network`` and evaluates them separately in a loop during
        ``forward()``.

        Args:
            network (Network): the parallel network will have ``n`` copies of
                ``network``.
            n (int): the number of copies of ``network``
            name (str): a string that will be used as the name of the created
                NaiveParallelNetwork instance. If ``None``, ``naive_parallel_``
                followed by ``network.name`` will be used by default.
        """
        super().__init__(network.input_tensor_spec,
                         name if name else 'naive_parallel_%s' % network.name)
        self._networks = nn.ModuleList(
            [network.copy(name=self.name + '_%d' % i) for i in range(n)])
        self._n = n
        self._state_spec = alf.nest.map_structure(
            lambda spec: alf.TensorSpec((n, ) + spec.shape, spec.dtype),
            network.state_spec)
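
A minimal usage sketch: all n copies receive the same input and their outputs are stacked along a new dimension of size n, matching the (n,) + spec.shape state specs above. The exact output layout of forward() should be confirmed against the class itself:

import torch
import alf

net = alf.networks.EncodingNetwork(
    input_tensor_spec=alf.TensorSpec((16, )), fc_layer_params=(32, ))
pnet = NaiveParallelNetwork(net, n=4)
x = torch.randn(8, 16)    # [batch_size, ...]
y, state = pnet(x)
# expected: one 32-d output per copy, i.e. y.shape == (8, 4, 32)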
Example #8
    def test_conversions(self):
        dists = {
            't':
            torch.tensor([[1., 2., 4.], [3., 3., 1.]]),
            'd':
            dist_utils.DiagMultivariateNormal(
                torch.tensor([[1., 2.], [2., 2.]]),
                torch.tensor([[2., 3.], [1., 1.]]))
        }
        params = dist_utils.distributions_to_params(dists)
        dists_spec = dist_utils.extract_spec(dists, from_dim=1)
        self.assertEqual(dists_spec['t'],
                         alf.TensorSpec(shape=(3, ), dtype=torch.float32))
        self.assertEqual(type(dists_spec['d']), dist_utils.DistributionSpec)
        self.assertEqual(len(params), 2)
        self.assertEqual(dists['t'], params['t'])
        self.assertEqual(dists['d'].base_dist.mean, params['d']['loc'])
        self.assertEqual(dists['d'].base_dist.stddev, params['d']['scale'])

        dists1 = dist_utils.params_to_distributions(params, dists_spec)
        self.assertEqual(len(dists1), 2)
        self.assertEqual(dists1['t'], dists['t'])
        self.assertEqual(type(dists1['d']), type(dists['d']))

        params_spec = dist_utils.to_distribution_param_spec(dists_spec)
        alf.nest.assert_same_structure(params_spec, params)
        params1_spec = dist_utils.extract_spec(params)
        self.assertEqual(params_spec, params1_spec)
Example #9
def _create_merlin_algorithm(env,
                             encoder_fc_layers=(3, ),
                             latent_dim=4,
                             lstm_size=(4, ),
                             memory_size=20,
                             learning_rate=1e-3,
                             debug_summaries=True):
    config = TrainerConfig(root_dir="dummy", unroll_length=6)
    observation_spec = env.observation_spec()
    action_spec = env.action_spec()
    algorithm = MerlinAlgorithm(
        observation_spec=observation_spec,
        action_spec=action_spec,
        env=env,
        config=config,
        encoders=alf.networks.EncodingNetwork(
            input_tensor_spec=observation_spec,
            fc_layer_params=encoder_fc_layers,
            activation=math_ops.identity,
            name="ObsEncoder"),
        decoders=DecodingAlgorithm(decoder=alf.networks.EncodingNetwork(
            input_tensor_spec=alf.TensorSpec((latent_dim, )),
            fc_layer_params=encoder_fc_layers,
            activation=math_ops.identity,
            name="ObsDecoder"),
                                   loss_weight=100.),
        latent_dim=latent_dim,
        lstm_size=lstm_size,
        memory_size=memory_size,
        optimizer=alf.optimizers.AdamTF(lr=learning_rate),
        debug_summaries=debug_summaries)

    return algorithm
Example #10
 def __init__(self, observation_spec, action_spec, debug_summaries):
     super().__init__(observation_spec,
                      action_spec,
                      train_state_spec=MCTSState(
                          steps=alf.TensorSpec((), dtype=torch.int64)),
                      debug_summaries=debug_summaries)
     self._model = None
Example #11
 def _make_stacked_spec(self, spec):
     assert isinstance(
         spec, alf.TensorSpec), (str(type(spec)) + " is not a TensorSpec")
     if spec.ndim > 0:
         stacked_shape = list(copy.copy(spec.shape))
         stacked_shape[self._stack_axis] = stacked_shape[
             self._stack_axis] * self._stack_size
         stacked_shape = tuple(stacked_shape)
     else:
         stacked_shape = (self._stack_size, )
     if not spec.is_bounded():
         return alf.TensorSpec(stacked_shape, spec.dtype)
     else:
         if spec.minimum.shape != ():
             assert spec.minimum.shape == spec.shape
             minimum = np.repeat(
                 spec.minimum,
                 repeats=self._stack_size,
                 axis=self._stack_axis)
         else:
             minimum = spec.minimum
         if spec.maximum.shape != ():
             assert spec.maximum.shape == spec.shape
             maximum = np.repeat(
                 spec.maximum,
                 repeats=self._stack_size,
                 axis=self._stack_axis)
         else:
             maximum = spec.maximum
         return alf.BoundedTensorSpec(
             stacked_shape,
             minimum=minimum,
             maximum=maximum,
             dtype=spec.dtype)
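
A worked example of the shape arithmetic above, assuming stack_size=3 and stack_axis=0: a (5, 6) spec grows along axis 0 to (15, 6), while a scalar spec, having no axis to grow, becomes (3,):

import alf

spec = alf.TensorSpec((5, 6))
stacked_shape = list(spec.shape)
stacked_shape[0] *= 3                  # -> [15, 6]
assert tuple(stacked_shape) == (15, 6)
# a scalar () spec instead turns into alf.TensorSpec((3, ))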
Example #12
def create_algorithm(env):
    config = TrainerConfig(root_dir="dummy", unroll_length=5)
    obs_spec = alf.TensorSpec((2, ), dtype='float32')
    action_spec = alf.BoundedTensorSpec(
        shape=(), dtype='int32', minimum=0, maximum=2)

    fc_layer_params = (10, 8, 6)

    actor_network = partial(
        ActorDistributionNetwork,
        fc_layer_params=fc_layer_params,
        discrete_projection_net_ctor=alf.networks.CategoricalProjectionNetwork)

    value_network = partial(ValueNetwork, fc_layer_params=(10, 8, 1))

    alg = ActorCriticAlgorithm(
        observation_spec=obs_spec,
        action_spec=action_spec,
        actor_network_ctor=actor_network,
        value_network_ctor=value_network,
        env=env,
        config=config,
        optimizer=alf.optimizers.Adam(lr=1e-2),
        debug_summaries=True,
        name="MyActorCritic")
    return alg
Example #13
    def __init__(self,
                 dim,
                 size,
                 snapshot_only=False,
                 normalize=True,
                 scale=None,
                 usage_decay=None,
                 name='MemoryWithUsage'):
        """

        See Methods 2.3 of `Unsupervised Predictive Memory in a Goal-Directed
        Agent <https://arxiv.org/abs/1803.10760>`_

        Args:
            dim (int): dimension of memory content
            size (int): number of memory slots
            snapshot_only (bool): If True, only keeps the last snapshot of the
              memory instead of keeping all the memory snapshots at every step.
              If True, gradient cannot be propagated to the writer.
            normalize (bool): If True, use cosine similarity, otherwise use dot
              product.
            scale (None|float): Scale the similarity by this. If ``scale`` is
              None, a default value is used based on ``normalize``. If
              ``normalize`` is True, ``scale`` defaults to 5.0. If ``normalize``
              is False, ``scale`` defaults to ``1/sqrt(dim)``.
            usage_decay (None|float): The usage will be scaled by this factor
              at every ``write`` call. If None, it defaults to ``1 - 1 / size``.
        """
        self._normalize = normalize
        if scale is None:
            if normalize:
                scale = 5.0
            else:
                scale = 1. / math.sqrt(dim)
        self._scale = scale
        self._built = False
        self._snapshot_only = snapshot_only
        if usage_decay is None:
            usage_decay = 1. - 1. / size
        self._usage_decay = usage_decay
        state_spec = (alf.TensorSpec((size, dim), dtype=torch.float32),
                      alf.TensorSpec((size, ), dtype=torch.float32))
        super(MemoryWithUsage, self).__init__(dim,
                                              size,
                                              state_spec=state_spec,
                                              name=name)
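
The scale defaults above act as an attention temperature. A minimal sketch of how such a scaled similarity is typically turned into read weights; this is an illustration only, the actual read logic lives in the memory's read method:

import math
import torch
import torch.nn.functional as F

def read_weights(query, memory, normalize=True, scale=None):
    # query: [B, dim], memory: [B, size, dim]
    if scale is None:
        scale = 5.0 if normalize else 1. / math.sqrt(query.shape[-1])
    if normalize:
        sim = F.cosine_similarity(query.unsqueeze(1), memory, dim=-1)
    else:
        sim = (query.unsqueeze(1) * memory).sum(-1)
    return torch.softmax(scale * sim, dim=-1)  # [B, size]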
Example #14
    def test_transformer_network(self, centralized_memory=True):
        d_model = 32
        core_size = 2
        memory_size = 128
        num_memory_layers = 8
        input_tensor_spec = [
            alf.TensorSpec((), dtype=torch.int64),
            alf.TensorSpec((3, 7, 7), dtype=torch.float32)
        ]
        input_preprocessors = [
            nn.Sequential(nn.Embedding(100, d_model),
                          alf.layers.Reshape((1, d_model))),
            nn.Sequential(alf.layers.Conv2D(3, d_model, kernel_size=1),
                          alf.layers.Reshape((d_model, 49)),
                          alf.layers.Transpose())
        ]
        transformer = TransformerNetwork(
            input_tensor_spec,
            memory_size=memory_size,
            core_size=core_size,
            num_prememory_layers=2,
            num_memory_layers=num_memory_layers,
            num_attention_heads=8,
            d_ff=d_model,
            centralized_memory=centralized_memory,
            input_preprocessors=input_preprocessors)

        state_spec = transformer.state_spec
        if centralized_memory:
            self.assertEqual(len(state_spec), 1)
            self.assertEqual(state_spec[0][0].shape, (memory_size, d_model))
        else:
            self.assertEqual(len(state_spec), num_memory_layers)
            for i in range(num_memory_layers):
                self.assertEqual(state_spec[i][0].shape,
                                 (memory_size, d_model))
        batch_size = 64
        x = [
            torch.randint(100, size=(batch_size, )),
            torch.rand((batch_size, 3, 7, 7))
        ]
        state = alf.utils.spec_utils.zeros_from_spec(transformer.state_spec,
                                                     batch_size)
        y, state = transformer(x, state)

        self.assertEqual(y.shape, (batch_size, core_size * d_model))
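
Both input preprocessors above turn their inputs into sequences of d_model-dimensional tokens before they reach the transformer. A shape walkthrough inferred from the layer parameters in the test:

# int64 scalar: Embedding(100, 32) -> [B, 32] -> Reshape -> [B, 1, 32]
# image:        Conv2D(3 -> 32, kernel_size=1) -> [B, 32, 7, 7]
#               Reshape   -> [B, 32, 49]
#               Transpose -> [B, 49, 32]
# so the transformer sees 1 + 49 = 50 tokens of size d_model = 32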
Example #15
 def __init__(self, batch_size, obs_shape=(2, )):
     super().__init__()
     self._batch_size = batch_size
     self._rewards = torch.tensor([0.5, 1.0, -1.])
     self._observation_spec = alf.TensorSpec(obs_shape, dtype='float32')
     self._action_spec = alf.BoundedTensorSpec(
         shape=(), dtype='int64', minimum=0, maximum=2)
     self.reset()
Example #16
 def env_info_spec(self):
     return {
         "player0_win": alf.TensorSpec(()),
         "player1_win": alf.TensorSpec(()),
         "player0_pass": alf.TensorSpec(()),
         "player1_pass": alf.TensorSpec(()),
         "draw": alf.TensorSpec(()),
         "invalid_move": alf.TensorSpec(()),
         "too_long": alf.TensorSpec(()),
         "bad_move": alf.TensorSpec(()),
     }
Example #17
    def reward_spec(self):
        """Defines the reward provided by the environment.

        For most environments the reward is a scalar, so we provide a default
        implementation that returns a scalar spec.

        Returns:
            alf.TensorSpec
        """
        return alf.TensorSpec(())
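
An environment with a vector-valued reward would override this default. A minimal hypothetical sketch:

def reward_spec(self):
    # hypothetical: a 2-d reward, e.g. (task_reward, shaping_reward)
    return alf.TensorSpec((2, ))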
Example #18
 def __init__(self, batch_size):
     self._batch_size = batch_size
     self._observation_spec = alf.TensorSpec((3, 3))
     self._action_spec = alf.BoundedTensorSpec((),
                                               minimum=0,
                                               maximum=8,
                                               dtype=torch.int64)
     self._line_x = torch.tensor(
         [[0, 0, 0], [1, 1, 1], [2, 2, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2],
          [0, 1, 2], [0, 1, 2]]).unsqueeze(0)
     self._line_y = torch.tensor(
         [[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 0, 0], [1, 1, 1], [2, 2, 2],
          [0, 1, 2], [2, 1, 0]]).unsqueeze(0)
     self._B = torch.arange(self._batch_size)
     self._empty_board = self._observation_spec.zeros()
     self._boards = self._observation_spec.zeros((self._batch_size, ))
     self._env_ids = torch.arange(batch_size)
     self._player_0 = torch.tensor(-1.)
     self._player_1 = torch.tensor(1.)
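
_line_x and _line_y enumerate the 8 winning lines of the board (3 rows, 3 columns and 2 diagonals). A sketch of how they can be used with advanced indexing to detect a winner; this check_winner helper is illustrative, the environment's actual check lives elsewhere:

import torch

def check_winner(boards, line_x, line_y):
    # boards: [B, 3, 3]; line_x, line_y: [1, 8, 3]
    B = torch.arange(boards.shape[0])
    lines = boards[B[:, None, None], line_x, line_y]  # [B, 8, 3]
    sums = lines.sum(dim=-1)                          # [B, 8]
    player0_win = (sums == -3.).any(dim=-1)           # player 0 stones are -1.
    player1_win = (sums == 3.).any(dim=-1)            # player 1 stones are +1.
    return player0_win, player1_win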
Example #19
    def __init__(self,
                 input_tensor_spec,
                 output_tensor_spec=alf.TensorSpec((3, 64, 64)),
                 name='ResnetDecodingNetwork'):
        """

        Args:
            input_tensor_spec (TensorSpec): input latent spec.
            output_tensor_spec (TensorSpec): desired output shape. Height and
                width need to be divisible by 8.
        """
        super().__init__(input_tensor_spec, name=name)
        c, h, w = output_tensor_spec.shape
        assert h % 8 == 0
        assert w % 8 == 0

        dec_layers = []
        relu = torch.relu_
        dec_layers.extend([
            alf.layers.FC(input_tensor_spec.shape[0], 500, activation=relu),
            alf.layers.FC(500, h * w, activation=relu),
            alf.layers.Reshape((64, h // 8, w // 8))
        ])

        for stride in reversed([2, 1, 2, 1, 2, 1]):
            dec_layers.append(
                alf.layers.BottleneckBlock(
                    in_channels=64,
                    kernel_size=3,
                    filters=(64, 32, 64),
                    stride=stride,
                    transpose=True))

        dec_layers.append(
            alf.layers.ConvTranspose2D(
                in_channels=64,
                out_channels=3,
                kernel_size=1,
                activation=torch.sigmoid))

        self._model = nn.Sequential(*dec_layers)
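
The divisible-by-8 requirement comes from the three stride-2 transposed bottleneck blocks, each of which doubles the spatial size. A shape walkthrough for the default (3, 64, 64) output:

# FC(500 -> h*w) gives 64*64 = 4096 units = 64 * (h//8) * (w//8)
# Reshape                     -> (64,  8,  8)
# strides [1, 2, 1, 2, 1, 2]: -> (64, 16, 16) -> (64, 32, 32) -> (64, 64, 64)
# ConvTranspose2D(64 -> 3, kernel_size=1, sigmoid) -> (3, 64, 64) in [0, 1]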
Example #20
    def __init__(self,
                 observation_spec,
                 stack_size=4,
                 stack_axis=0,
                 fields=None):
        """Create a FrameStacker object.

        Args:
            observation_spec (nested TensorSpec): describing the observation in
                the timestep
            stack_size (int): the number of frames to stack
            stack_axis (int): the dimension along which to stack the observation
            fields (list[str]): fields to be stacked. A field str is a multi-level
                path denoted by "A.B.C". If None, the non-nested observation
                itself is stacked.
        """
        assert stack_size >= 1, (
            "stack_size should be an integer greater than "
            "or equal to 1")
        self._stack_axis = stack_axis
        self._stack_size = stack_size
        self._frames = dict()
        self._fields = fields or [None]
        self._exp_fields = []
        prev_frames_spec = []
        stacked_observation_spec = observation_spec
        for field in self._fields:
            if field is not None:
                exp_field = 'observation.' + field
            else:
                exp_field = 'observation'
            self._exp_fields.append(exp_field)

            spec = alf.nest.get_field(observation_spec, field)
            prev_frames_spec.append([spec] * (self._stack_size - 1))
            stacked_observation_spec = alf.nest.transform_nest(
                stacked_observation_spec, field, self._make_stacked_spec)

        super().__init__(
            transformed_observation_spec=stacked_observation_spec,
            state_spec=FrameStackState(
                steps=alf.TensorSpec((), dtype=torch.int64),
                prev_frames=prev_frames_spec))
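
A minimal usage sketch with a single non-nested image observation, assuming FrameStacker from the module above; the spec grows along stack_axis and the state keeps the stack_size - 1 previous frames:

import torch
import alf

obs_spec = alf.TensorSpec((3, 16, 16), dtype=torch.uint8)
stacker = FrameStacker(obs_spec, stack_size=4, stack_axis=0)
# expected: (12, 16, 16), i.e. 4 frames stacked along the channel axis
print(stacker.transformed_observation_spec.shape)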
Example #21
 def __init__(self, *args):
     super().__init__(*args)
     alf.set_default_device("cpu")  # the "spawn" start method is required to use cuda.
     self.data_spec = DataItem(env_id=alf.TensorSpec(shape=(),
                                                     dtype=torch.int64),
                               x=alf.TensorSpec(shape=(self.dim, ),
                                                dtype=torch.float32),
                               t=alf.TensorSpec(shape=(),
                                                dtype=torch.int32),
                               o=dict({
                                   "a":
                                   alf.TensorSpec(shape=(),
                                                  dtype=torch.float32),
                                   "g":
                                   alf.TensorSpec(shape=(),
                                                  dtype=torch.float32)
                               }),
                               reward=alf.TensorSpec(shape=(),
                                                     dtype=torch.float32))
Example #22
 def observation_spec(self):
     return alf.TensorSpec([7])
Example #23
 def observation_spec(self):
     return alf.TensorSpec([self._max_num_collisions, 3])
Example #24
 def observation_spec(self):
     return alf.TensorSpec([len(self._future_indices), 3])
Example #25
 def observation_spec(self):
     return alf.TensorSpec([self._max_num_detections, 4])
Example #26
    def test_frame_stacker(self, stack_axis=0):
        data_spec = DataItem(step_type=alf.TensorSpec((), dtype=torch.int32),
                             observation=dict(scalar=alf.TensorSpec(()),
                                              vector=alf.TensorSpec((7, )),
                                              matrix=alf.TensorSpec((5, 6)),
                                              tensor=alf.TensorSpec(
                                                  (2, 3, 4))))
        replay_buffer = ReplayBuffer(data_spec=data_spec,
                                     num_environments=2,
                                     max_length=1024,
                                     num_earliest_frames_ignored=2)
        frame_stacker = FrameStacker(
            data_spec.observation,
            stack_size=3,
            stack_axis=stack_axis,
            fields=['scalar', 'vector', 'matrix', 'tensor'])

        new_spec = frame_stacker.transformed_observation_spec
        self.assertEqual(new_spec['scalar'].shape, (3, ))
        self.assertEqual(new_spec['vector'].shape, (21, ))
        if stack_axis == -1:
            self.assertEqual(new_spec['matrix'].shape, (5, 18))
            self.assertEqual(new_spec['tensor'].shape, (2, 3, 12))
        elif stack_axis == 0:
            self.assertEqual(new_spec['matrix'].shape, (15, 6))
            self.assertEqual(new_spec['tensor'].shape, (6, 3, 4))

        def _step_type(t, period):
            if t % period == 0:
                return StepType.FIRST
            if t % period == period - 1:
                return StepType.LAST
            return StepType.MID

        observation = alf.nest.map_structure(
            lambda spec: spec.randn((1000, 2)), data_spec.observation)
        state = common.zero_tensor_from_nested_spec(frame_stacker.state_spec,
                                                    2)

        def _get_stacked_data(t, b):
            if stack_axis == -1:
                return dict(scalar=observation['scalar'][t, b],
                            vector=observation['vector'][t, b].reshape(-1),
                            matrix=observation['matrix'][t, b].transpose(
                                0, 1).reshape(5, 18),
                            tensor=observation['tensor'][t, b].permute(
                                1, 2, 0, 3).reshape(2, 3, 12))
            elif stack_axis == 0:
                return dict(scalar=observation['scalar'][t, b],
                            vector=observation['vector'][t, b].reshape(-1),
                            matrix=observation['matrix'][t, b].reshape(15, 6),
                            tensor=observation['tensor'][t,
                                                         b].reshape(6, 3, 4))

        def _check_equal(stacked, expected, b):
            self.assertEqual(stacked['scalar'][b], expected['scalar'])
            self.assertEqual(stacked['vector'][b], expected['vector'])
            self.assertEqual(stacked['matrix'][b], expected['matrix'])
            self.assertEqual(stacked['tensor'][b], expected['tensor'])

        for t in range(1000):
            batch = DataItem(
                step_type=torch.tensor([_step_type(t, 17),
                                        _step_type(t, 22)]),
                observation=alf.nest.map_structure(lambda x: x[t],
                                                   observation))
            replay_buffer.add_batch(batch)
            timestep, state = frame_stacker.transform_timestep(batch, state)
            if t == 0:
                for b in (0, 1):
                    expected = _get_stacked_data([0, 0, 0], b)
                    _check_equal(timestep.observation, expected, b)
            if t == 1:
                for b in (0, 1):
                    expected = _get_stacked_data([0, 0, 1], b)
                    _check_equal(timestep.observation, expected, b)
            if t == 2:
                for b in (0, 1):
                    expected = _get_stacked_data([0, 1, 2], b)
                    _check_equal(timestep.observation, expected, b)
            if t == 16:
                for b in (0, 1):
                    expected = _get_stacked_data([14, 15, 16], b)
                    _check_equal(timestep.observation, expected, b)
            if t == 17:
                # use ts for the stacked positions to avoid shadowing the loop
                # variable t
                for b, ts in ((0, [17, 17, 17]), (1, [15, 16, 17])):
                    expected = _get_stacked_data(ts, b)
                    _check_equal(timestep.observation, expected, b)
            if t == 18:
                for b, ts in ((0, [17, 17, 18]), (1, [16, 17, 18])):
                    expected = _get_stacked_data(ts, b)
                    _check_equal(timestep.observation, expected, b)
            if t == 22:
                for b, ts in ((0, [20, 21, 22]), (1, [22, 22, 22])):
                    expected = _get_stacked_data(ts, b)
                    _check_equal(timestep.observation, expected, b)

        batch_info = BatchInfo(env_ids=torch.tensor([0, 1, 0, 1],
                                                    dtype=torch.int64),
                               positions=torch.tensor([0, 1, 18, 22],
                                                      dtype=torch.int64))

        # [4, 2, ...]
        experience = replay_buffer.get_field(
            '', batch_info.env_ids.unsqueeze(-1),
            batch_info.positions.unsqueeze(-1) + torch.arange(2))
        experience = experience._replace(batch_info=batch_info,
                                         replay_buffer=replay_buffer)
        experience = frame_stacker.transform_experience(experience)
        expected = _get_stacked_data([0, 0, 0], 0)
        _check_equal(experience.observation, expected, (0, 0))
        expected = _get_stacked_data([0, 0, 1], 0)
        _check_equal(experience.observation, expected, (0, 1))

        expected = _get_stacked_data([0, 0, 1], 1)
        _check_equal(experience.observation, expected, (1, 0))
        expected = _get_stacked_data([0, 1, 2], 1)
        _check_equal(experience.observation, expected, (1, 1))

        expected = _get_stacked_data([17, 17, 18], 0)
        _check_equal(experience.observation, expected, (2, 0))
        expected = _get_stacked_data([17, 18, 19], 0)
        _check_equal(experience.observation, expected, (2, 1))

        expected = _get_stacked_data([22, 22, 22], 1)
        _check_equal(experience.observation, expected, (3, 0))
        expected = _get_stacked_data([22, 22, 23], 1)
        _check_equal(experience.observation, expected, (3, 1))
Example #27
    def test_mcts_algorithm(self):
        observation_spec = alf.TensorSpec((3, 3))
        action_spec = alf.BoundedTensorSpec((),
                                            dtype=torch.int64,
                                            minimum=0,
                                            maximum=8)
        model = TicTacToeModel()
        time_step = TimeStep(step_type=torch.tensor([StepType.MID]))

        # board situations and expected actions
        # yapf: disable
        cases = [
            ([[1, -1,  1],
              [1, -1, -1],
              [0,  0,  1]], 6),
            ([[0,  0,  0],
              [0, -1, -1],
              [0,  1,  0]], 3),
            ([[ 1, -1, -1],
              [-1, -1,  0],
              [ 0,  1,  1]], 6),
            ([[-1,  0,  1],
              [ 0, -1, -1],
              [ 0,  0,  1]], 3),
            ([[0, 0,  0],
              [0, 0,  0],
              [0, 0, -1]], 4),
            ([[0,  0, 0],
              [0, -1, 0],
              [0,  0, 0]], (0, 2, 6, 8)),
            ([[0,  0,  0],
              [0,  1, -1],
              [1, -1, -1]], 2),
        ]
        # yapf: enable

        def _create_mcts(observation_spec, action_spec, num_simulations):
            return MCTSAlgorithm(
                observation_spec,
                action_spec,
                discount=1.0,
                root_dirichlet_alpha=100.,
                root_exploration_fraction=0.25,
                num_simulations=num_simulations,
                pb_c_init=1.25,
                pb_c_base=19652,
                visit_softmax_temperature_fn=VisitSoftmaxTemperatureByMoves(
                    [(0, 1.0), (10, 0.0001)]),
                known_value_bounds=(-1, 1),
                is_two_player_game=True)

        # test case serially
        for observation, action in cases:
            observation = torch.tensor([observation], dtype=torch.float32)
            state = MCTSState(steps=(observation != 0).sum(dim=(1, 2)))
            # We use a varying num_simulations instead of a fixed large number
            # such as 2000 to make the test faster.
            num_simulations = int((observation == 0).sum().cpu()) * 200
            mcts = _create_mcts(
                observation_spec, action_spec, num_simulations=num_simulations)
            mcts.set_model(model)
            alg_step = mcts.predict_step(
                time_step._replace(observation=observation), state)
            print(observation, alg_step.output, alg_step.info)
            if type(action) == tuple:
                self.assertTrue(alg_step.output[0] in action)
            else:
                self.assertEqual(alg_step.output[0], action)

        # test batch predict
        observation = torch.tensor([case[0] for case in cases],
                                   dtype=torch.float32)
        state = MCTSState(steps=(observation != 0).sum(dim=(1, 2)))
        mcts = _create_mcts(
            observation_spec, action_spec, num_simulations=2000)
        mcts.set_model(model)
        alg_step = mcts.predict_step(
            time_step._replace(
                step_type=torch.tensor([StepType.MID] * len(cases)),
                observation=observation), state)
        for i, (observation, action) in enumerate(cases):
            if type(action) == tuple:
                self.assertTrue(alg_step.output[i] in action)
            else:
                self.assertEqual(alg_step.output[i], action)
Example #28
 def reward_spec(self):
     return alf.TensorSpec(())
Example #29
    def __init__(
        self,
        parent_actor,
        sensor_type='sensor.camera.rgb',
        xyz=(1.6, 0., 1.7),
        pyr=(0., 0., 0.),
        attachment_type='rigid',
        fov=90.0,
        fstop=1.4,
        gamma=2.2,
        image_size_x=640,
        image_size_y=480,
        iso=1200.0,
    ):
        """
        Args:
            parent_actor (carla.Actor): the parent actor of this sensor
            sensor_type (str): 'sensor.camera.rgb', 'sensor.camera.depth',
                'sensor.camera.semantic_segmentation'
            attachment_type (str): There are two types of attachment. 'rigid':
                the object follows its parent position strictly. 'spring_arm':
                the object expands or retracts depending on camera situation.
            xyz (tuple[float]): the attachment position (x, y, z) relative to
                the parent_actor.
            pyr (tuple[float]): the attachment rotation (pitch, yaw, roll) in
                degrees.
            fov (float): horizontal field of view in degrees.
            image_size_x (int): image width in pixels.
            image_size_y (int): image height in pixels.
            gamma (float): target gamma value of the camera.
            iso (float): the camera sensor sensitivity.
        """
        super().__init__(parent_actor)
        attachment_type_map = {
            'rigid': carla.AttachmentType.Rigid,
            'spring_arm': carla.AttachmentType.SpringArm,
        }
        assert attachment_type in attachment_type_map, (
            "Unknown attachment_type %s" % attachment_type)
        self._attachment_type = attachment_type_map[attachment_type]
        self._camera_transform = carla.Transform(carla.Location(*xyz),
                                                 carla.Rotation(*pyr))
        self._sensor_type = sensor_type

        sensor_map = {
            'sensor.camera.rgb': (carla.ColorConverter.Raw, 3),
            'sensor.camera.depth': (carla.ColorConverter.LogarithmicDepth, 1),
            'sensor.camera.semantic_segmentation':
            (carla.ColorConverter.Raw, 1),
        }
        assert sensor_type in sensor_map, "Unknown sensor type %s" % sensor_type
        conversion, num_channels = sensor_map[sensor_type]

        self._conversion = conversion
        self._observation_spec = alf.TensorSpec(
            [num_channels, image_size_y, image_size_x], dtype='uint8')

        world = self._parent.get_world()
        bp = world.get_blueprint_library().find(sensor_type)

        attributes = dict(fov=fov,
                          fstop=fstop,
                          gamma=gamma,
                          image_size_x=image_size_x,
                          image_size_y=image_size_y,
                          iso=iso)
        for name, val in attributes.items():
            if bp.has_attribute(name):
                bp.set_attribute(name, str(val))

        self._sensor = self._parent.get_world().spawn_actor(
            bp,
            self._camera_transform,
            attach_to=self._parent,
            attachment_type=self._attachment_type)
        # We need to pass the lambda a weak reference to self to avoid
        # circular reference.
        weak_self = weakref.ref(self)
        self._sensor.listen(
            lambda image: CameraSensor._parse_image(weak_self, image))
        self._frame = 0
        self._image = np.zeros([num_channels, image_size_y, image_size_x],
                               dtype=np.uint8)
Example #30
    def forward(self, experience, value, target_value):
        """Cacluate the loss.

        The first dimension of all the tensors is the time dimension and the
        second dimension is the batch dimension.

        Args:
            experience (Experience): experience collected from ``unroll()`` or
                a replay buffer. All tensors are time-major.
            value (torch.Tensor): the time-major tensor for the value at each time
                step. The loss is between this and the calculated return.
            target_value (torch.Tensor): the time-major tensor for the value at
                each time step. This is used to calculate the return.
                ``target_value`` can be the same as ``value``.
        Returns:
            LossInfo: with the ``extra`` field same as ``loss``.
        """
        if self._lambda == 1.0:
            returns = value_ops.discounted_return(
                rewards=experience.reward,
                values=target_value,
                step_types=experience.step_type,
                discounts=experience.discount * self._gamma)
        elif self._lambda == 0.0:
            returns = value_ops.one_step_discounted_return(
                rewards=experience.reward,
                values=target_value,
                step_types=experience.step_type,
                discounts=experience.discount * self._gamma)
        else:
            advantages = value_ops.generalized_advantage_estimation(
                rewards=experience.reward,
                values=target_value,
                step_types=experience.step_type,
                discounts=experience.discount * self._gamma,
                td_lambda=self._lambda)
            returns = advantages + target_value[:-1]

        value = value[:-1]
        if self._normalize_target:
            if self._target_normalizer is None:
                self._target_normalizer = AdaptiveNormalizer(
                    alf.TensorSpec(value.shape[2:]),
                    auto_update=False,
                    debug_summaries=self._debug_summaries,
                    name=self._name + ".target_normalizer")

            self._target_normalizer.update(returns)
            returns = self._target_normalizer.normalize(returns)
            value = self._target_normalizer.normalize(value)

        if self._debug_summaries and alf.summary.should_record_summaries():
            mask = experience.step_type[:-1] != StepType.LAST
            with alf.summary.scope(self._name):

                def _summarize(v, r, td, suffix):
                    alf.summary.scalar(
                        "explained_variance_of_return_by_value" + suffix,
                        tensor_utils.explained_variance(v, r, mask))
                    safe_mean_hist_summary('values' + suffix, v, mask)
                    safe_mean_hist_summary('returns' + suffix, r, mask)
                    safe_mean_hist_summary("td_error" + suffix, td, mask)

                if value.ndim == 2:
                    _summarize(value, returns, returns - value, '')
                else:
                    td = returns - value
                    for i in range(value.shape[2]):
                        suffix = '/' + str(i)
                        _summarize(value[..., i], returns[..., i], td[..., i],
                                   suffix)

        loss = self._td_error_loss_fn(returns.detach(), value)

        if loss.ndim == 3:
            # Multi-dimensional reward: average the critic loss over all reward
            # dimensions.
            loss = loss.mean(dim=2)

        # The shape of the loss expected by Algorithm.update_with_gradient is
        # [T, B], so we need to extend it with an additional row of zeros.
        loss = tensor_utils.tensor_extend_zero(loss)
        return LossInfo(loss=loss, extra=loss)
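
The three branches compute members of the same family of lambda-returns. A minimal numeric sketch of the two endpoints for a single trajectory, assuming no episode boundaries and following the time-major convention above (the reward at index t is received when entering step t):

import torch

rewards = torch.tensor([0., 1., 1.])    # [T]; rewards[0] is unused
values = torch.tensor([0.5, 0.5, 0.5])  # [T]; V(s_0), V(s_1), V(s_2)
gamma = 0.9

# lambda = 0: one-step TD target r_{t+1} + gamma * V(s_{t+1})
one_step = rewards[1:] + gamma * values[1:]          # [T - 1]

# lambda = 1: discounted return bootstrapped with the final value
ret, mc = values[-1], []
for r in reversed(rewards[1:].tolist()):
    ret = r + gamma * ret
    mc.append(ret)
mc = torch.tensor(mc[::-1])                          # [T - 1]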