Example #1
def test_create_input_placeholders(num_vector, num_visual):
    vec_size = 8
    name_prefix = "test123"
    bspec = create_behavior_spec(num_visual, num_vector, vec_size)
    vec_in, vis_in = ModelUtils.create_input_placeholders(
        bspec.observation_shapes, name_prefix=name_prefix)

    assert isinstance(vis_in, list)
    assert len(vis_in) == num_visual
    assert isinstance(vec_in, tf.Tensor)
    assert vec_in.get_shape().as_list()[1] == num_vector * vec_size

    # Check names contain prefix and vis shapes are correct
    for _vis in vis_in:
        assert _vis.get_shape().as_list() == [None, 84, 84, 3]
        assert _vis.name.startswith(name_prefix)
    assert vec_in.name.startswith(name_prefix)
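
These examples all build on ModelUtils.create_input_placeholders, which takes a list of observation shapes and returns one concatenated vector placeholder plus a list of visual placeholders. Below is a minimal usage sketch, assuming TF1-style graph execution and the mlagents.trainers.tf.models import path (both the wrapper and the module location vary between ml-agents releases):

import numpy as np
from mlagents.tf_utils import tf  # ml-agents' TF1-compat wrapper (assumed available)
from mlagents.trainers.tf.models import ModelUtils  # import path varies by release

# One 8-dim vector observation and one 84x84 RGB visual observation.
obs_shapes = [(8,), (84, 84, 3)]
vec_in, vis_in = ModelUtils.create_input_placeholders(obs_shapes, name_prefix="demo_")

with tf.Session() as sess:
    feed = {
        vec_in: np.zeros((1, 8), dtype=np.float32),
        vis_in[0]: np.zeros((1, 84, 84, 3), dtype=np.float32),
    }
    # Running the placeholders simply echoes the fed values; the real models
    # feed them into observation encoders as in the examples below.
    sess.run([vec_in] + vis_in, feed_dict=feed)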
Example #2
    def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]:
        """
        Creates state encoders for current and future observations.
        Used to implement Curiosity-driven Exploration by Self-supervised Prediction.
        See https://arxiv.org/abs/1705.05363 for more details.
        :return: current and future state encoder tensors.
        """
        encoded_state_list = []
        encoded_next_state_list = []

        # Create input ops for next (t+1) visual observations.
        self.next_vector_in, self.next_visual_in = ModelUtils.create_input_placeholders(
            self.policy.behavior_spec.observation_shapes,
            name_prefix="curiosity_next_")

        if self.next_visual_in:
            visual_encoders = []
            next_visual_encoders = []
            for i, (vis_in, next_vis_in) in enumerate(
                    zip(self.policy.visual_in, self.next_visual_in)):
                # Create the encoder ops for current and next visual input.
                # Note that these encoders are siamese.
                encoded_visual = ModelUtils.create_visual_observation_encoder(
                    vis_in,
                    self.encoding_size,
                    ModelUtils.swish,
                    1,
                    "curiosity_stream_{}_visual_obs_encoder".format(i),
                    False,
                )

                encoded_next_visual = ModelUtils.create_visual_observation_encoder(
                    next_vis_in,
                    self.encoding_size,
                    ModelUtils.swish,
                    1,
                    "curiosity_stream_{}_visual_obs_encoder".format(i),
                    True,
                )
                visual_encoders.append(encoded_visual)
                next_visual_encoders.append(encoded_next_visual)

            hidden_visual = tf.concat(visual_encoders, axis=1)
            hidden_next_visual = tf.concat(next_visual_encoders, axis=1)
            encoded_state_list.append(hidden_visual)
            encoded_next_state_list.append(hidden_next_visual)

        if self.policy.vec_obs_size > 0:
            encoded_vector_obs = ModelUtils.create_vector_observation_encoder(
                self.policy.vector_in,
                self.encoding_size,
                ModelUtils.swish,
                2,
                "curiosity_vector_obs_encoder",
                False,
            )
            encoded_next_vector_obs = ModelUtils.create_vector_observation_encoder(
                self.next_vector_in,
                self.encoding_size,
                ModelUtils.swish,
                2,
                "curiosity_vector_obs_encoder",
                True,
            )
            encoded_state_list.append(encoded_vector_obs)
            encoded_next_state_list.append(encoded_next_vector_obs)
        encoded_state = tf.concat(encoded_state_list, axis=1)
        encoded_next_state = tf.concat(encoded_next_state_list, axis=1)
        return encoded_state, encoded_next_state
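
The two encoder calls above share weights: the first call creates the variables under the scope "curiosity_stream_{i}_visual_obs_encoder", and the second passes reuse=True so the next-state encoder reads the same variables (the "siamese" note in the comments). A minimal sketch of that reuse pattern, using plain TF1 layers rather than the ml-agents helpers:

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

def encode(x, scope, reuse):
    # Same scope name plus reuse=True on the second call -> shared kernel/bias.
    with tf.variable_scope(scope, reuse=reuse):
        return tf.layers.dense(x, 32, activation=tf.nn.relu, name="hidden")

obs_t = tf.placeholder(tf.float32, [None, 8], name="obs_t")
obs_t1 = tf.placeholder(tf.float32, [None, 8], name="obs_t1")
enc_t = encode(obs_t, "curiosity_encoder", reuse=False)
enc_t1 = encode(obs_t1, "curiosity_encoder", reuse=True)  # reuses enc_t's weights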
Example #3
    def create_input_placeholders(self):
        with self.graph.as_default():
            (
                self.global_step,
                self.increment_step_op,
                self.steps_to_increment,
            ) = ModelUtils.create_global_steps()
            self.vector_in, self.visual_in = ModelUtils.create_input_placeholders(
                self.behavior_spec.observation_shapes)
            if self.normalize:
                normalization_tensors = ModelUtils.create_normalizer(
                    self.vector_in)
                self.update_normalization_op = normalization_tensors.update_op
                self.normalization_steps = normalization_tensors.steps
                self.running_mean = normalization_tensors.running_mean
                self.running_variance = normalization_tensors.running_variance
                self.processed_vector_in = ModelUtils.normalize_vector_obs(
                    self.vector_in,
                    self.running_mean,
                    self.running_variance,
                    self.normalization_steps,
                )
            else:
                self.processed_vector_in = self.vector_in
                self.update_normalization_op = None

            self.batch_size_ph = tf.placeholder(shape=None,
                                                dtype=tf.int32,
                                                name="batch_size")
            self.sequence_length_ph = tf.placeholder(shape=None,
                                                     dtype=tf.int32,
                                                     name="sequence_length")
            self.mask_input = tf.placeholder(shape=[None],
                                             dtype=tf.float32,
                                             name="masks")
            # Only needed for PPO, but also required by the BC module
            self.epsilon = tf.placeholder(shape=[None, self.act_size[0]],
                                          dtype=tf.float32,
                                          name="epsilon")
            self.mask = tf.cast(self.mask_input, tf.int32)

            tf.Variable(
                int(self.behavior_spec.is_action_continuous()),
                name="is_continuous_control",
                trainable=False,
                dtype=tf.int32,
            )
            int_version = TFPolicy._convert_version_string(__version__)
            major_ver_t = tf.Variable(
                int_version[0],
                name="trainer_major_version",
                trainable=False,
                dtype=tf.int32,
            )
            minor_ver_t = tf.Variable(
                int_version[1],
                name="trainer_minor_version",
                trainable=False,
                dtype=tf.int32,
            )
            patch_ver_t = tf.Variable(
                int_version[2],
                name="trainer_patch_version",
                trainable=False,
                dtype=tf.int32,
            )
            self.version_tensors = (major_ver_t, minor_ver_t, patch_ver_t)
            tf.Variable(
                MODEL_FORMAT_VERSION,
                name="version_number",
                trainable=False,
                dtype=tf.int32,
            )
            tf.Variable(self.m_size,
                        name="memory_size",
                        trainable=False,
                        dtype=tf.int32)
            if self.behavior_spec.is_action_continuous():
                tf.Variable(
                    self.act_size[0],
                    name="action_output_shape",
                    trainable=False,
                    dtype=tf.int32,
                )
            else:
                tf.Variable(
                    sum(self.act_size),
                    name="action_output_shape",
                    trainable=False,
                    dtype=tf.int32,
                )
Example #4
    def make_inputs(self) -> None:
        """
        Creates the input layers for the discriminator
        """
        self.done_expert_holder = tf.placeholder(shape=[None],
                                                 dtype=tf.float32)
        self.done_policy_holder = tf.placeholder(shape=[None],
                                                 dtype=tf.float32)
        self.done_expert = tf.expand_dims(self.done_expert_holder, -1)
        self.done_policy = tf.expand_dims(self.done_policy_holder, -1)

        if self.policy.behavior_spec.is_action_continuous():
            action_length = self.policy.act_size[0]
            self.action_in_expert = tf.placeholder(shape=[None, action_length],
                                                   dtype=tf.float32)
            self.expert_action = tf.identity(self.action_in_expert)
        else:
            action_length = len(self.policy.act_size)
            self.action_in_expert = tf.placeholder(shape=[None, action_length],
                                                   dtype=tf.int32)
            self.expert_action = tf.concat(
                [
                    tf.one_hot(self.action_in_expert[:, i], act_size)
                    for i, act_size in enumerate(self.policy.act_size)
                ],
                axis=1,
            )

        encoded_policy_list = []
        encoded_expert_list = []

        (
            self.obs_in_expert,
            self.expert_visual_in,
        ) = ModelUtils.create_input_placeholders(
            self.policy.behavior_spec.observation_shapes, "gail_")

        if self.policy.vec_obs_size > 0:
            if self.policy.normalize:
                encoded_expert_list.append(
                    ModelUtils.normalize_vector_obs(
                        self.obs_in_expert,
                        self.policy.running_mean,
                        self.policy.running_variance,
                        self.policy.normalization_steps,
                    ))
                encoded_policy_list.append(self.policy.processed_vector_in)
            else:
                encoded_expert_list.append(self.obs_in_expert)
                encoded_policy_list.append(self.policy.vector_in)

        if self.expert_visual_in:
            visual_policy_encoders = []
            visual_expert_encoders = []
            for i, (vis_in, exp_vis_in) in enumerate(
                    zip(self.policy.visual_in, self.expert_visual_in)):
                encoded_policy_visual = ModelUtils.create_visual_observation_encoder(
                    vis_in,
                    self.encoding_size,
                    ModelUtils.swish,
                    1,
                    "gail_stream_{}_visual_obs_encoder".format(i),
                    False,
                )

                encoded_expert_visual = ModelUtils.create_visual_observation_encoder(
                    exp_vis_in,
                    self.encoding_size,
                    ModelUtils.swish,
                    1,
                    "gail_stream_{}_visual_obs_encoder".format(i),
                    True,
                )
                visual_policy_encoders.append(encoded_policy_visual)
                visual_expert_encoders.append(encoded_expert_visual)
            hidden_policy_visual = tf.concat(visual_policy_encoders, axis=1)
            hidden_expert_visual = tf.concat(visual_expert_encoders, axis=1)
            encoded_policy_list.append(hidden_policy_visual)
            encoded_expert_list.append(hidden_expert_visual)

        self.encoded_expert = tf.concat(encoded_expert_list, axis=1)
        self.encoded_policy = tf.concat(encoded_policy_list, axis=1)
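
For discrete control, make_inputs() turns each action branch into a one-hot vector and concatenates the branches (the tf.one_hot list comprehension above). A standalone sketch of that encoding, with hypothetical branch sizes:

import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

act_size = [3, 2]  # hypothetical branch sizes
action_in = tf.placeholder(tf.int32, [None, len(act_size)], name="expert_action")
one_hot_action = tf.concat(
    [tf.one_hot(action_in[:, i], size) for i, size in enumerate(act_size)], axis=1
)

with tf.Session() as sess:
    out = sess.run(one_hot_action, {action_in: np.array([[0, 1], [2, 0]])})
    # out.shape == (2, 5): 3 one-hot slots for branch 0 followed by 2 for branch 1.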
Example #5
    def __init__(
        self,
        policy,
        m_size=None,
        h_size=128,
        normalize=False,
        use_recurrent=False,
        num_layers=2,
        stream_names=None,
        vis_encode_type=EncoderType.SIMPLE,
    ):
        super().__init__(
            policy,
            m_size,
            h_size,
            normalize,
            use_recurrent,
            num_layers,
            stream_names,
            vis_encode_type,
        )
        with tf.variable_scope(TARGET_SCOPE):
            self.vector_in, self.visual_in = ModelUtils.create_input_placeholders(
                self.policy.behavior_spec.observation_shapes
            )
            if self.policy.normalize:
                normalization_tensors = ModelUtils.create_normalizer(self.vector_in)
                self.update_normalization_op = normalization_tensors.update_op
                self.normalization_steps = normalization_tensors.steps
                self.running_mean = normalization_tensors.running_mean
                self.running_variance = normalization_tensors.running_variance
                self.processed_vector_in = ModelUtils.normalize_vector_obs(
                    self.vector_in,
                    self.running_mean,
                    self.running_variance,
                    self.normalization_steps,
                )
            else:
                self.processed_vector_in = self.vector_in
                self.update_normalization_op = None

            if self.policy.use_recurrent:
                self.memory_in = tf.placeholder(
                    shape=[None, m_size], dtype=tf.float32, name="target_recurrent_in"
                )
                self.value_memory_in = self.memory_in
            hidden_streams = ModelUtils.create_observation_streams(
                self.visual_in,
                self.processed_vector_in,
                1,
                self.h_size,
                0,
                vis_encode_type=vis_encode_type,
                stream_scopes=["critic/value/"],
            )
        if self.policy.use_continuous_act:
            self._create_cc_critic(hidden_streams[0], TARGET_SCOPE, create_qs=False)
        else:
            self._create_dc_critic(hidden_streams[0], TARGET_SCOPE, create_qs=False)
        if self.use_recurrent:
            self.memory_out = tf.concat(
                self.value_memory_out, axis=1
            )  # Needed for Barracuda to work
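
Examples #3 and #5 both wire ModelUtils.create_normalizer / normalize_vector_obs around the vector placeholder. The RunningNormalizer class below is a rough, NumPy-only illustration of the underlying idea (a running mean/variance with clipped normalization), not ml-agents' exact update rule:

import numpy as np

class RunningNormalizer:
    """Tracks a running mean/variance and normalizes observations with clipping."""

    def __init__(self, size):
        self.steps = 0
        self.mean = np.zeros(size, dtype=np.float64)
        self.var = np.zeros(size, dtype=np.float64)

    def update(self, batch):
        # Welford-style per-sample update of the running mean and variance.
        for x in batch:
            self.steps += 1
            delta = x - self.mean
            self.mean += delta / self.steps
            self.var += (delta * (x - self.mean) - self.var) / self.steps

    def normalize(self, x, clip=5.0):
        return np.clip((x - self.mean) / np.sqrt(self.var + 1e-8), -clip, clip)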