Ejemplo n.º 1
0
    def __init__(self,
                 *,
                 env_spec,
                 common_network_cls,
                 common_network_args,
                 state_input_dim,
                 state_network_cls=None,
                 state_network_args=dict(),
                 action_network_cls=None,
                 action_network_args=dict()):
        """Build a network that joins a state branch and an action branch.

        The (optionally pre-processed) state is flattened, concatenated with
        the (optionally pre-processed) action, and the result is fed to a
        "common" network whose output layer becomes this object's output.

        Args:
            env_spec: Environment spec. ``observation_space.shape`` is used as
                the state network's input shape and
                ``action_space.flat_dim`` as the action input width.
            common_network_cls: Callable building the common network. It is
                called with ``common_network_args`` plus an ``input_layer``
                keyword and must expose ``output_layer``.
            common_network_args: Keyword arguments for ``common_network_cls``.
                Not modified by this constructor.
            state_input_dim: Width of the raw state input layer. Only used
                when ``state_network_cls`` is ``None``.
            state_network_cls: Optional callable building a state
                pre-processing network. It must expose ``input_layer`` and
                ``output_layer``.
            state_network_args: Keyword arguments for ``state_network_cls``.
                Any ``'input_shape'`` entry is overridden. Not modified.
            action_network_cls: Optional callable building an action
                pre-processing network. It must expose ``input_layer`` and
                ``output_layer``.
            action_network_args: Keyword arguments for
                ``action_network_cls``. Any ``'input_shape'`` entry is
                overridden. Not modified.
        """
        # Must run first, while locals() still holds only the constructor
        # arguments.
        Serializable.quick_init(self, locals())

        logger.log('Reconciler: {}'.format(locals()))

        self.env_spec = env_spec

        if state_network_cls is not None:
            # Work on a copy: the default dict is shared by every call, and a
            # caller-supplied dict must not be altered behind the caller's
            # back.
            state_kwargs = dict(state_network_args or {})
            state_kwargs['input_shape'] = env_spec.observation_space.shape
            state_network = state_network_cls(**state_kwargs)
            self.state_input_layer = state_network.input_layer
            state_processed_layer = state_network.output_layer
        else:
            # No pre-processing: the raw state input feeds the merge directly.
            self.state_input_layer = L.InputLayer(shape=(None,
                                                         state_input_dim),
                                                  input_var=None,
                                                  name='input_state')
            state_processed_layer = self.state_input_layer

        if action_network_cls is not None:
            # Same copy-before-write rule as for the state arguments.
            action_kwargs = dict(action_network_args or {})
            action_kwargs['input_shape'] = (env_spec.action_space.flat_dim, )
            action_network = action_network_cls(**action_kwargs)
            self.action_input_layer = action_network.input_layer
            action_processed_layer = action_network.output_layer
        else:
            # No pre-processing: the raw action input feeds the merge
            # directly.
            self.action_input_layer = L.InputLayer(
                shape=(None, env_spec.action_space.flat_dim),
                input_var=None,
                name='input_action')
            action_processed_layer = self.action_input_layer

        concat_layer = L.concat(
            [L.flatten(state_processed_layer), action_processed_layer])

        # Copy so that the layer object is not injected into the caller's
        # dict (which is also the dict already handed to quick_init above).
        common_kwargs = dict(common_network_args)
        common_kwargs['input_layer'] = concat_layer
        common_network = common_network_cls(**common_kwargs)

        self.output_layer = common_network.output_layer

        self.output_layers = [self.output_layer]
Ejemplo n.º 2
0
    def __init__(self, name, input_shape, extra_input_shape, output_dim, hidden_sizes,
                 conv_filters, conv_filter_sizes, conv_strides, conv_pads,
                 extra_hidden_sizes=None,
                 hidden_W_init=L.XavierUniformInitializer(), hidden_b_init=tf.zeros_initializer,
                 output_W_init=L.XavierUniformInitializer(), output_b_init=tf.zeros_initializer,
                 hidden_nonlinearity=tf.nn.relu,
                 output_nonlinearity=None,
                 input_var=None, input_layer=None):
        """Build a network that merges a convolutional branch with an extra branch.

        One flat input is split in two: the first ``prod(input_shape)``
        entries are reshaped to ``input_shape`` and sent through a stack of
        Conv2D layers; the rest is reshaped to ``extra_input_shape`` and sent
        through an optional stack of dense layers. The flattened conv output
        and the extra branch are concatenated, passed through the joint dense
        layers and finally a dense output layer.

        Args:
            name: Variable scope under which all layers are created.
            input_shape: Tuple giving the shape of the conv-branch input
                (it is concatenated onto a tuple, so it must be a tuple).
            extra_input_shape: Tuple giving the shape of the extra input.
            output_dim: Number of units of the output layer.
            hidden_sizes: Unit counts of the joint dense layers.
            conv_filters: Number of filters for each conv layer.
            conv_filter_sizes: Filter size for each conv layer.
            conv_strides: Stride for each conv layer (applied to both axes).
            conv_pads: Padding for each conv layer.
            extra_hidden_sizes: Unit counts of the extra-branch dense layers;
                ``None`` means no such layers.
            hidden_W_init: Weight initializer for hidden dense layers.
            hidden_b_init: Bias initializer for hidden dense layers.
            output_W_init: Weight initializer for the output layer.
            output_b_init: Bias initializer for the output layer.
            hidden_nonlinearity: Nonlinearity for conv and hidden dense layers.
            output_nonlinearity: Nonlinearity for the output layer.
            input_var: Input variable handed to the input layer when one is
                created here.
            input_layer: Existing layer to use as input instead of creating
                a new input layer.
        """
        # Must stay first: locals() may only contain the constructor args.
        Serializable.quick_init(self, locals())

        extra_hidden_sizes = [] if extra_hidden_sizes is None else extra_hidden_sizes

        with tf.variable_scope(name):
            conv_flat_dim = np.prod(input_shape)
            extra_flat_dim = np.prod(extra_input_shape)
            joint_flat_dim = conv_flat_dim + extra_flat_dim

            if input_layer is not None:
                net_input = input_layer
            else:
                net_input = L.InputLayer(
                    shape=(None, joint_flat_dim),
                    input_var=input_var,
                    name="input",
                )

            # Leading part of the flat input -> conv branch.
            # NOTE(review): the [0] entry presumably keeps the batch dimension
            # (Lasagne-style reshape spec) -- confirm against L.reshape.
            conv_part = L.SliceLayer(net_input, indices=slice(conv_flat_dim), name="conv_slice")
            conv_branch = L.reshape(conv_part, ([0],) + input_shape, name="conv_reshaped")

            # Remaining part of the flat input -> extra branch.
            extra_part = L.SliceLayer(net_input, indices=slice(conv_flat_dim, None), name="extra_slice")
            extra_branch = L.reshape(extra_part, ([0],) + extra_input_shape, name="extra_reshaped")

            conv_specs = zip(conv_filters, conv_filter_sizes, conv_strides, conv_pads)
            for layer_no, (n_filters, f_size, step, padding) in enumerate(conv_specs):
                conv_branch = L.Conv2DLayer(
                    conv_branch,
                    num_filters=n_filters,
                    filter_size=f_size,
                    stride=(step, step),
                    pad=padding,
                    nonlinearity=hidden_nonlinearity,
                    name="conv_hidden_%d" % layer_no,
                )

            for layer_no, n_units in enumerate(extra_hidden_sizes):
                extra_branch = L.DenseLayer(
                    extra_branch,
                    num_units=n_units,
                    nonlinearity=hidden_nonlinearity,
                    name="extra_hidden_%d" % layer_no,
                    W=hidden_W_init,
                    b=hidden_b_init,
                )

            conv_flat = L.flatten(conv_branch, name="conv_hidden_flat")
            merged = L.concat([conv_flat, extra_branch], name="joint_hidden")

            for layer_no, n_units in enumerate(hidden_sizes):
                merged = L.DenseLayer(
                    merged,
                    num_units=n_units,
                    nonlinearity=hidden_nonlinearity,
                    name="joint_hidden_%d" % layer_no,
                    W=hidden_W_init,
                    b=hidden_b_init,
                )

            net_output = L.DenseLayer(
                merged,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                name="output",
                W=output_W_init,
                b=output_b_init,
            )

            self._l_in = net_input
            self._l_out = net_output

            LayersPowered.__init__(self, [net_output], input_layers=[net_input])
Ejemplo n.º 3
0
    def __init__(self,
                 name,
                 input_shape,
                 extra_input_shape,
                 output_dim,
                 hidden_sizes,
                 conv_filters,
                 conv_filter_sizes,
                 conv_strides,
                 conv_pads,
                 extra_hidden_sizes=None,
                 hidden_W_init=L.XavierUniformInitializer(),
                 hidden_b_init=tf.zeros_initializer(),
                 output_W_init=L.XavierUniformInitializer(),
                 output_b_init=tf.zeros_initializer(),
                 hidden_nonlinearity=tf.nn.relu,
                 output_nonlinearity=None,
                 input_var=None,
                 input_layer=None):
        """Construct a conv network merged with a side input.

        The flat input is cut at ``prod(input_shape)``. The head is reshaped
        to ``input_shape`` and run through Conv2D layers; the tail is
        reshaped to ``extra_input_shape`` and run through optional dense
        layers. Both results are concatenated (the conv result flattened
        first), then passed through the joint dense layers and a dense
        output layer. Everything is created inside ``tf.variable_scope(name)``.

        Args:
            name: Variable scope name.
            input_shape: Shape tuple of the conv-branch input.
            extra_input_shape: Shape tuple of the side input.
            output_dim: Unit count of the output layer.
            hidden_sizes: Unit counts of the joint dense layers.
            conv_filters: Filter count per conv layer.
            conv_filter_sizes: Filter size per conv layer.
            conv_strides: Stride per conv layer, used for both axes.
            conv_pads: Padding per conv layer.
            extra_hidden_sizes: Unit counts of the side-branch dense layers,
                or ``None`` for none.
            hidden_W_init: Weight initializer of hidden dense layers.
            hidden_b_init: Bias initializer of hidden dense layers.
            output_W_init: Weight initializer of the output layer.
            output_b_init: Bias initializer of the output layer.
            hidden_nonlinearity: Nonlinearity of conv and hidden dense layers.
            output_nonlinearity: Nonlinearity of the output layer.
            input_var: Variable passed to the input layer if one is created.
            input_layer: Pre-existing input layer; when given, no input layer
                is created.
        """
        # Keep this as the first statement so locals() holds only the args.
        Serializable.quick_init(self, locals())

        if extra_hidden_sizes is None:
            extra_hidden_sizes = []

        def stack_dense(layer, sizes, name_template):
            # Chain one hidden DenseLayer per entry of `sizes` onto `layer`.
            for pos, width in enumerate(sizes):
                layer = L.DenseLayer(
                    layer,
                    num_units=width,
                    nonlinearity=hidden_nonlinearity,
                    name=name_template % pos,
                    W=hidden_W_init,
                    b=hidden_b_init,
                )
            return layer

        with tf.variable_scope(name):
            head_dim = np.prod(input_shape)
            tail_dim = np.prod(extra_input_shape)
            full_dim = head_dim + tail_dim

            source = input_layer
            if source is None:
                source = L.InputLayer(shape=(None, full_dim),
                                      input_var=input_var,
                                      name="input")

            # NOTE(review): [0] in the target shape presumably means "copy
            # dimension 0 of the input" -- confirm against L.reshape.
            head_slice = L.SliceLayer(source,
                                      indices=slice(head_dim),
                                      name="conv_slice")
            conv_net = L.reshape(head_slice, ([0], ) + input_shape,
                                 name="conv_reshaped")

            tail_slice = L.SliceLayer(source,
                                      indices=slice(head_dim, None),
                                      name="extra_slice")
            side_net = L.reshape(tail_slice, ([0], ) + extra_input_shape,
                                 name="extra_reshaped")

            per_layer = zip(conv_filters, conv_filter_sizes, conv_strides,
                            conv_pads)
            for pos, (filters, size, stride, pad) in enumerate(per_layer):
                conv_net = L.Conv2DLayer(
                    conv_net,
                    num_filters=filters,
                    filter_size=size,
                    stride=(stride, stride),
                    pad=pad,
                    nonlinearity=hidden_nonlinearity,
                    name="conv_hidden_%d" % pos,
                )

            side_net = stack_dense(side_net, extra_hidden_sizes,
                                   "extra_hidden_%d")

            flat_conv = L.flatten(conv_net, name="conv_hidden_flat")
            joint_net = L.concat([flat_conv, side_net], name="joint_hidden")
            joint_net = stack_dense(joint_net, hidden_sizes,
                                    "joint_hidden_%d")

            head = L.DenseLayer(
                joint_net,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                name="output",
                W=output_W_init,
                b=output_b_init,
            )

            self._l_in = source
            self._l_out = head

            LayersPowered.__init__(self, [head], input_layers=[source])