Esempio n. 1
0
    def __init__(self, name, output_dim, hidden_sizes, hidden_nonlinearity,
                 output_nonlinearity, hidden_W_init=L.XavierUniformInitializer(), hidden_b_init=tf.zeros_initializer,
                 output_W_init=L.XavierUniformInitializer(), output_b_init=tf.zeros_initializer, batch_size=None,
                 input_var=None, input_layer=None, input_shape=None, batch_normalization=False, weight_normalization=False,
                 ):
        """Build a fully connected network inside the variable scope ``name``.

        The network is an input layer (``input_layer`` if given, otherwise a
        new ``L.InputLayer`` of shape ``(batch_size,) + input_shape``), one
        dense layer per entry of ``hidden_sizes`` and a final dense layer with
        ``output_dim`` units.  When ``batch_normalization`` is true,
        ``L.batch_norm`` is applied to the input, to every hidden layer and to
        the output layer.

        Every layer that is created is also recorded in ``self._layers``.
        """
        # quick_init receives locals(), so it runs before any other local
        # name is bound in this method.
        Serializable.quick_init(self, locals())
        self.name = name

        with tf.variable_scope(name):
            if input_layer is not None:
                net_input = input_layer
            else:
                net_input = L.InputLayer(
                    shape=(batch_size,) + input_shape, input_var=input_var, name="input")
            self._layers = [net_input]

            def apply_batch_norm(layer):
                # Returns `layer` untouched unless batch normalization is on;
                # otherwise records the layers produced by L.batch_norm and
                # continues from the last one.
                if not batch_normalization:
                    return layer
                bn_layers = L.batch_norm(layer)
                self._layers += bn_layers
                return bn_layers[-1]

            # NOTE(review): with batch normalization enabled, the layer
            # returned by apply_batch_norm is already in self._layers and is
            # appended a second time below - confirm this is intended.
            current = apply_batch_norm(net_input)
            for idx, hidden_size in enumerate(hidden_sizes):
                dense = L.DenseLayer(
                    current,
                    num_units=hidden_size,
                    nonlinearity=hidden_nonlinearity,
                    name="hidden_%d" % idx,
                    W=hidden_W_init,
                    b=hidden_b_init,
                    weight_normalization=weight_normalization
                )
                current = apply_batch_norm(dense)
                self._layers.append(current)

            net_output = apply_batch_norm(L.DenseLayer(
                current,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                name="output",
                W=output_W_init,
                b=output_b_init,
                weight_normalization=weight_normalization
            ))
            self._layers.append(net_output)

            self._l_in = net_input
            self._l_out = net_output
            # Target layer with the same trailing dimension as the output.
            self._l_tar = L.InputLayer(
                shape=(batch_size,) + (output_dim,), input_var=input_var, name="target")

            # self._input_var = l_in.input_var
            self._output = L.get_output(net_output)

            LayersPowered.__init__(self, net_output)
Esempio n. 2
0
    def __init__(self,
                 name,
                 trainer,
                 obs_shape,
                 act_dim,
                 hidden_sizes,
                 hidden_nonlinearity,
                 active_epoch=0,
                 batch_size=None):
        """Build a model that predicts an action from two observations.

        Three float32 placeholders are created: ``x`` and ``x_prime`` of shape
        ``(batch_size, obs_shape)`` and ``a`` of shape
        ``(batch_size, act_dim)``.  ``x`` and ``x_prime`` are concatenated
        along axis 1 and fed through ``feedforward`` (layer sizes
        ``hidden_sizes + [act_dim]``) inside the variable scope ``name``.

        ``self.cost`` is built from ``tf.nn.l2_loss`` of the difference
        between the prediction and ``a``; ``self.opt`` is the op returned by
        ``trainer.minimize`` for that cost.
        """
        self.trainer = trainer
        self.batch_size = batch_size
        self.active_epoch = active_epoch

        obs_dims = (batch_size, obs_shape)
        self.x = tf.placeholder(tf.float32, shape=obs_dims, name="x")
        self.x_prime = tf.placeholder(tf.float32,
                                      shape=obs_dims,
                                      name="x_prime")
        self.a = tf.placeholder(tf.float32,
                                shape=(batch_size, act_dim),
                                name="a")

        # The axis is passed first, matching the argument order used
        # throughout this file for tf.concat.
        stacked_obs = tf.concat(1, [self.x, self.x_prime])
        with tf.variable_scope(name):
            # NOTE(review): the initializer / trainable flags are passed
            # positionally; their meaning depends on the feedforward helper
            # imported by this module - confirm the order against it.
            prediction = feedforward(stacked_obs,
                                     hidden_sizes + [act_dim],
                                     hidden_nonlinearity,
                                     L.XavierUniformInitializer(),
                                     True,
                                     tf.zeros_initializer,
                                     True,
                                     linear_output=True,
                                     drop_prob=0.0)
            self.model = prediction
            residual = prediction - self.a
            self.cost = tf.reduce_mean(tf.nn.l2_loss(residual))
            self.opt = self.trainer.minimize(self.cost)
Esempio n. 3
0
def feedforward(l_hid,
                hidden_sizes,
                hidden_nonlinearity,
                weight_normalization=False,
                hidden_W_init=L.XavierUniformInitializer(),
                hidden_b_init=tf.zeros_initializer,
                linear_output=False,
                start_idx=0):
    """Stack one ``L.DenseLayer`` per entry of ``hidden_sizes`` on ``l_hid``.

    Args:
        l_hid: layer the stack is built on top of.
        hidden_sizes: number of units of each dense layer, in order.
        hidden_nonlinearity: nonlinearity given to the dense layers.
        weight_normalization: forwarded to every ``L.DenseLayer``.
        hidden_W_init: weight initializer forwarded as ``W``.
        hidden_b_init: bias initializer forwarded as ``b``.
        linear_output: when true, the last layer of the stack is created
            with ``nonlinearity=None`` instead of ``hidden_nonlinearity``.
        start_idx: offset added to the layer index in the layer name
            (``"hidden_<idx + start_idx>"``).

    Returns:
        The last layer created, or ``l_hid`` itself when ``hidden_sizes`` is
        empty.
    """
    last_idx = len(hidden_sizes) - 1
    for idx, hidden_size in enumerate(hidden_sizes):
        if linear_output and idx == last_idx:
            # Assumes DenseLayer treats nonlinearity=None as "no
            # nonlinearity" - TODO confirm against the layers module.
            nonlin = None
        else:
            nonlin = hidden_nonlinearity
        # Pass the per-layer choice; previously hidden_nonlinearity was passed
        # unconditionally, so linear_output had no effect.
        l_hid = L.DenseLayer(l_hid,
                             num_units=hidden_size,
                             nonlinearity=nonlin,
                             name="hidden_%d" % (idx + start_idx),
                             W=hidden_W_init,
                             b=hidden_b_init,
                             weight_normalization=weight_normalization)
    return l_hid
Esempio n. 4
0
    def __init__(self,
                 name,
                 input_shape,
                 output_dim,
                 conv_filters,
                 conv_filter_sizes,
                 conv_strides,
                 conv_pads,
                 hidden_sizes,
                 hidden_nonlinearity,
                 output_nonlinearity,
                 hidden_W_init=L.XavierUniformInitializer(),
                 hidden_b_init=tf.zeros_initializer,
                 output_W_init=L.XavierUniformInitializer(),
                 output_b_init=tf.zeros_initializer,
                 input_var=None,
                 input_layer=None,
                 batch_normalization=False,
                 weight_normalization=False):
        """
        A network composed of several convolution layers followed by some fc layers.

        input_shape: (width,height,channel)
            HOWEVER, network inputs are assumed flattened. This network will
            first unflatten the inputs and then apply the standard
            convolutions and so on. A 2-element shape gets a leading 1
            prepended before unflattening; any other length is used as-is
            without reshaping. Ignored when input_layer is given.
        conv_filters: a list of numbers of convolution kernel
        conv_filter_sizes: a list of sizes (int) of the convolution kernels
        conv_strides: a list of strides (int) of the conv kernels
        conv_pads: a list of pad formats (either 'SAME' or 'VALID')
        hidden_nonlinearity: a nonlinearity from tf.nn, shared by all conv and fc layers
        hidden_sizes: a list of numbers of hidden units for all fc layers
        output_nonlinearity: nonlinearity of the output layer. If it equals
            L.spatial_expected_softmax, hidden_sizes must be empty and
            output_dim must be twice the last entry of conv_filters; the
            output is then an L.SpatialExpectedSoftmaxLayer on the last conv
            layer instead of a dense layer.
        batch_normalization: if true, L.batch_norm is applied to the input,
            every conv layer, every fc layer and the dense output layer.
        """
        # quick_init receives locals(), so it runs before any other local
        # name is bound in this method.
        Serializable.quick_init(self, locals())
        with tf.variable_scope(name):
            if input_layer is not None:
                l_in = input_layer
                l_hid = l_in
            elif len(input_shape) in (2, 3):
                # Flat input that is reshaped back to an image-like tensor.
                l_in = L.InputLayer(shape=(None, np.prod(input_shape)),
                                    input_var=input_var,
                                    name="input")
                if len(input_shape) == 2:
                    input_shape = (1, ) + input_shape
                l_hid = L.reshape(l_in, ([0], ) + input_shape,
                                  name="reshape_input")
            else:
                l_in = L.InputLayer(shape=(None, ) + input_shape,
                                    input_var=input_var,
                                    name="input")
                l_hid = l_in

            if batch_normalization:
                l_hid = L.batch_norm(l_hid)

            conv_specs = zip(conv_filters, conv_filter_sizes, conv_strides,
                             conv_pads)
            for idx, (conv_filter, filter_size, stride,
                      pad) in enumerate(conv_specs):
                l_hid = L.Conv2DLayer(
                    l_hid,
                    num_filters=conv_filter,
                    filter_size=filter_size,
                    stride=(stride, stride),
                    pad=pad,
                    nonlinearity=hidden_nonlinearity,
                    name="conv_hidden_%d" % idx,
                    weight_normalization=weight_normalization,
                )
                if batch_normalization:
                    l_hid = L.batch_norm(l_hid)

            if output_nonlinearity == L.spatial_expected_softmax:
                assert len(hidden_sizes) == 0
                assert output_dim == conv_filters[-1] * 2
                # The last layer's nonlinearity is replaced by the identity
                # before the spatial softmax is applied.
                l_hid.nonlinearity = tf.identity
                l_out = L.SpatialExpectedSoftmaxLayer(l_hid)
            else:
                l_hid = L.flatten(l_hid, name="conv_flatten")
                for idx, hidden_size in enumerate(hidden_sizes):
                    l_hid = L.DenseLayer(
                        l_hid,
                        num_units=hidden_size,
                        nonlinearity=hidden_nonlinearity,
                        name="hidden_%d" % idx,
                        W=hidden_W_init,
                        b=hidden_b_init,
                        weight_normalization=weight_normalization,
                    )
                    if batch_normalization:
                        l_hid = L.batch_norm(l_hid)
                l_out = L.DenseLayer(
                    l_hid,
                    num_units=output_dim,
                    nonlinearity=output_nonlinearity,
                    name="output",
                    W=output_W_init,
                    b=output_b_init,
                    weight_normalization=weight_normalization,
                )
                if batch_normalization:
                    l_out = L.batch_norm(l_out)
            self._l_in = l_in
            self._l_out = l_out
            # self._input_var = l_in.input_var

        LayersPowered.__init__(self, l_out)
Esempio n. 5
0
    def __init__(self,
                 name,
                 trainer,
                 cell,
                 max_steps,
                 d_obs_shape,
                 d_act_shape,
                 t_obs_shape,
                 t_act_shape,
                 d_hidden_sizes,
                 t_hidden_sizes,
                 c_hidden_sizes,
                 hidden_nonlinearity,
                 transform_actions=False,
                 t_w_trainable=True,
                 t_b_trainable=True,
                 d_trainable=True,
                 cost_weight=0.5,
                 hidden_W_init=L.XavierUniformInitializer(),
                 hidden_b_init=tf.zeros_initializer,
                 output_W_init=L.XavierUniformInitializer(),
                 output_b_init=tf.zeros_initializer,
                 batch_size=None,
                 input_var=None,
                 input_layer=None,
                 batch_normalization=False,
                 weight_normalization=False,
                 disable_policy=0,
                 disable_flip_gradient=0,
                 flip_reward=0,
                 d_drop_prob=0.0,
                 c_drop_prob=0.0,
                 t_drop_prob=0.0,
                 wgan=False,
                 share_weights=False,
                 clip_weights={},
                 conv_params={}):
        """Build the graph of a two-domain ("source"/"target") reward model.

        For each domain a per-time-step "transformer" network is built under
        the variable scope ``reward/transf``. Its outputs feed two heads under
        ``reward/conf`` (fed through ``flip_g``) and ``reward/disc`` (either a
        per-step feedforward network or an RNN, depending on ``cell``). The
        method then defines ``self.rewards``, the costs ``self.cost_disc`` and
        ``self.cost_conf``, the corresponding minimize ops and, for the WGAN
        variant, weight-clipping ops.

        Args:
            name: not read in this body; the local is overwritten by the
                loop variables named ``name`` below.
            trainer: callable invoked as ``trainer(learning_rate=self.lr)``;
                the result must provide ``minimize``.
            cell: RNN cell passed to ``tf.nn.rnn`` for the "disc" head, or
                ``None`` to use a per-step feedforward head instead.
            max_steps: size of the time axis (axis 1) of every placeholder.
            d_obs_shape, t_obs_shape: observation shapes of the source
                (``d_``) and target (``t_``) inputs; concatenated with
                tuples below, so they are expected to be tuples.
            d_act_shape, t_act_shape: last dimension of the source / target
                action placeholders.
            d_hidden_sizes, t_hidden_sizes, c_hidden_sizes: layer sizes of
                the "disc" head, the transformer and the "conf" head.
            hidden_nonlinearity: nonlinearity forwarded to the helpers.
            transform_actions: if true, actions are concatenated to the
                observations before the transformer; otherwise they are
                concatenated to the transformer features afterwards.
            t_w_trainable, t_b_trainable: forwarded positionally to
                ``feedforward`` (also for the RNN-branch "disc" head).
            cost_weight: factor in [0, 1] applied to ``self.cost_conf``.
            hidden_W_init, hidden_b_init: forwarded to the helpers.
            batch_size: overwritten with ``None`` at the top of the body.
            disable_policy: rewards are scaled by ``1 - disable_policy`` in
                the non-WGAN, non-flipped case.
            disable_flip_gradient: if truthy, ``flip_g`` is the identity
                instead of ``flip_gradient``.
            flip_reward: if truthy (or ``wgan``), rewards are the raw "disc"
                outputs for the target domain.
            d_drop_prob, t_drop_prob: forwarded as ``drop_prob`` to the
                RNN-branch "disc" head and to the transformer.
            wgan: selects the WGAN-style "disc" cost and enables clip ops.
            share_weights: if true, source and target share the transformer
                scope (requires equal flattened observation sizes).
            clip_weights: mapping from sub-scope name (second component of
                the variable name under ``reward/``) to a clip magnitude;
                only read, and only when ``wgan`` is true.
            conv_params: mapping with keys ``'hws'``, ``'channels'``,
                ``'strides'`` and ``'pads'``; only read when an observation
                shape has more than one dimension.

        ``d_trainable``, ``output_W_init``, ``output_b_init``, ``input_var``,
        ``input_layer``, ``batch_normalization``, ``weight_normalization``
        and ``c_drop_prob`` are accepted but not read in this body.
        """
        assert cost_weight >= 0.0 and cost_weight <= 1.0

        # NOTE(review): the attribute is always None even when a cell is
        # passed; the `cell` argument itself is still used further down.
        self.cell = None
        # NOTE(review): this discards the batch_size argument, so every
        # placeholder below gets an unspecified batch dimension.
        self.batch_size = batch_size = None
        self.wgan = wgan
        if wgan:
            print("USING WASSERSTEIN GAN")

        t_obs_shape_flat = np.prod(t_obs_shape)
        d_obs_shape_flat = np.prod(d_obs_shape)
        assert (not share_weights) or (t_obs_shape_flat == d_obs_shape_flat)

        # Observation placeholders: (batch, max_steps, flattened observation).
        self.x_source = x_source = tf.placeholder(tf.float32,
                                                  shape=(batch_size, max_steps,
                                                         d_obs_shape_flat),
                                                  name="x_source")
        self.x_target = x_target = tf.placeholder(tf.float32,
                                                  shape=(batch_size, max_steps,
                                                         t_obs_shape_flat),
                                                  name="x_target")

        # Action placeholders: (batch, max_steps, action dimension).
        self.x_source_a = x_source_a = tf.placeholder(tf.float32,
                                                      shape=(batch_size,
                                                             max_steps,
                                                             d_act_shape),
                                                      name="x_source_a")
        self.x_target_a = x_target_a = tf.placeholder(tf.float32,
                                                      shape=(batch_size,
                                                             max_steps,
                                                             t_act_shape),
                                                      name="x_target_a")
        if transform_actions:
            assert len(d_obs_shape) <= 1
            assert len(t_obs_shape) <= 1
            # Append the actions to the observations along the last axis.
            x_source = tf.concat(2, [self.x_source, self.x_source_a])
            x_target = tf.concat(2, [self.x_target, self.x_target_a])
            # NOTE(review): d_act_shape / t_act_shape are used as plain
            # dimensions in the placeholders above, while here they are added
            # to the observation shapes - confirm the operand types agree.
            d_obs_shape = tuple(d_obs_shape + d_act_shape)
            t_obs_shape = tuple(t_obs_shape + t_act_shape)
        if disable_flip_gradient:
            flip_g = lambda x: x
        else:
            flip_g = lambda x: flip_gradient(x)

        # Per-domain results, keyed by "source" / "target".
        self.transf_outputs = {}

        self.transf_features = {}
        self.rnn_features = {}

        self.conf_logits = {}
        self.disc_logits = {}

        self.disc_outputs = {}
        self.conf_outputs = {}

        # Scalar learning rate fed at run time and handed to the trainer.
        self.lr = lr = tf.placeholder(tf.float32, shape=())

        # Split the time axis into max_steps tensors and reshape each piece
        # to (-1,) + observation shape.
        x_source_list = [
            tf.reshape(i, (-1, ) + d_obs_shape)
            for i in tf.split(1, max_steps, x_source)
        ]
        x_target_list = [
            tf.reshape(i, (-1, ) + t_obs_shape)
            for i in tf.split(1, max_steps, x_target)
        ]

        if transform_actions:
            # Actions are already part of the observations; use placeholders
            # of None so the zip below still lines up.
            x_source_a_list = [None] * len(x_source_list)
            x_target_a_list = [None] * len(x_target_list)
        else:
            x_source_a_list = [
                tf.reshape(i, (-1, d_act_shape))
                for i in tf.split(1, max_steps, x_source_a)
            ]
            x_target_a_list = [
                tf.reshape(i, (-1, t_act_shape))
                for i in tf.split(1, max_steps, x_target_a)
            ]

        # define transformers
        with tf.variable_scope("reward"):
            # NOTE: the loop variable `name` shadows the constructor argument
            # of the same name from here on.
            for name, list_in, list_a, obs_shape in [
                ("source", x_source_list, x_source_a_list, d_obs_shape),
                ("target", x_target_list, x_target_a_list, t_obs_shape)
            ]:
                # One shared "transf" scope when weights are shared,
                # otherwise a separate sub-scope per domain.
                scope_name = "transf"
                if not share_weights:
                    scope_name += "/{}".format(name)
                with tf.variable_scope(scope_name) as scope:
                    if share_weights and name == "target":
                        scope.reuse_variables()
                    l_ins, feats = [], []
                    for (i, l_in, x_a) in zip(range(max_steps), list_in,
                                              list_a):
                        # reshape input variable
                        if batch_size is None:
                            bs = -1
                        else:
                            bs = batch_size
                        l_in = tf.reshape(l_in, (bs, ) + obs_shape)
                        # reuse scope
                        if i > 0:
                            scope.reuse_variables()

                        # perform convolutions, if appropriate.
                        if len(obs_shape) > 1:
                            l_in = convolution(l_in,
                                               conv_params['hws'],
                                               conv_params['channels'],
                                               conv_params['strides'],
                                               conv_params['pads'],
                                               hidden_nonlinearity,
                                               linear_output=False,
                                               drop_prob=0.0)
                            # Flatten everything but the batch axis.
                            hidden_dim = np.prod(l_in.get_shape()[1:]).value
                            print("CONV OUTPUTS ARE {}-dimensional".format(
                                hidden_dim))
                            l_in = tf.reshape(l_in, (-1, hidden_dim))
                        l_in = feedforward(l_in,
                                           t_hidden_sizes,
                                           hidden_nonlinearity,
                                           hidden_W_init,
                                           t_w_trainable,
                                           hidden_b_init,
                                           t_b_trainable,
                                           drop_prob=t_drop_prob,
                                           linear_output=False)
                        # Features are stored before the actions are appended.
                        feats.append(l_in)
                        if not transform_actions:
                            l_in = tf.concat(1, [l_in, x_a])
                        l_ins.append(l_in)
                    self.transf_outputs[name] = l_ins
                    self.transf_features[name] = tf.pack(feats)

            # "conf" head: built on the transformer outputs passed through
            # flip_g; variables are reused for the target domain.
            with tf.variable_scope("conf") as scope:
                for name in ["source", "target"]:
                    if name == "target": scope.reuse_variables()
                    # NOTE(review): under Python 3 map() yields an iterator,
                    # not a list - confirm feedforward_on_list accepts that.
                    l_ins = feedforward_on_list(
                        map(flip_g,
                            self.transf_outputs[name]), scope, c_hidden_sizes,
                        hidden_nonlinearity, hidden_W_init, hidden_b_init)
                    self.conf_outputs[name] = l_ins
                    self.conf_logits[name] = tf.reshape(
                        tf.pack(l_ins), (-1, 1))

            # define graph for discriminator
            with tf.variable_scope("disc") as scope:
                if cell is None:
                    for name in ["source", "target"]:
                        if name == "target": scope.reuse_variables()
                        l_ins = feedforward_on_list(self.transf_outputs[name],
                                                    scope, d_hidden_sizes,
                                                    hidden_nonlinearity,
                                                    hidden_W_init,
                                                    hidden_b_init)
                        # In this branch outputs and logits are built from
                        # the same expression.
                        self.disc_outputs[name] = tf.reshape(
                            tf.pack(l_ins), (-1, 1))
                        self.disc_logits[name] = tf.reshape(
                            tf.pack(l_ins), (-1, 1))
                else:
                    #cell = tf.nn.rnn_cell.BasicLSTMCell(lstm_units, state_is_tuple= True)
                    for i, name in enumerate(["source", "target"]):
                        if i > 0:
                            scope.reuse_variables()
                        rnn_outputs, states = tf.nn.rnn(
                            cell,
                            self.transf_outputs[name],
                            #initial_state= initial_state,
                            dtype=tf.float32)
                        # Stack the per-step outputs and move the batch axis
                        # to the front.
                        rnn_outputs = tf.pack(rnn_outputs)
                        rnn_outputs = tf.transpose(rnn_outputs, [1, 0, 2])
                        # From here on the local batch_size is a tensor.
                        batch_size = tf.shape(rnn_outputs)[0]
                        # Relies on the cell's private _num_units attribute.
                        stacked_rnn_outputs = tf.reshape(
                            rnn_outputs,
                            (batch_size * max_steps, cell._num_units))
                        # NOTE(review): rnn_outputs is already a single
                        # tensor here, so it is packed a second time.
                        self.rnn_features[name] = tf.pack(rnn_outputs)

                        # NOTE(review): the transformer's t_w_trainable /
                        # t_b_trainable flags are passed here, while
                        # d_trainable is never read - confirm.
                        pred = feedforward(stacked_rnn_outputs,
                                           d_hidden_sizes,
                                           hidden_nonlinearity,
                                           hidden_W_init,
                                           t_w_trainable,
                                           hidden_b_init,
                                           t_b_trainable,
                                           drop_prob=d_drop_prob)
                        self.disc_outputs[name] = tf.reshape(
                            pred, (batch_size, max_steps, 1))
                        # WARNING: using [-1] here might break when eos != max_traj_len
                        self.disc_logits[name] = self.disc_outputs[name][:,
                                                                         -1, :]

        # Rewards come from the "disc" outputs on the target domain.
        if flip_reward or wgan:
            self.rewards = self.disc_outputs["target"]
        else:
            self.rewards = -tf.log(1.0 - tf.nn.sigmoid(
                self.disc_outputs["target"])) * (1. - disable_policy)

        # cost on "reward" classifier
        # Labels: 0 for target rows, 1 for source rows.
        r_logits = tf.concat(
            0, [self.disc_logits["target"], self.disc_logits["source"]])
        r_labels = tf.concat(0, [
            tf.zeros_like(self.disc_logits["target"]),
            tf.ones_like(self.disc_logits["source"])
        ])
        if wgan:
            c_pi = tf.reduce_sum(self.disc_outputs["target"])
            c_ex = tf.reduce_sum(self.disc_outputs["source"])
            self.cost_disc = c_pi - c_ex
        else:
            self.cost_disc = tf.reduce_sum(
                tf.nn.sigmoid_cross_entropy_with_logits(r_logits, r_labels))
        # cost on domain "confusion" classifier
        c_logits = tf.concat(
            0, [self.conf_logits["target"], self.conf_logits["source"]])
        c_labels = tf.concat(0, [
            tf.zeros_like(self.conf_logits["target"]),
            tf.ones_like(self.conf_logits["source"])
        ])
        self.cost_conf = tf.constant(cost_weight) * tf.reduce_sum(
            tf.nn.sigmoid_cross_entropy_with_logits(c_logits, c_labels))

        #self.cost = self.cost_disc + self.cost_conf

        # One trainer instance, two separate minimize ops.
        self.trainer = trainer(learning_rate=self.lr)
        self.opt_conf = self.trainer.minimize(self.cost_conf)
        self.opt_disc = self.trainer.minimize(self.cost_disc)

        # Weight clipping for the WGAN variant: one assign op per trainable
        # variable named "reward/<scope>/...".
        self.clip_ops = []
        if self.wgan:
            for scope, value in clip_weights.items():
                vrs = [
                    v for v in tf.get_collection(
                        tf.GraphKeys.TRAINABLE_VARIABLES)
                    if v.name.split('/')[0] == "reward"
                    and v.name.split('/')[1] == scope
                ]
                self.clip_ops += [
                    v.assign(tf.clip_by_value(v, -value, value)) for v in vrs
                ]
Esempio n. 6
0
    def __init__(self,
                 name,
                 input_shape,
                 output_dim,
                 z_dim,
                 pre_hidden_sizes,
                 post_hidden_sizes,
                 hidden_nonlinearity,
                 output_nonlinearity,
                 hidden_W_init=L.XavierUniformInitializer(),
                 hidden_b_init=tf.zeros_initializer,
                 output_W_init=L.XavierUniformInitializer(),
                 output_b_init=tf.zeros_initializer,
                 batch_size=None,
                 input_var=None,
                 input_layer=None,
                 weight_normalization=False):
        """Build a dense network with an ``L.LatentLayer`` in the middle.

        Inside the variable scope ``name`` the graph is: input layer ->
        ``feedforward`` over ``pre_hidden_sizes`` -> ``L.LatentLayer`` of size
        ``z_dim`` -> ``feedforward`` over ``post_hidden_sizes`` (layer names
        continue counting after the first stack) -> dense output layer with
        ``output_dim`` units.

        ``self.kl_cost`` is ``kl_from_prior`` evaluated on the parameters the
        latent layer derives from the output of its input layer.
        """
        # quick_init receives locals(), so it runs before any other local
        # name is bound in this method.
        Serializable.quick_init(self, locals())
        self.name = name

        with tf.variable_scope(name):
            net_input = input_layer
            if net_input is None:
                net_input = L.InputLayer(shape=(batch_size, ) + input_shape,
                                         input_var=input_var,
                                         name="input")
            self._layers = [net_input]

            # Options shared by both dense stacks.
            dense_opts = dict(hidden_W_init=hidden_W_init,
                              hidden_b_init=hidden_b_init,
                              weight_normalization=weight_normalization)

            # Stack before the latent layer.
            pre_latent = feedforward(net_input,
                                     pre_hidden_sizes,
                                     hidden_nonlinearity,
                                     start_idx=0,
                                     **dense_opts)
            latent = L.LatentLayer(pre_latent, z_dim)
            # Stack after the latent layer; names continue where the first
            # stack stopped.
            post_latent = feedforward(latent,
                                      post_hidden_sizes,
                                      hidden_nonlinearity,
                                      start_idx=len(pre_hidden_sizes),
                                      **dense_opts)

            net_output = L.DenseLayer(post_latent,
                                      num_units=output_dim,
                                      nonlinearity=output_nonlinearity,
                                      name="output",
                                      W=output_W_init,
                                      b=output_b_init,
                                      weight_normalization=weight_normalization)
            self._layers.append(net_output)

            self._l_lat = latent
            self._z_dim = z_dim
            self._l_in = net_input
            self._l_out = net_output
            target_shape = (batch_size, ) + (output_dim, )
            self._l_tar = L.InputLayer(shape=target_shape,
                                       input_var=input_var,
                                       name="target")

            # complexity loss for variational posterior
            latent_input = L.get_output(latent.input_layer)
            z_mu, z_sig = latent.get_dparams_for(latent_input)
            self.kl_cost = kl_from_prior(z_mu, z_sig, z_dim)

            # self._input_var = l_in.input_var
            self._output = L.get_output(net_output)

            LayersPowered.__init__(self, net_output)
Esempio n. 7
0
    def __init__(
        self,
        name,
        input_shape,
        output_dim,
        hidden_sizes,
        hidden_nonlinearity,
        output_nonlinearity,
        z_dim,
        z_idx,
        z_hidden_sizes,
        merge="mul",
        hidden_W_init=L.XavierUniformInitializer(),
        hidden_b_init=tf.zeros_initializer,
        output_W_init=L.XavierUniformInitializer(),
        output_b_init=tf.zeros_initializer,
        batch_size=None,
        input_var=None,
        input_layer=None,
        batch_normalization=False,
        weight_normalization=False,
    ):
        """Build a dense network whose input carries a trailing latent code.

        The last ``z_dim`` entries of the input are sliced off as ``z``; the
        remaining entries are the features.  The features go through the
        first ``z_idx`` layers of ``hidden_sizes``, ``z`` goes through its own
        stack (``z_hidden_sizes``) under the sub-scope ``"z"``, the two are
        merged, and the merged layer goes through the remaining hidden layers
        and a dense output layer with ``output_dim`` units.

        Args:
            name: variable scope for the whole network; stored as
                ``self.name``.
            input_shape: shape of one input; its product is the total input
                width used for slicing.
            hidden_sizes: list of hidden layer sizes.
            z_idx: index into ``hidden_sizes`` at which ``z`` is merged.
            merge: ``"mul"`` (element-wise product; the ``z`` stack gets one
                extra layer so widths can match) or ``"concat"``
                (concatenation along axis 1).
            batch_normalization: accepted but not applied by this method.

        The remaining arguments are forwarded to the layers created here.

        Raises:
            NotImplementedError: if ``merge`` is neither ``"mul"`` nor
                ``"concat"``.
        """
        # quick_init receives locals(), so it runs before any other local
        # name is bound in this method.
        Serializable.quick_init(self, locals())

        # Reject unsupported modes before any layer is created.  Previously
        # an unknown mode failed with a NameError while building the z stack,
        # before the intended NotImplementedError could be reached.
        if merge not in ("mul", "concat"):
            raise NotImplementedError(
                "unsupported merge mode: %r" % (merge, ))

        self.name = name

        total_dim = np.prod(input_shape)

        with tf.variable_scope(name):
            if input_layer is None:
                l_in = L.InputLayer(shape=(batch_size, ) + input_shape,
                                    input_var=input_var,
                                    name="input")
            else:
                l_in = input_layer
            self._layers = [l_in]

            # slice off features / observation
            l_feat = L.SliceLayer(l_in,
                                  indices=slice(0, total_dim - z_dim),
                                  name="l_feat")

            # slice off z "style" variable
            l_z = L.SliceLayer(l_in,
                               indices=slice(total_dim - z_dim, total_dim),
                               name="l_z")

            l_pre = feedforward(l_feat,
                                hidden_sizes[:z_idx],
                                hidden_nonlinearity,
                                linear_output=True)
            with tf.variable_scope("z"):
                if merge == "mul":
                    # if merging mul, ensure dimensionalities match.
                    # NOTE(review): for z_idx == 0 this picks total_dim,
                    # while l_pre is then the sliced feature layer - confirm
                    # the widths agree in that case.
                    z_head = [([total_dim] + hidden_sizes)[z_idx]]
                else:
                    z_head = []
                l_z = feedforward(l_z,
                                  z_hidden_sizes + z_head,
                                  hidden_nonlinearity,
                                  linear_output=True)

            # merge latent code with features
            if merge == "mul":
                l_merge = L.ElemwiseMulLayer([l_pre, l_z])
            else:
                l_merge = L.ConcatLayer([l_pre, l_z], axis=1)

            # Both stacks were requested with linear_output=True; the hidden
            # nonlinearity is applied after merging when at least one
            # feature layer precedes the merge.
            if z_idx > 0:
                l_merge = L.NonlinearityLayer(l_merge, hidden_nonlinearity)
            l_hid = feedforward(l_merge,
                                hidden_sizes[z_idx:],
                                hidden_nonlinearity,
                                start_idx=z_idx)

            l_out = L.DenseLayer(l_hid,
                                 num_units=output_dim,
                                 nonlinearity=output_nonlinearity,
                                 name="output",
                                 W=output_W_init,
                                 b=output_b_init,
                                 weight_normalization=weight_normalization)
            # Batch normalization of the output is disabled here:
            #if batch_normalization:
            #    ls = L.batch_norm(l_out)
            #    l_out = ls[-1]
            #    self._layers += ls
            self._layers.append(l_out)
            self._l_in = l_in
            self._l_out = l_out
            self._l_tar = L.InputLayer(shape=(batch_size, ) + (output_dim, ),
                                       input_var=input_var,
                                       name="target")

            # self._input_var = l_in.input_var
            self._output = L.get_output(l_out)

            LayersPowered.__init__(self, l_out)