Ejemplo n.º 1
0
    def __init__(self,
                 input_shape,
                 output_dim,
                 hidden_dim,
                 name=None,
                 hidden_nonlinearity=tf.nn.relu,
                 output_w_init=ly.XavierUniformInitializer(),
                 recurrent_nonlinearity=tf.nn.sigmoid,
                 recurrent_w_x_init=ly.XavierUniformInitializer(),
                 recurrent_w_h_init=ly.OrthogonalInitializer(),
                 lstm_layer_cls=ly.LSTMLayer,
                 output_nonlinearity=None,
                 input_var=None,
                 input_layer=None,
                 forget_bias=1.0,
                 use_peepholes=False,
                 layer_args=None):
        """Assemble the layer graph of the LSTM network.

        Everything is created inside ``tf.variable_scope(name,
        'LSTMNetwork')``. Two paths are built on top of one LSTM layer:

        * a full-input path ``input -> lstm_layer -> lstm_flat ->
          output_flat -> output``, where ``output`` reshapes the flat dense
          output so that its first two dimensions match those of the input;
        * a single-step path ``step_input + step_prev_state -> step_state
          -> step_hidden / step_cell -> step_output``. The ``step_output``
          dense layer is constructed with the ``w`` and ``b`` attributes of
          ``output_flat`` (presumably so both paths share parameters).

        Args:
            input_shape: Tuple appended to ``(None, None)`` for the input
                layer shape and to ``(None,)`` for the step input layer
                shape.
            output_dim: ``num_units`` of both dense output layers.
            hidden_dim: ``num_units`` of the LSTM layer; the previous-state
                input layer is ``2 * hidden_dim`` wide.
            name: Scope name handed to ``tf.variable_scope``.
            hidden_nonlinearity: Forwarded to the LSTM layer as
                ``hidden_nonlinearity``.
            output_w_init: Passed as ``w`` to the ``output_flat`` dense
                layer.
            recurrent_nonlinearity: Forwarded to the LSTM layer as
                ``gate_nonlinearity``.
            recurrent_w_x_init: Forwarded to the LSTM layer as ``w_x_init``.
            recurrent_w_h_init: Forwarded to the LSTM layer as ``w_h_init``.
            lstm_layer_cls: Callable used to construct the LSTM layer.
            output_nonlinearity: ``nonlinearity`` of both dense output
                layers.
            input_var: Passed to the input layer, only when that layer is
                created here.
            input_layer: Existing layer to use as the network input; when
                ``None`` a new input layer is created.
            forget_bias: Forwarded unchanged to the LSTM layer.
            use_peepholes: Forwarded unchanged to the LSTM layer.
            layer_args: Optional dict of extra keyword arguments for the
                LSTM layer constructor; ``None`` means no extras.
        """
        with tf.variable_scope(name, 'LSTMNetwork'):
            # Reuse the caller's input layer when one was supplied.
            if input_layer is not None:
                net_input = input_layer
            else:
                net_input = ly.InputLayer(
                    shape=(None, None) + input_shape,
                    input_var=input_var,
                    name='input')

            step_in = ly.InputLayer(
                shape=(None, ) + input_shape, name='step_input')
            # Holds the previous hidden and cell state, hence twice the width.
            step_prev = ly.InputLayer(
                shape=(None, 2 * hidden_dim), name='step_prev_state')

            extra_lstm_kwargs = {} if layer_args is None else layer_args
            lstm = lstm_layer_cls(
                net_input,
                num_units=hidden_dim,
                hidden_nonlinearity=hidden_nonlinearity,
                gate_nonlinearity=recurrent_nonlinearity,
                hidden_init_trainable=False,
                name='lstm_layer',
                forget_bias=forget_bias,
                cell_init_trainable=False,
                w_x_init=recurrent_w_x_init,
                w_h_init=recurrent_w_h_init,
                use_peepholes=use_peepholes,
                **extra_lstm_kwargs)

            lstm_flat = ly.ReshapeLayer(
                lstm, shape=(-1, hidden_dim), name='lstm_flat')
            dense_flat = ly.DenseLayer(
                lstm_flat,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                w=output_w_init,
                name='output_flat')

            def _unflatten(flat_output, l_input):
                # Give the flat output the first two dimensions of the input.
                dim0 = tf.shape(l_input)[0]
                dim1 = tf.shape(l_input)[1]
                return tf.reshape(flat_output, tf.stack((dim0, dim1, -1)))

            def _unflatten_shape(flat_output_shape, l_input_shape):
                # Static-shape counterpart of ``_unflatten``.
                return (l_input_shape[0], l_input_shape[1],
                        flat_output_shape[-1])

            full_output = ly.OpLayer(
                dense_flat,
                op=_unflatten,
                shape_op=_unflatten_shape,
                extras=[net_input],
                name='output')

            step_state = lstm.get_step_layer(
                step_in, step_prev, name='step_state')
            # First ``hidden_dim`` entries form the hidden slice, the rest
            # the cell slice.
            step_hidden = ly.SliceLayer(
                step_state,
                indices=slice(None, hidden_dim),
                name='step_hidden')
            step_cell = ly.SliceLayer(
                step_state,
                indices=slice(hidden_dim, None),
                name='step_cell')
            step_output = ly.DenseLayer(
                step_hidden,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                w=dense_flat.w,
                b=dense_flat.b,
                name='step_output')

            # Full-input path.
            self._l_in = net_input
            self._l_lstm = lstm
            self._l_out = full_output
            # Initial state attributes exposed by the LSTM layer.
            self._hid_init_param = lstm.h0
            self._cell_init_param = lstm.c0
            # Single-step path.
            self._l_step_input = step_in
            self._l_step_prev_state = step_prev
            self._l_step_state = step_state
            self._l_step_hidden = step_hidden
            self._l_step_cell = step_cell
            self._l_step_output = step_output
            self._hidden_dim = hidden_dim
Ejemplo n.º 2
0
    def __init__(self,
                 input_shape,
                 output_dim,
                 hidden_dim,
                 name=None,
                 hidden_nonlinearity=tf.nn.relu,
                 lstm_layer_cls=ly.LSTMLayer,
                 output_nonlinearity=None,
                 input_var=None,
                 input_layer=None,
                 forget_bias=1.0,
                 use_peepholes=False,
                 layer_args=None):
        """Assemble the layer graph of the LSTM network.

        All layers are created inside ``tf.variable_scope(name,
        "LSTMNetwork")``. One LSTM layer feeds two paths:

        * full-input: ``input -> lstm_layer -> lstm_flat -> output_flat ->
          output``; the final layer reshapes the flat dense output so its
          first two dimensions equal those of the input;
        * single-step: ``step_input`` and ``step_prev_state`` go through the
          LSTM's step layer, whose result is sliced into ``step_hidden`` and
          ``step_cell``; ``step_output`` is a dense layer built with the
          ``w`` and ``b`` attributes of ``output_flat`` (presumably to share
          parameters between the two paths).

        Args:
            input_shape: Tuple appended to ``(None, None)`` (input layer)
                and to ``(None,)`` (step input layer) to form their shapes.
            output_dim: ``num_units`` of both dense output layers.
            hidden_dim: ``num_units`` of the LSTM layer; the previous-state
                input layer is ``2 * hidden_dim`` wide.
            name: Scope name handed to ``tf.variable_scope``.
            hidden_nonlinearity: Forwarded to the LSTM layer.
            lstm_layer_cls: Callable used to construct the LSTM layer.
            output_nonlinearity: ``nonlinearity`` of both dense output
                layers.
            input_var: Passed to the input layer, only when that layer is
                created here.
            input_layer: Existing layer to use as the network input; when
                ``None`` a new input layer is created.
            forget_bias: Forwarded unchanged to the LSTM layer.
            use_peepholes: Forwarded unchanged to the LSTM layer.
            layer_args: Optional dict of extra keyword arguments for the
                LSTM layer constructor; ``None`` means no extras.
        """
        with tf.variable_scope(name, "LSTMNetwork"):
            # Reuse the caller's input layer when one was supplied.
            if input_layer is not None:
                net_input = input_layer
            else:
                net_input = ly.InputLayer(shape=(None, None) + input_shape,
                                          input_var=input_var,
                                          name="input")

            step_in = ly.InputLayer(shape=(None, ) + input_shape,
                                    name="step_input")
            # Holds the previous hidden and cell state, hence twice the width.
            step_prev = ly.InputLayer(shape=(None, 2 * hidden_dim),
                                      name="step_prev_state")

            extra_lstm_kwargs = {} if layer_args is None else layer_args
            lstm = lstm_layer_cls(net_input,
                                  num_units=hidden_dim,
                                  hidden_nonlinearity=hidden_nonlinearity,
                                  hidden_init_trainable=False,
                                  name="lstm_layer",
                                  forget_bias=forget_bias,
                                  cell_init_trainable=False,
                                  use_peepholes=use_peepholes,
                                  **extra_lstm_kwargs)

            lstm_flat = ly.ReshapeLayer(lstm,
                                        shape=(-1, hidden_dim),
                                        name="lstm_flat")
            dense_flat = ly.DenseLayer(lstm_flat,
                                       num_units=output_dim,
                                       nonlinearity=output_nonlinearity,
                                       name="output_flat")

            def _unflatten(flat_output, l_input):
                # Give the flat output the first two dimensions of the input.
                dim0 = tf.shape(l_input)[0]
                dim1 = tf.shape(l_input)[1]
                return tf.reshape(flat_output, tf.stack((dim0, dim1, -1)))

            def _unflatten_shape(flat_output_shape, l_input_shape):
                # Static-shape counterpart of ``_unflatten``.
                return (l_input_shape[0], l_input_shape[1],
                        flat_output_shape[-1])

            full_output = ly.OpLayer(dense_flat,
                                     op=_unflatten,
                                     shape_op=_unflatten_shape,
                                     extras=[net_input],
                                     name="output")

            step_state = lstm.get_step_layer(step_in,
                                             step_prev,
                                             name="step_state")
            # First ``hidden_dim`` entries form the hidden slice, the rest
            # the cell slice.
            step_hidden = ly.SliceLayer(step_state,
                                        indices=slice(None, hidden_dim),
                                        name="step_hidden")
            step_cell = ly.SliceLayer(step_state,
                                      indices=slice(hidden_dim, None),
                                      name="step_cell")
            step_output = ly.DenseLayer(step_hidden,
                                        num_units=output_dim,
                                        nonlinearity=output_nonlinearity,
                                        w=dense_flat.w,
                                        b=dense_flat.b,
                                        name="step_output")

            # Full-input path.
            self._l_in = net_input
            self._l_lstm = lstm
            self._l_out = full_output
            # Initial state attributes exposed by the LSTM layer.
            self._hid_init_param = lstm.h0
            self._cell_init_param = lstm.c0
            # Single-step path.
            self._l_step_input = step_in
            self._l_step_prev_state = step_prev
            self._l_step_state = step_state
            self._l_step_hidden = step_hidden
            self._l_step_cell = step_cell
            self._l_step_output = step_output
            self._hidden_dim = hidden_dim
Ejemplo n.º 3
0
    def __init__(self,
                 input_shape,
                 output_dim,
                 hidden_dim,
                 name=None,
                 hidden_nonlinearity=tf.nn.relu,
                 output_w_init=ly.XavierUniformInitializer(),
                 recurrent_nonlinearity=tf.nn.sigmoid,
                 recurrent_w_x_init=ly.XavierUniformInitializer(),
                 recurrent_w_h_init=ly.OrthogonalInitializer(),
                 gru_layer_cls=ly.GRULayer,
                 output_nonlinearity=None,
                 input_var=None,
                 input_layer=None,
                 layer_args=None):
        """Assemble the layer graph of the GRU network.

        Everything is created inside ``tf.variable_scope(name,
        'GRUNetwork')``. Two paths are built on top of one GRU layer:

        * a full-input path ``input -> gru -> gru_flat -> output_flat ->
          output``, where ``output`` reshapes the flat dense output so that
          its first two dimensions match those of the input;
        * a single-step path ``step_input + step_prev_state -> step_state
          -> step_output``. The step state is used directly as the step
          hidden layer, and ``step_output`` is constructed with the ``w``
          and ``b`` attributes of ``output_flat`` (presumably so both paths
          share parameters).

        Args:
            input_shape: Tuple appended to ``(None, None)`` for the input
                layer shape and to ``(None,)`` for the step input layer
                shape.
            output_dim: ``num_units`` of both dense output layers.
            hidden_dim: ``num_units`` of the GRU layer and width of the
                previous-state input layer.
            name: Scope name handed to ``tf.variable_scope``.
            hidden_nonlinearity: Forwarded to the GRU layer as
                ``hidden_nonlinearity``.
            output_w_init: Passed as ``w`` to the ``output_flat`` dense
                layer.
            recurrent_nonlinearity: Forwarded to the GRU layer as
                ``gate_nonlinearity``.
            recurrent_w_x_init: Forwarded to the GRU layer as ``w_x_init``.
            recurrent_w_h_init: Forwarded to the GRU layer as ``w_h_init``.
            gru_layer_cls: Callable used to construct the GRU layer.
            output_nonlinearity: ``nonlinearity`` of both dense output
                layers.
            input_var: Passed to the input layer, only when that layer is
                created here.
            input_layer: Existing layer to use as the network input; when
                ``None`` a new input layer is created.
            layer_args: Optional dict of extra keyword arguments for the
                GRU layer constructor; ``None`` means no extras.
        """
        with tf.variable_scope(name, 'GRUNetwork'):
            # Reuse the caller's input layer when one was supplied.
            if input_layer is not None:
                net_input = input_layer
            else:
                net_input = ly.InputLayer(
                    shape=(None, None) + input_shape,
                    input_var=input_var,
                    name='input')

            step_in = ly.InputLayer(
                shape=(None, ) + input_shape, name='step_input')
            step_prev = ly.InputLayer(
                shape=(None, hidden_dim), name='step_prev_state')

            extra_gru_kwargs = {} if layer_args is None else layer_args
            gru = gru_layer_cls(
                net_input,
                num_units=hidden_dim,
                hidden_nonlinearity=hidden_nonlinearity,
                gate_nonlinearity=recurrent_nonlinearity,
                hidden_init_trainable=False,
                w_x_init=recurrent_w_x_init,
                w_h_init=recurrent_w_h_init,
                name='gru',
                **extra_gru_kwargs)

            gru_flat = ly.ReshapeLayer(
                gru, shape=(-1, hidden_dim), name='gru_flat')
            dense_flat = ly.DenseLayer(
                gru_flat,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                w=output_w_init,
                name='output_flat')

            def _unflatten(flat_output, l_input):
                # Give the flat output the first two dimensions of the input.
                dim0 = tf.shape(l_input)[0]
                dim1 = tf.shape(l_input)[1]
                return tf.reshape(flat_output, tf.stack((dim0, dim1, -1)))

            def _unflatten_shape(flat_output_shape, l_input_shape):
                # Static-shape counterpart of ``_unflatten``.
                return (l_input_shape[0], l_input_shape[1],
                        flat_output_shape[-1])

            full_output = ly.OpLayer(
                dense_flat,
                op=_unflatten,
                shape_op=_unflatten_shape,
                extras=[net_input],
                name='output')

            step_state = gru.get_step_layer(
                step_in, step_prev, name='step_state')
            # The step state layer doubles as the step hidden layer.
            step_hidden = step_state
            step_output = ly.DenseLayer(
                step_hidden,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                w=dense_flat.w,
                b=dense_flat.b,
                name='step_output')

            # Full-input path.
            self._l_in = net_input
            self._l_gru = gru
            self._l_out = full_output
            # Initial hidden state attribute exposed by the GRU layer.
            self._hid_init_param = gru.h0
            # Single-step path.
            self._l_step_input = step_in
            self._l_step_prev_state = step_prev
            self._l_step_state = step_state
            self._l_step_hidden = step_hidden
            self._l_step_output = step_output
            self._hidden_dim = hidden_dim
Ejemplo n.º 4
0
    def __init__(self,
                 input_shape,
                 output_dim,
                 hidden_dim,
                 name=None,
                 hidden_nonlinearity=tf.nn.relu,
                 gru_layer_cls=ly.GRULayer,
                 output_nonlinearity=None,
                 input_var=None,
                 input_layer=None,
                 layer_args=None):
        """Assemble the layer graph of the GRU network.

        All layers are created inside ``tf.variable_scope(name,
        "GRUNetwork")``. One GRU layer feeds two paths:

        * full-input: ``input -> gru -> gru_flat -> output_flat -> output``;
          the final layer reshapes the flat dense output so its first two
          dimensions equal those of the input;
        * single-step: ``step_input`` and ``step_prev_state`` go through the
          GRU's step layer. Its result serves directly as the step hidden
          layer, and ``step_output`` is a dense layer built with the ``w``
          and ``b`` attributes of ``output_flat`` (presumably to share
          parameters between the two paths).

        Args:
            input_shape: Tuple appended to ``(None, None)`` (input layer)
                and to ``(None,)`` (step input layer) to form their shapes.
            output_dim: ``num_units`` of both dense output layers.
            hidden_dim: ``num_units`` of the GRU layer and width of the
                previous-state input layer.
            name: Scope name handed to ``tf.variable_scope``.
            hidden_nonlinearity: Forwarded to the GRU layer.
            gru_layer_cls: Callable used to construct the GRU layer.
            output_nonlinearity: ``nonlinearity`` of both dense output
                layers.
            input_var: Passed to the input layer, only when that layer is
                created here.
            input_layer: Existing layer to use as the network input; when
                ``None`` a new input layer is created.
            layer_args: Optional dict of extra keyword arguments for the
                GRU layer constructor; ``None`` means no extras.
        """
        with tf.variable_scope(name, "GRUNetwork"):
            # Reuse the caller's input layer when one was supplied.
            if input_layer is not None:
                net_input = input_layer
            else:
                net_input = ly.InputLayer(shape=(None, None) + input_shape,
                                          input_var=input_var,
                                          name="input")

            step_in = ly.InputLayer(shape=(None, ) + input_shape,
                                    name="step_input")
            step_prev = ly.InputLayer(shape=(None, hidden_dim),
                                      name="step_prev_state")

            extra_gru_kwargs = {} if layer_args is None else layer_args
            gru = gru_layer_cls(net_input,
                                num_units=hidden_dim,
                                hidden_nonlinearity=hidden_nonlinearity,
                                hidden_init_trainable=False,
                                name="gru",
                                **extra_gru_kwargs)

            gru_flat = ly.ReshapeLayer(gru,
                                       shape=(-1, hidden_dim),
                                       name="gru_flat")
            dense_flat = ly.DenseLayer(gru_flat,
                                       num_units=output_dim,
                                       nonlinearity=output_nonlinearity,
                                       name="output_flat")

            def _unflatten(flat_output, l_input):
                # Give the flat output the first two dimensions of the input.
                dim0 = tf.shape(l_input)[0]
                dim1 = tf.shape(l_input)[1]
                return tf.reshape(flat_output, tf.stack((dim0, dim1, -1)))

            def _unflatten_shape(flat_output_shape, l_input_shape):
                # Static-shape counterpart of ``_unflatten``.
                return (l_input_shape[0], l_input_shape[1],
                        flat_output_shape[-1])

            full_output = ly.OpLayer(dense_flat,
                                     op=_unflatten,
                                     shape_op=_unflatten_shape,
                                     extras=[net_input],
                                     name="output")

            step_state = gru.get_step_layer(step_in,
                                            step_prev,
                                            name="step_state")
            # The step state layer doubles as the step hidden layer.
            step_hidden = step_state
            step_output = ly.DenseLayer(step_hidden,
                                        num_units=output_dim,
                                        nonlinearity=output_nonlinearity,
                                        w=dense_flat.w,
                                        b=dense_flat.b,
                                        name="step_output")

            # Full-input path.
            self._l_in = net_input
            self._l_gru = gru
            self._l_out = full_output
            # Initial hidden state attribute exposed by the GRU layer.
            self._hid_init_param = gru.h0
            # Single-step path.
            self._l_step_input = step_in
            self._l_step_prev_state = step_prev
            self._l_step_state = step_state
            self._l_step_hidden = step_hidden
            self._l_step_output = step_output
            self._hidden_dim = hidden_dim