예제 #1
0
    def __init__(self,
                 num_units,
                 use_peepholes=False,
                 cell_clip=None,
                 initializer=None,
                 num_proj=None,
                 num_unit_shards=1,
                 num_proj_shards=1,
                 forget_bias=1.0,
                 bn=0,
                 return_gate=False,
                 deterministic=None,
                 activation=tanh):
        """Initialize the parameters for an LSTM cell.

        Args:
          num_units: int, the number of units in the LSTM cell.
          use_peepholes: bool, set True to enable diagonal/peephole
            connections.
          cell_clip: (optional) A float value, if provided the cell state is
            clipped by this value prior to the cell output activation.
          initializer: (optional) The initializer to use for the weight and
            projection matrices.
          num_proj: (optional) int, the output dimensionality for the
            projection matrices.  If None (or 0), no projection is performed.
          num_unit_shards: How to split the weight matrix.  If >1, the weight
            matrix is stored across num_unit_shards.
          num_proj_shards: How to split the projection matrix.  If >1, the
            projection matrix is stored across num_proj_shards.
          forget_bias: Biases of the forget gate are initialized by default
            to 1 in order to reduce the scale of forgetting at the beginning
            of the training.
          bn: int, set 1, 2 or 3 to enable sequence-wise batch normalization
            at different levels.  Implemented according to arXiv:1603.09025.
          return_gate: bool, set True to return the values of the gates.
          deterministic: Tensor, controls the training/testing phase and
            thereby whether batch normalization is active.
          activation: Activation function of the inner states.
        """
        self._num_units = num_units
        self._use_peepholes = use_peepholes
        self._cell_clip = cell_clip
        self._initializer = initializer
        self._num_proj = num_proj
        self._num_unit_shards = num_unit_shards
        self._num_proj_shards = num_proj_shards
        self._forget_bias = forget_bias
        self._activation = activation
        self._bn = bn
        self._return_gate = return_gate
        self._deterministic = deterministic

        # With a projection the second state field and the output have
        # num_proj entries; otherwise they keep the cell width.
        if num_proj:
            self._state_size = LSTMStateTuple(num_units, num_proj)
            self._output_size = num_proj
        else:
            self._state_size = LSTMStateTuple(num_units, num_units)
            self._output_size = num_units
예제 #2
0
def build_decoder_cell(rank, u_emb, batch_size, depth=2):
  """Build a stacked LSTM decoder cell together with its initial state.

  The bottom layer is a plain `LSTMCell`; every layer above it is an
  `LSTMCell` wrapped in a `ResidualWrapper`.

  Args:
    rank: number of units passed to each `LSTMCell`.
    u_emb: tensor used as the second field of the bottom layer's initial
      state; the first field is a zero tensor of the same shape.
    batch_size: batch size used to build the zero states of upper layers.
    depth: number of stacked layers.

  Returns:
    A pair `(MultiRNNCell, initial_state)` where `initial_state` is a tuple
    with one entry per layer.
  """
  layers = [
      LSTMCell(rank, state_is_tuple=True) if level == 0
      else ResidualWrapper(LSTMCell(rank, state_is_tuple=True))
      for level in range(depth)
  ]

  # Bottom layer is seeded from u_emb; the layers above start from zeros.
  bottom_state = LSTMStateTuple(tf.zeros_like(u_emb), u_emb)
  upper_states = [
      layers[level].zero_state(batch_size, tf.float32)
      for level in range(1, depth)
  ]
  return MultiRNNCell(layers), tuple([bottom_state] + upper_states)
예제 #3
0
    def __call__(self, x, state, scope=None):
        """Run one step of the batch-normalized LSTM cell.

        The input-to-hidden and hidden-to-hidden projections are batch
        normalized separately before being summed with the bias, and the new
        cell state is batch normalized before producing the output.

        Args:
            x: input tensor; its second dimension gives the input size.
            state: pair `(c, h)` holding the previous cell and hidden state.
            scope: optional variable scope; defaults to the class name.

        Returns:
            `(new_h, LSTMStateTuple(new_c, new_h))`. The stored cell state is
            the un-normalized `new_c`.
        """
        with tf.variable_scope(scope or type(self).__name__):
            prev_c, prev_h = state
            gate_width = 4 * self._num_units

            input_dim = x.get_shape().as_list()[1]
            w_input = tf.get_variable(
                'W_xh', [input_dim, gate_width],
                initializer=orthogonal_initializer())
            w_recurrent = tf.get_variable(
                'W_hh', [self._num_units, gate_width],
                initializer=bn_lstm_identity_initializer(0.95))
            gate_bias = tf.get_variable('bias', [gate_width])

            input_proj = tf.matmul(x, w_input)
            recurrent_proj = tf.matmul(prev_h, w_recurrent)

            normed_input = batch_norm(input_proj, 'xh', self.training)
            normed_recurrent = batch_norm(recurrent_proj, 'hh', self.training)

            pre_gates = normed_input + normed_recurrent + gate_bias

            # Gate order: input, candidate, forget, output.
            in_gate, candidate, forget_gate, out_gate = tf.split(
                1, 4, pre_gates)

            retained = prev_c * tf.sigmoid(forget_gate)
            written = tf.sigmoid(in_gate) * self._activation(candidate)
            new_c = retained + written
            normed_c = batch_norm(new_c, 'c', self.training)

            new_h = self._activation(normed_c) * tf.sigmoid(out_gate)
            return new_h, LSTMStateTuple(new_c, new_h)
예제 #4
0
    def __call__(self, x, state, scope=None):
        """Run one step of the LSTM cell using a single fused matmul.

        Args:
            x: input tensor; its second dimension gives the input size.
            state: pair `(c, h)` holding the previous cell and hidden state.
            scope: optional variable scope; defaults to the class name.

        Returns:
            `(new_h, LSTMStateTuple(new_c, new_h))`.
        """
        with tf.variable_scope(scope or type(self).__name__):
            prev_c, prev_h = state
            gate_width = 4 * self.num_units

            # The two weight matrices stay separate variables so each keeps
            # its own initializer; they are only joined for the multiply.
            input_dim = x.get_shape().as_list()[1]
            w_input = tf.get_variable(
                'W_xh', [input_dim, gate_width],
                initializer=orthogonal_initializer())
            w_recurrent = tf.get_variable(
                'W_hh', [self.num_units, gate_width],
                initializer=bn_lstm_identity_initializer(0.95))
            gate_bias = tf.get_variable('bias', [gate_width])

            # Equivalent to matmul(x, W_xh) + matmul(h, W_hh) + bias, but
            # done as one multiply on the concatenated operands for speed.
            stacked_inputs = tf.concat(1, [x, prev_h])
            stacked_weights = tf.concat(0, [w_input, w_recurrent])
            pre_gates = tf.matmul(stacked_inputs, stacked_weights) + gate_bias

            # Gate order: input, candidate, forget, output.
            in_gate, candidate, forget_gate, out_gate = tf.split(
                1, 4, pre_gates)

            retained = prev_c * tf.sigmoid(forget_gate)
            written = tf.sigmoid(in_gate) * tf.tanh(candidate)
            new_c = retained + written
            new_h = tf.tanh(new_c) * tf.sigmoid(out_gate)

            return new_h, LSTMStateTuple(new_c, new_h)
예제 #5
0
    def call(self, inputs, state):
        """Compute one step of the long short-term memory cell (LSTM).

        Args:
            inputs: input tensor for this step.
            state: `(c, h)` pair when `self._state_is_tuple` is set,
                otherwise a single tensor that is split in two along axis 1.

        Returns:
            `(new_h, new_state)`; `new_state` mirrors the layout of `state`
            (an `LSTMStateTuple` or a concatenation along axis 1).
        """
        if self._state_is_tuple:
            prev_c, prev_h = state
        else:
            prev_c, prev_h = array_ops.split(
                value=state, num_or_size_splits=2, axis=1)

        # All four gates are produced by one projection for efficiency.
        gates = self._line_sep(
            [inputs, prev_h], 4 * self._num_units, bias=False)

        # Gate order: input gate, new input, forget gate, output gate.
        in_gate, candidate, forget_gate, out_gate = array_ops.split(
            value=gates, num_or_size_splits=4, axis=1)

        retained = prev_c * math_ops.sigmoid(forget_gate + self._forget_bias)
        written = math_ops.sigmoid(in_gate) * self._activation(candidate)
        new_c = retained + written
        new_h = self._activation(new_c) * math_ops.sigmoid(out_gate)

        if self._state_is_tuple:
            return new_h, LSTMStateTuple(new_c, new_h)
        return new_h, array_ops.concat([new_c, new_h], 1)
예제 #6
0
    def __call__(self, inputs, state, scope=None):
        """Compute one step of the convolutional LSTM cell (ConvLSTM).

        Args:
            inputs: input tensor for this step.
            state: `(c, h)` pair when `self._state_is_tuple` is set,
                otherwise a single tensor that is split in two along axis 3.
            scope: optional variable scope; defaults to the class name.

        Returns:
            `(new_h, new_state)`; `new_state` mirrors the layout of `state`.
        """
        with vs.variable_scope(scope or type(self).__name__):  # "ConvLSTMCell"
            if self._state_is_tuple:
                prev_c, prev_h = state
            else:
                prev_c, prev_h = array_ops.split(3, 2, state)

            # One convolution yields all four gates, stacked on axis 3
            # (original author's layout note: batch x height x width x channel).
            gates = _conv([inputs, prev_h],
                          4 * self._num_units,
                          self._k_size,
                          True,
                          initializer=self._initializer)

            # Gate order: input gate, new input, forget gate, output gate.
            in_gate, candidate, forget_gate, out_gate = array_ops.split(
                3, 4, gates)

            retained = prev_c * sigmoid(forget_gate + self._forget_bias)
            written = sigmoid(in_gate) * self._activation(candidate)
            new_c = retained + written
            new_h = self._activation(new_c) * sigmoid(out_gate)

            if self._state_is_tuple:
                return new_h, LSTMStateTuple(new_c, new_h)
            return new_h, array_ops.concat(3, [new_c, new_h])
예제 #7
0
    def call(self, inputs, state):
        """Compute one LSTM step with optional layer normalization.

        When `self._layer_norm` is set, each of the four gate pre-activations
        and the new cell state are passed through `self._norm`.

        NOTE: recurrent dropout on the candidate activation is not applied by
        this method.

        Args:
            inputs: input tensor for this step.
            state: pair `(c, h)` holding the previous cell and hidden state.

        Returns:
            `(new_h, LSTMStateTuple(new_c, new_h))`.
        """
        prev_c, prev_h = state
        gates = self._linear([inputs, prev_h], self._num_units * 4, False)

        # Gate order: input, transform (candidate), forget, output.
        in_gate, candidate, forget_gate, out_gate = array_ops.split(
            value=gates, num_or_size_splits=4, axis=1)
        if self._layer_norm:
            in_gate = self._norm(in_gate, "input")
            candidate = self._norm(candidate, "transform")
            forget_gate = self._norm(forget_gate, "forget")
            out_gate = self._norm(out_gate, "output")

        activated = self._activation(candidate)
        retained = prev_c * math_ops.sigmoid(forget_gate + self._forget_bias)
        written = math_ops.sigmoid(in_gate) * activated
        new_c = retained + written
        if self._layer_norm:
            new_c = self._norm(new_c, "state")
        new_h = self._activation(new_c) * math_ops.sigmoid(out_gate)

        return new_h, LSTMStateTuple(new_c, new_h)
예제 #8
0
        def let_compute():
            """Attend over cached controller hidden states and splice the result into the state.

            Closure over the enclosing scope: reads `var`, `controller_state`,
            `cache_controller_hidden`, `time`, `time2` and `last_read_vectors`,
            and sets `var["compute_interface"]` to True as a side effect.

            Returns:
                `controller_state` unchanged when `self.cache_attend_dim` is
                not positive. Otherwise the return depends on the state type
                reported by `self.get_hidden_value_from_state`:
                  * 1: a tuple copy of the state after writing the attention
                    vector into its last element's last slot;
                  * 2 or 3: `LSTMStateTuple(controller_state[0], att)`;
                  * 4: the attention vector itself (not a state structure);
                  * anything else: `controller_state` unchanged.
            """
            var["compute_interface"] = True
            # Default result: pass the controller state through untouched.
            ns2 = controller_state

            if self.cache_attend_dim > 0:
                # values = utility.pack_into_tensor(cache_controller_hidden, axis=1)
                # Fetch the cached entries for steps time - time2 .. time (inclusive).
                values = cache_controller_hidden.gather(
                    tf.range(time - time2, time + 1))

                value_size = self.hidden_controller_dim

                # NOTE(review): gather presumably returns a time-major tensor; this
                # is a plain reshape, not a transpose -- confirm the intended layout.
                encoder_outputs = \
                    tf.reshape(values, [self.batch_size, -1, value_size])  # bs x Lin x h
                # Project every cached value into the attention space.
                v = tf.reshape(
                    tf.matmul(tf.reshape(encoder_outputs, [-1, value_size]),
                              self.cU_a),
                    [self.batch_size, -1, self.cache_attend_dim])

                if self.use_mem:
                    # Add the projected last read vectors; the size-1 middle axis
                    # broadcasts over the cached steps.
                    v += tf.reshape(
                        tf.matmul(
                            tf.reshape(last_read_vectors,
                                       [-1, self.read_heads * self.word_size]),
                            self.cV_a),
                        [self.batch_size, 1, self.cache_attend_dim])
                ns, statetype = self.get_hidden_value_from_state(
                    controller_state)
                # Debug output; these prints run when this function is executed
                # in Python (e.g. at graph construction), not per session step.
                print("state typeppppp")
                print(controller_state)
                print(ns)
                # Add the projected current hidden value, again broadcast over
                # the cached steps.
                v += tf.reshape(
                    tf.matmul(tf.reshape(ns, [-1, self.hidden_controller_dim]),
                              self.cW_a),
                    [self.batch_size, 1, self.cache_attend_dim
                     ])  # bs x 1 x h_att (broadcast onto bs x Lin x h_att)
                print('state include only h')

                # Score each cached step and normalise the scores per batch row.
                v = tf.reshape(tf.tanh(v), [-1, self.cache_attend_dim])
                eijs = tf.matmul(v, tf.expand_dims(self.cv_a, 1))  # bs.Lin x 1
                eijs = tf.reshape(eijs, [self.batch_size, -1])  # bs x Lin
                alphas = tf.nn.softmax(eijs)

                # Weighted sum of the cached values over the step axis.
                att = tf.reduce_sum(encoder_outputs *
                                    tf.expand_dims(alphas, 2), 1)  # bs x h
                att = tf.reshape(att, [self.batch_size, value_size])  # bs x h
                # step = tf.concat([var["step"], att], axis=-1)  # bs x (encoder_input_size + h)
                # step = tf.matmul(step, self.cW_ah) # bs x encoder_input_size (or emb_size)
                if statetype == 1:
                    # NOTE(review): list() is a shallow copy, so the item
                    # assignment below mutates the inner object shared with
                    # controller_state, and fails if that object is an immutable
                    # tuple -- confirm the expected state structure.
                    ns2 = list(controller_state)
                    ns2[-1][-1] = att
                    ns2 = tuple(ns2)
                elif statetype == 2 or statetype == 3:
                    # ns2 = list(controller_state)
                    # Keep the first state field and replace the second with att.
                    ns2 = LSTMStateTuple(controller_state[0], att)
                    # ns2 = tuple(ns2)
                elif statetype == 4:
                    # In this case the attention vector itself is returned.
                    return att

            return ns2
예제 #9
0
        def c2():
            """Run `controller3` on the merged states of the two controllers.

            Closure over `controller_state1`, `controller_state2`,
            `last_read_vectors1`, `last_read_vectors2` and `step1`.

            Returns:
                `(pre_output, interface, (left_state, right_state))` where the
                two `LSTMStateTuple`s are the halves of the merged new state,
                split along the last axis.
            """
            # Join the two controllers' states feature-wise into one state.
            merged_c = tf.concat(
                [controller_state1[0], controller_state2[0]], axis=-1)
            merged_h = tf.concat(
                [controller_state1[1], controller_state2[1]], axis=-1)
            merged_state = LSTMStateTuple(merged_c, merged_h)
            merged_reads = tf.concat(
                [last_read_vectors1, last_read_vectors2], axis=1)

            pre_output, interface, next_state = self.controller3.process_input(
                step1, merged_reads, merged_state)

            # Split the merged state back into one half per controller.
            left_c, right_c = tf.split(
                next_state[0], num_or_size_splits=2, axis=-1)
            left_h, right_h = tf.split(
                next_state[1], num_or_size_splits=2, axis=-1)
            split_states = (LSTMStateTuple(left_c, left_h),
                            LSTMStateTuple(right_c, right_h))
            return pre_output, interface, split_states
예제 #10
0
    def build_graph(self):
        """Build the graph that evaluates the input batches step by step.

        Unstacks `self.input_data` into a TensorArray, runs `self._loop_body`
        in a `tf.while_loop` for `self.sequence_length` steps, and stores the
        stacked results in `self.stacked_output` and
        `self.stacked_memory_view`.
        """
        self.unstacked_input_data = utility.unstack_into_tensorarray(
            self.input_data, 1, self.sequence_length)

        # One accumulator for the outputs plus one per recorded memory
        # quantity; this order is also their order in the loop variables.
        view_names = ('free_gates', 'allocation_gates', 'write_gates',
                      'read_weightings', 'write_weightings', 'usage_vectors')
        outputs = tf.TensorArray(tf.float32, self.sequence_length)
        view_arrays = tuple(
            tf.TensorArray(tf.float32, self.sequence_length)
            for _ in view_names)

        # Controllers without a recurrent network get a placeholder state.
        if self.controller.has_recurrent_nn:
            controller_state = self.controller.get_state()
        else:
            controller_state = (tf.zeros(1), tf.zeros(1))
        memory_state = self.memory.init_memory()
        if not isinstance(controller_state, LSTMStateTuple):
            controller_state = LSTMStateTuple(controller_state[0],
                                              controller_state[1])

        with tf.compat.v1.variable_scope("sequence_loop"):
            start_time = tf.constant(0, dtype=tf.int32)
            loop_vars = ((start_time, memory_state, outputs) + view_arrays +
                         (controller_state,))

            final_results = tf.while_loop(
                cond=lambda time, *_: time < self.sequence_length,
                body=self._loop_body,
                loop_vars=loop_vars,
                parallel_iterations=32,
                swap_memory=True)

        # Tie the controller's state update to the tensors stacked below.
        dependencies = []
        if self.controller.has_recurrent_nn:
            dependencies.append(self.controller.update_state(final_results[9]))

        with tf.control_dependencies(dependencies):
            self.stacked_output = utility.stack_into_tensor(final_results[2],
                                                            axis=1)
            # The memory-view arrays sit at loop-variable indices 3..8.
            self.stacked_memory_view = {
                name: utility.stack_into_tensor(final_results[index], axis=1)
                for index, name in enumerate(view_names, 3)
            }
예제 #11
0
    def Planner(self, training_input, testing_input, label_status, length,
                mask):
        """Build the planner sub-graph for training and single-step testing.

        The same RNN cell and output projection are used for the unrolled
        training path and for the one-step testing path.

        Args:
            training_input: training features; after dropout they are
                reshaped to `[batch, self.max_step, features]`.
            testing_input: input fed directly to the RNN cell for one step.
            label_status: labels, flattened to 1-D for the sparse softmax
                cross-entropy loss.
            length: per-sequence lengths passed to `tf.nn.dynamic_rnn`.
            mask: weights multiplied element-wise with the per-step loss.

        Returns:
            `(loss_status_scalar, prob, state, prev_state)`: the masked,
            summed training loss; the test-time two-class probabilities; the
            cell state after the test step; and the placeholder(s) that feed
            the test-time previous state (a single `LSTMStateTuple` when
            `self.n_layers == 1`, otherwise a list with one per layer).
        """
        with tf.variable_scope('planner'):
            # Floor division keeps the per-GPU batch size an integer under
            # true division as well; it is used as a reshape dimension.
            batch_size = self.batch_size // self.gpu_num

            rnn_cell = model_utils._lstm_cell(self.n_hidden, self.n_layers)

            w_status = tf.get_variable(
                'w_status', [self.n_hidden, 2],
                initializer=tf.contrib.layers.xavier_initializer())
            b_status = tf.get_variable(
                'b_status', [2],
                initializer=tf.contrib.layers.xavier_initializer())

            # training
            training_input_dropout = tf.nn.dropout(training_input,
                                                   self.keep_prob)  # b*l, h
            shape = training_input_dropout.get_shape().as_list()
            training_input_reshape = tf.reshape(
                training_input_dropout,
                [batch_size, self.max_step, shape[1]])  # b, l, h
            rnn_output, _ = tf.nn.dynamic_rnn(rnn_cell,
                                              training_input_reshape,
                                              sequence_length=length,
                                              dtype=tf.float32)  # b, l, h
            rnn_output_dropout = tf.nn.dropout(rnn_output, self.keep_prob)
            rnn_output_reshape = tf.reshape(rnn_output_dropout,
                                            [-1, self.n_hidden])  # b*l, h
            logits = tf.reshape(tf.matmul(rnn_output_reshape, w_status),
                                [-1, 2]) + b_status  # b*l, n

            label_status_reshape = tf.reshape(label_status, [-1])
            loss_status = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=label_status_reshape, logits=logits)

            # The mask weights each per-step loss before the sum.
            loss_status_scalar = tf.reduce_sum(loss_status * mask)

            # testing: the previous state is fed in through placeholders so
            # the cell can be stepped one input at a time.
            prev_state = []
            for layer in range(self.n_layers):
                prev_state.append(
                    LSTMStateTuple(
                        tf.placeholder(
                            tf.float32,
                            shape=[None, self.n_hidden],
                            name='initial_state{0}.c'.format(layer)),
                        tf.placeholder(
                            tf.float32,
                            shape=[None, self.n_hidden],
                            name='initial_state{0}.h'.format(layer))))
            # A single-layer cell takes its state directly, not in a list.
            if self.n_layers == 1:
                prev_state = prev_state[0]

            rnn_output_test, state = rnn_cell(testing_input,
                                              prev_state)  # b*l, h
            prob = tf.reshape(
                tf.nn.softmax(tf.matmul(rnn_output_test, w_status) + b_status),
                [-1, 2])
            return loss_status_scalar, prob, state, prev_state
예제 #12
0
    def call(self, inputs, state):
        """Compute one step of the LSTM cell with an optional highway connection.

        When `self.highway` is truthy, a transform gate mixes the LSTM output
        with an extra linear projection of the input.

        Args:
            inputs: rank-2 input tensor whose second dimension must be known.
            state: pair `(c_prev, m_prev)` of previous cell state and output.

        Returns:
            `(m, LSTMStateTuple(c, m))`.

        Raises:
            ValueError: if the input size cannot be inferred statically.
        """
        c_prev, m_prev = state

        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")

        with vs.variable_scope("highway_lstm_cell",
                               initializer=self._initializer,
                               reuse=self._reuse):
            # Block order: i = input_gate, j = new_input, f = forget_gate,
            # o = output_gate, r = transform_gate (highway only); the input
            # projection carries one further block for the highway carry.
            hidden_blocks = 5 if self.highway else 4
            with vs.variable_scope('hidden_weights'):
                hidden_matrix = linear_block_initialization(
                    m_prev, hidden_blocks * [self._num_units], bias=False)

            input_blocks = 6 if self.highway else 4
            with vs.variable_scope('input_weights'):
                input_matrix = linear_block_initialization(
                    inputs, input_blocks * [self._num_units], bias=True)

            if self.highway:
                ih, jh, fh, oh, rh = array_ops.split(value=hidden_matrix,
                                                     num_or_size_splits=5,
                                                     axis=1)
                ix, jx, fx, ox, rx, hx = array_ops.split(value=input_matrix,
                                                         num_or_size_splits=6,
                                                         axis=1)

                in_gate = sigmoid(ih + ix)
                out_gate = sigmoid(oh + ox)
                forget_gate = sigmoid(fh + fx + self._forget_bias)
                candidate = self._activation(jh + jx)
                c = forget_gate * c_prev + in_gate * candidate
                transform_gate = sigmoid(rh + rx)
                lstm_out = out_gate * self._activation(c)
                # Blend the LSTM output with the projected input.
                m = transform_gate * lstm_out + (1 - transform_gate) * hx
            else:
                ih, jh, fh, oh = array_ops.split(value=hidden_matrix,
                                                 num_or_size_splits=4,
                                                 axis=1)
                ix, jx, fx, ox = array_ops.split(value=input_matrix,
                                                 num_or_size_splits=4,
                                                 axis=1)

                in_gate = sigmoid(ih + ix)
                out_gate = sigmoid(oh + ox)
                forget_gate = sigmoid(fh + fx + self._forget_bias)
                c = in_gate * self._activation(jh + jx) + forget_gate * c_prev
                m = out_gate * self._activation(c)

        return m, LSTMStateTuple(c, m)
예제 #13
0
 def p2():
     """Rebuild the two memory states and controller states from `self.cur_*`.

     Closure over `memory_state`. When the incoming memory state carries more
     entries than the seven rebuilt here, its last two entries are appended
     to each rebuilt state ("cache mode").

     Returns:
         `(memory_states, controller_state_0, controller_state_1)` where
         `memory_states` is a list of two tuples.
     """
     rebuilt = [
         (self.cur_mem_content[k], self.cur_u[k], self.cur_p[k],
          self.cur_L[k], self.cur_ww[k], self.cur_rw[k], self.cur_rv[k])
         for k in (0, 1)
     ]
     if len(memory_state[0]) > len(rebuilt[0]):
         print('cache mode')
         # Carry the two trailing entries over from the incoming state.
         rebuilt = [
             rebuilt[k] + (memory_state[k][-2], memory_state[k][-1])
             for k in (0, 1)
         ]
     first_controller = LSTMStateTuple(self.cur_c[0], self.cur_h[0])
     second_controller = LSTMStateTuple(self.cur_c[1], self.cur_h[1])
     return rebuilt, first_controller, second_controller
예제 #14
0
    def call(self, inputs, state):
        """Compute one LSTM step whose input is augmented with attention over the premise.

        Attention scores are computed from the current input, the previous
        hidden state and `self._ds`; the attended premise summary is
        concatenated with the input and fed through the usual LSTM gates.

        Args:
            inputs: input tensor for this step.
            state: `(c, h)` pair when `self._state_is_tuple` is set,
                otherwise a single tensor that is split in two along axis 1.

        Returns:
            `(new_h, new_state)`; `new_state` mirrors the layout of `state`.
        """
        axis_one = constant_op.constant(1, dtype=dtypes.int32)
        if self._state_is_tuple:
            prev_c, prev_h = state
        else:
            prev_c, prev_h = array_ops.split(
                value=state, num_or_size_splits=2, axis=axis_one)

        # Project input and hidden state, then repeat each along a new axis 1
        # of length self._premise_length so they can be added to self._ds.
        input_term = tf.tensordot(inputs, self._wt, axes=[[-1], [0]])
        input_term = tf.tile(tf.expand_dims(input_term, 1),
                             [1, self._premise_length, 1])
        state_term = tf.tensordot(prev_h, self._wm, axes=[[-1], [0]])
        state_term = tf.tile(tf.expand_dims(state_term, 1),
                             [1, self._premise_length, 1])
        scores = tf.tensordot(
            tf.nn.tanh(input_term + self._ds + state_term),
            self._we,
            axes=[[-1], [0]])
        # Positions where the mask is 0 receive a very large negative score.
        scores = scores + (1. - self._premise_mask) * tf.float32.min
        attention = tf.nn.softmax(scores, axis=1)
        premise_summary = tf.reduce_sum(
            tf.multiply(attention, self._premise), axis=1)

        cell_input = tf.concat([premise_summary, inputs], axis=1)

        # All four gates come from a single multiply for efficiency.
        gate_inputs = math_ops.matmul(
            array_ops.concat([cell_input, prev_h], 1), self._kernel)
        gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)

        # Gate order: input gate, new input, forget gate, output gate.
        in_gate, candidate, forget_gate, out_gate = array_ops.split(
            value=gate_inputs, num_or_size_splits=4, axis=axis_one)

        forget_bias_tensor = constant_op.constant(self._forget_bias,
                                                  dtype=forget_gate.dtype)
        # The explicit add/multiply ops are kept instead of `+` and `*`
        # because, per the original author, they perform better.
        retained = math_ops.multiply(
            prev_c,
            math_ops.sigmoid(math_ops.add(forget_gate, forget_bias_tensor)))
        written = math_ops.multiply(math_ops.sigmoid(in_gate),
                                    self._activation(candidate))
        new_c = math_ops.add(retained, written)
        new_h = math_ops.multiply(self._activation(new_c),
                                  math_ops.sigmoid(out_gate))

        if self._state_is_tuple:
            return new_h, LSTMStateTuple(new_c, new_h)
        return new_h, array_ops.concat([new_c, new_h], 1)
예제 #15
0
파일: dnc.py 프로젝트: revz345/dnc
    def _loop_body(self, time, memory_state, outputs, free_gates,
                   allocation_gates, write_gates, read_weightings,
                   write_weightings, usage_vectors, controller_state):
        """
        the body of the DNC sequence processing loop

        Reads the input for step `time`, runs `self._step_op`, and records
        the step's output and memory view in the TensorArrays.

        Parameters:
        ----------
        time: Tensor
            index of the step being processed
        memory_state: Tuple
        outputs: TensorArray
        free_gates: TensorArray
        allocation_gates: TensorArray
        write_gates: TensorArray
        read_weightings: TensorArray
        write_weightings: TensorArray
        usage_vectors: TensorArray
        controller_state: Tuple

        Returns: Tuple containing all updated arguments, in the same order
        as the parameters, with `time` advanced by one
        """

        step_input = self.unpacked_input_data.read(time)

        output_list = self._step_op(step_input, memory_state, controller_state)

        # Layout of output_list as consumed here: [0:7] new memory state,
        # [7] step output, [8..10] free/allocation/write gates,
        # [11], [12] the two fields of the new controller state.
        new_memory_state = tuple(output_list[0:7])
        new_controller_state = LSTMStateTuple(output_list[11], output_list[12])

        outputs = outputs.write(time, output_list[7])

        # collecting memory view for the current step
        free_gates = free_gates.write(time, output_list[8])
        allocation_gates = allocation_gates.write(time, output_list[9])
        write_gates = write_gates.write(time, output_list[10])
        # These three are taken from inside the new memory state.
        read_weightings = read_weightings.write(time, output_list[5])
        write_weightings = write_weightings.write(time, output_list[4])
        usage_vectors = usage_vectors.write(time, output_list[1])

        return (time + 1, new_memory_state, outputs, free_gates,
                allocation_gates, write_gates, read_weightings,
                write_weightings, usage_vectors, new_controller_state)
예제 #16
0
    def Model(self):
        """Build the depth-image policy network for training and single-step testing.

        Three convolutions over `self.depth_input` feed an RNN cell whose
        outputs are mapped to a linear and an angular action. The training
        path unrolls the cell with `tf.nn.dynamic_rnn`; the testing path
        steps the same cell once from a state supplied through placeholders.

        Returns:
            `(a, a_test, rnn_state, prev_rnn_state)`: training actions,
            test-step actions, the cell state after the test step, and the
            placeholder(s) feeding the test-time previous state (a single
            `LSTMStateTuple` when `self.n_layers == 1`, otherwise a list
            with one per layer).
        """
        # Shape notes such as "b*l, h, w, c" are the original author's.
        conv1 = model_utils.Conv2D(self.depth_input, 4, (5, 5), (4, 4),
                                   scope='conv1')  # b*l, h, w, c
        conv2 = model_utils.Conv2D(conv1, 16, (5, 5), (4, 4),
                                   scope='conv2')  # b*l, h, w, c
        conv3 = model_utils.Conv2D(conv2, 32, (3, 3), (2, 2),
                                   scope='conv3')  # b*l, h, w, c
        shape = conv3.get_shape().as_list()
        feature_size = shape[1] * shape[2] * shape[3]

        rnn_cell = model_utils._lstm_cell(self.n_hidden, self.n_layers)

        w_linear_a = tf.get_variable(
            'w_linear', [self.n_hidden, 1],
            initializer=tf.initializers.random_uniform(-0.003, 0.003))
        w_angular_a = tf.get_variable(
            'w_angular', [self.n_hidden, 1],
            initializer=tf.initializers.random_uniform(-0.003, 0.003))
        b_linear_a = tf.get_variable(
            'b_linear_a', [1],
            initializer=tf.initializers.random_uniform(-0.003, 0.003))
        b_angular_a = tf.get_variable(
            'b_angular_a', [1],
            initializer=tf.initializers.random_uniform(-0.003, 0.003))

        # training
        depth_vectors = tf.reshape(
            conv3, (self.batch_size, self.max_steps, feature_size))  # b, l, h

        rnn_outputs, _ = tf.nn.dynamic_rnn(rnn_cell,
                                           depth_vectors,
                                           sequence_length=self.lengths,
                                           dtype=tf.float32)  # b, l, h

        rnn_outputs_reshape = tf.reshape(rnn_outputs,
                                         [-1, self.n_hidden])  # b*l, h

        # Sigmoid bounds the linear action to (0, range[0]); tanh bounds the
        # angular action to (-range[1], range[1]).
        a_linear = tf.nn.sigmoid(
            tf.matmul(rnn_outputs_reshape, w_linear_a) +
            b_linear_a) * self.action_range[0]  # b*l, 1
        a_angular = tf.nn.tanh(
            tf.matmul(rnn_outputs_reshape, w_angular_a) +
            b_angular_a) * self.action_range[1]  # b*l, 1
        a = tf.concat([a_linear, a_angular], axis=1)  # b*l, 2

        # testing: the previous state is fed in through placeholders so the
        # cell can be stepped one observation at a time.
        prev_rnn_state = []
        for layer in range(self.n_layers):
            prev_rnn_state.append(
                LSTMStateTuple(
                    tf.placeholder(tf.float32,
                                   shape=[None, self.n_hidden],
                                   name='initial_state1{0}.c'.format(layer)),
                    tf.placeholder(tf.float32,
                                   shape=[None, self.n_hidden],
                                   name='initial_state1{0}.h'.format(layer))))
        # A single-layer cell takes its state directly, not in a list.
        if self.n_layers == 1:
            prev_rnn_state = prev_rnn_state[0]

        # The (1, 1, features) reshape only succeeds for a single frame.
        depth_vectors_test = tf.reshape(conv3,
                                        (1, 1, feature_size))  # b, l, h

        rnn_outputs_test, rnn_state = rnn_cell(
            tf.reshape(depth_vectors_test, [-1, feature_size]),
            prev_rnn_state)

        a_linear_test = tf.nn.sigmoid(
            tf.matmul(rnn_outputs_test, w_linear_a) +
            b_linear_a) * self.action_range[0]  # b*l, 1
        a_angular_test = tf.nn.tanh(
            tf.matmul(rnn_outputs_test, w_angular_a) +
            b_angular_a) * self.action_range[1]  # b*l, 1
        a_test = tf.concat([a_linear_test, a_angular_test], axis=1)  # b*l, 2

        return a, a_test, rnn_state, prev_rnn_state
예제 #17
0
    def _loop_body(self, time, memory_state, outputs, read_weightings,
                   write_weightings, controller_state, write_vectors,
                   key_vectors, beta_vectors, shift_vectors, gamma_vectors,
                   gates_vectors, memory_vectors):
        """
        the body of the sequence processing loop

        Reads the input for step `time`, runs `self._step_op`, and records
        the step's output and memory view in the TensorArrays.

        Parameters:
        ----------
        time: Tensor
            index of the step being processed
        memory_state: Tuple
        outputs: TensorArray
        read_weightings: TensorArray
        write_weightings: TensorArray
        controller_state: Tuple
        write_vectors: TensorArray
        key_vectors: TensorArray
        beta_vectors: TensorArray
        shift_vectors: TensorArray
        gamma_vectors: TensorArray
        gates_vectors: TensorArray
        memory_vectors: TensorArray

        Returns: Tuple containing all updated arguments, in the same order
        as the parameters, with `time` advanced by one
        """

        step_input = self.unpacked_input_data.read(time)
        output_list = self._step_op(step_input, memory_state, controller_state)

        # Layout of output_list as consumed here: [0:4] new memory state,
        # [4] step output, [5], [6] the two fields of the new controller
        # state, [7..12] write/key/beta/shift/gamma/gates vectors.
        new_memory_state = tuple(output_list[0:4])
        new_controller_state = LSTMStateTuple(output_list[5], output_list[6])

        outputs = outputs.write(time, output_list[4])

        # collecting memory view for the current step
        read_weightings = read_weightings.write(time, output_list[2])
        write_weightings = write_weightings.write(time, output_list[1])
        write_vectors = write_vectors.write(time, output_list[7])
        key_vectors = key_vectors.write(time, output_list[8])
        beta_vectors = beta_vectors.write(time, output_list[9])
        shift_vectors = shift_vectors.write(time, output_list[10])
        gamma_vectors = gamma_vectors.write(time, output_list[11])
        gates_vectors = gates_vectors.write(time, output_list[12])
        # The first entry of the new memory state is recorded per step.
        memory_vectors = memory_vectors.write(time, output_list[0])

        return (time + 1, new_memory_state, outputs, read_weightings,
                write_weightings, new_controller_state, write_vectors,
                key_vectors, beta_vectors, shift_vectors, gamma_vectors,
                gates_vectors, memory_vectors)
예제 #18
0
    def call(self, inputs, state):
        """Compute one step of the long short-term memory cell (LSTM).

        On the first call the gate projection `self._linear` is created; if
        `self._state_keep_prob` is below 1.0, the recurrent rows of its
        weight matrix are multiplied by `self._mask_tensor` at that point.

        Args:
            inputs: `2-D` tensor with shape `[batch_size x input_size]`.
            state: An `LSTMStateTuple` of state tensors, each shaped
                `[batch_size x self.state_size]`, if `state_is_tuple` has
                been set to `True`.  Otherwise, a `Tensor` shaped
                `[batch_size x 2 * self.state_size]`.

        Returns:
            A pair containing the new hidden state, and the new state (either
            a `LSTMStateTuple` or a concatenated state, depending on
            `state_is_tuple`).
        """
        if self._state_is_tuple:
            prev_c, prev_h = state
        else:
            prev_c, prev_h = array_ops.split(
                value=state, num_or_size_splits=2, axis=1)

        if self._linear is None:
            # All four gates are produced by one projection for efficiency.
            self._linear = _Linear([inputs, prev_h], 4 * self._num_units, True)
            if self._state_keep_prob < 1.0:
                # The last self._num_units rows of the weights act on the
                # hidden state; only those rows are masked.
                kernel = self._linear._weights
                input_rows = kernel.get_shape().as_list()[0] - self._num_units
                input_part, recurrent_part = array_ops.split(
                    kernel, [input_rows, self._num_units])
                recurrent_part = recurrent_part * self._mask_tensor
                self._linear._weights = array_ops.concat(
                    [input_part, recurrent_part], 0)

        # Gate order: input gate, new input, forget gate, output gate.
        in_gate, candidate, forget_gate, out_gate = array_ops.split(
            value=self._linear([inputs, prev_h]),
            num_or_size_splits=4,
            axis=1)

        retained = prev_c * math_ops.sigmoid(forget_gate + self._forget_bias)
        written = math_ops.sigmoid(in_gate) * self._activation(candidate)
        new_c = retained + written
        new_h = self._activation(new_c) * math_ops.sigmoid(out_gate)

        if self._state_is_tuple:
            return new_h, LSTMStateTuple(new_c, new_h)
        return new_h, array_ops.concat([new_c, new_h], 1)
예제 #19
0
    def Model(self):
        """Build the recurrent Q-value graph for training and for single-step testing.

        `self.depth_input` goes through three `model_utils.Conv2D` layers; the
        flattened result is concatenated with `self.action_input` and fed to
        the cell built by `model_utils._lstm_cell`. The RNN outputs are
        projected to one value per row with `w_q` / `b_q`.

        Returns:
            A tuple `(q, q_test, rnn_state, prev_rnn_state)`:
              q: projection of every training RNN output, one row per
                (batch, step) pair.
              q_test: the same projection applied to the single-step output.
              rnn_state: state returned by the single-step cell call.
              prev_rnn_state: placeholder state fed to the single-step call; a
                single `LSTMStateTuple` when `n_layers == 1`, otherwise a list
                with one tuple per layer.
        """
        conv1 = model_utils.Conv2D(self.depth_input, 4, (5, 5), (4, 4), scope='conv1')  # b*l, h, w, c
        conv2 = model_utils.Conv2D(conv1, 16, (5, 5), (4, 4), scope='conv2')  # b*l, h, w, c
        conv3 = model_utils.Conv2D(conv2, 32, (3, 3), (2, 2), scope='conv3')  # b*l, h, w, c
        shape = conv3.get_shape().as_list()
        # Size of one flattened conv3 feature map (h*w*c).
        feature_dim = shape[1] * shape[2] * shape[3]

        rnn_cell = model_utils._lstm_cell(self.n_hidden, self.n_layers)

        w_q = tf.get_variable('w_q', [self.n_hidden, 1],
                              initializer=tf.initializers.random_uniform(-0.003, 0.003))
        b_q = tf.get_variable('b_q', [1],
                              initializer=tf.initializers.random_uniform(-0.003, 0.003))

        # training
        depth_vectors = tf.reshape(conv3, (self.batch_size, self.max_steps, feature_dim),
                                   name='train_d_reshape')  # b, l, h*w*c
        action_input_reshape = tf.reshape(self.action_input,
                                          (self.batch_size, self.max_steps, 2),
                                          name='train_a_reshape')  # b, l, 2
        inputs = tf.concat([depth_vectors, action_input_reshape], axis=2)  # b, l, h*w*c+2

        rnn_outputs, _ = tf.nn.dynamic_rnn(rnn_cell,
                                           inputs,
                                           sequence_length=self.lengths,
                                           dtype=tf.float32)  # b, l, h

        rnn_outputs_reshape = tf.reshape(rnn_outputs, [-1, self.n_hidden])  # b*l, h

        q = tf.matmul(rnn_outputs_reshape, w_q) + b_q  # b*l, 1

        # testing
        # The same conv3 tensor is reshaped to a single (batch=1, step=1)
        # sample here, so this path is only valid when exactly one frame is fed.
        depth_vectors_test = tf.reshape(conv3, (1, 1, feature_dim),
                                        name='test_d_reshape')  # b, l, h*w*c
        action_input_reshape_test = tf.reshape(self.action_input, (1, 1, 2),
                                               name='test_a_reshape')  # b, l, 2
        inputs_test = tf.concat([depth_vectors_test, action_input_reshape_test],
                                axis=2)  # b, l, h*w*c+2

        # One (c, h) placeholder pair per layer so the caller can carry the
        # recurrent state between single-step evaluations. `range` is used
        # instead of the Python-2-only `xrange`.
        prev_rnn_state = [
            LSTMStateTuple(
                tf.placeholder(tf.float32, shape=[None, self.n_hidden],
                               name='initial_state1{0}.c'.format(layer)),
                tf.placeholder(tf.float32, shape=[None, self.n_hidden],
                               name='initial_state1{0}.h'.format(layer)))
            for layer in range(self.n_layers)
        ]
        if self.n_layers == 1:
            prev_rnn_state = prev_rnn_state[0]

        rnn_outputs_test, rnn_state = rnn_cell(
            tf.reshape(inputs_test, (-1, feature_dim + 2)), prev_rnn_state)

        q_test = tf.matmul(rnn_outputs_test, w_q) + b_q  # b*l, 1

        return q, q_test, rnn_state, prev_rnn_state
예제 #20
0
    def impress(self, state_code, pre_impress_states, is_first_in_impress):
        """Feed `state_code` through a stacked LSTM via `tf.nn.raw_rnn`.

        Args:
            state_code: tensor used as the RNN input at the first time step.
            pre_impress_states: previous per-layer states; unstacked along
                axis 0 and, for each layer, indexed with `[0]` and `[1]` to
                form the `(c, h)` pair. Only read when `is_first_in_impress`
                is not equal to `True`.
            is_first_in_impress: when equal to `True` the stack starts from
                its zero state instead of `pre_impress_states`.

        Returns:
            A pair `(state_impress_code, states)`: the sliced emitted outputs
            and the final state returned by `raw_rnn`.
        """
        with tf.variable_scope('impress', reuse=tf.AUTO_REUSE):
            layer_cells = [
                LSTMCell(self.impress_dim) for _ in range(self.impress_lay_num)
            ]
            multirnn_cell = MultiRNNCell(layer_cells, state_is_tuple=True)

            # Explicit comparison against True is kept as in the original code.
            if is_first_in_impress == True:
                state_ = multirnn_cell.zero_state(self.batch_size, tf.float32)
            else:
                layer_states = tf.unstack(pre_impress_states, axis=0)
                state_ = tuple(
                    LSTMStateTuple(layer_states[idx][0], layer_states[idx][1])
                    for idx in range(self.impress_lay_num))

            def loop_fn(time, cell_output, cell_state, loop_state):
                # On the initial call (cell_output is None) the loop is seeded
                # with the external input and state; afterwards the cell's
                # own output and state are fed back.
                if cell_output is None:
                    next_input, next_state = state_code, state_
                else:
                    next_input, next_state = cell_output, cell_state
                # Every sequence is reported finished once time reaches 1.
                # The emitted value is the cell output and no loop state is
                # carried.
                return (time >= 1, next_input, next_state, cell_output, None)

            emit_ta, states, _ = tf.nn.raw_rnn(multirnn_cell, loop_fn)

            # transpose for putting batch dimension to first dimension
            # NOTE(review): the trailing [0] then selects index 0 of that
            # leading dimension -- confirm this is the intended slice.
            batch_major = tf.transpose(emit_ta.stack(), [1, 0, 2])
            state_impress_code = batch_major[0]

            return state_impress_code, states
  def call(self, inputs, state):
    """Compute one step of the basic LSTM cell.

    Args:
      inputs: `2-D` tensor with shape `[batch_size, input_size]`.
      state: An `LSTMStateTuple` `(c, h)` when `state_is_tuple` is `True`;
        otherwise one tensor that is split in half along axis 1 into `c`
        and `h`.

    Returns:
      A pair `(new_h, new_state)`; `new_state` is an `LSTMStateTuple` or the
      axis-1 concatenation of `new_c` and `new_h`, depending on
      `state_is_tuple`.
    """
    split_axis = constant_op.constant(1, dtype=dtypes.int32)
    if self._state_is_tuple:
      prev_c, prev_h = state
    else:
      prev_c, prev_h = array_ops.split(
          value=state, num_or_size_splits=2, axis=split_axis)

    # All four gates come from a single matmul over [inputs, h].
    stacked = array_ops.concat([inputs, prev_h], 1)
    gate_inputs = nn_ops.bias_add(
        math_ops.matmul(stacked, self._kernel), self._bias)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(
        value=gate_inputs, num_or_size_splits=4, axis=split_axis)

    forget_bias_tensor = constant_op.constant(self._forget_bias, dtype=f.dtype)
    # The functional `add` / `multiply` ops are used instead of `+` / `*`,
    # as the original code did for performance.
    forget_gate = math_ops.sigmoid(math_ops.add(f, forget_bias_tensor))
    retained = math_ops.multiply(prev_c, forget_gate)
    written = math_ops.multiply(math_ops.sigmoid(i), self._activation(j))
    new_c = math_ops.add(retained, written)
    new_h = math_ops.multiply(self._activation(new_c), math_ops.sigmoid(o))

    if self._state_is_tuple:
      return new_h, LSTMStateTuple(new_c, new_h)
    return new_h, array_ops.concat([new_c, new_h], 1)
예제 #22
0
 def Encoder(self):
     """Encode `self.xs` with a bidirectional dynamic RNN.

     Returns:
         A pair `(output, final_state)`: `output` is the element-wise mean of
         the forward and backward outputs; `final_state` is an
         `LSTMStateTuple` whose `c` and `h` are the forward and backward final
         states concatenated along axis 1.
     """
     # Every sequence in the batch is given the same length, input_timestep.
     batch = tf.shape(self.xs)[0]
     inputs_length = tf.fill([batch], self.input_timestep)

     # NOTE(review): the same cell object is passed for both directions;
     # whether that ties the forward/backward weights depends on the
     # TensorFlow version -- confirm this is intended.
     rnn_cell = LSTMCell(self.encoder_units)
     outputs, final_states = tf.nn.bidirectional_dynamic_rnn(
         cell_fw=rnn_cell,
         cell_bw=rnn_cell,
         inputs=self.xs,
         sequence_length=inputs_length,
         dtype=self.dtype)
     fw_outputs, bw_outputs = outputs
     fw_final_state, bw_final_state = final_states

     # Outputs are averaged, while the final states are concatenated.
     output = tf.add(fw_outputs, bw_outputs) / 2
     final_state = LSTMStateTuple(
         c=tf.concat([fw_final_state.c, bw_final_state.c], axis=1),
         h=tf.concat([fw_final_state.h, bw_final_state.h], axis=1))
     return output, final_state
예제 #23
0
파일: conv_lstm.py 프로젝트: DS3Lab/odlc
    def init_state(self, batch_size_tensor):
        """Create an all-zero `(c, h)` state for the configured data format.

        Args:
            batch_size_tensor: value used as the leading (batch) dimension of
                the state shape.

        Returns:
            An `LSTMStateTuple` of two zero-filled tensors named 'c' and 'h',
            laid out channels-last for `NHWC` and channels-first for `NCHW`.

        Raises:
            ValueError: if the data format is neither `NHWC` nor `NCHW`.
        """
        layout = self._data_format
        if layout not in ('NHWC', 'NCHW'):
            raise ValueError(
                "invalid data format. Expected one of [`NHWC`, `NCHW`], got {}"
                .format(layout))

        spatial = [self._out_height, self._out_width]
        if layout == 'NHWC':
            state_shape = [batch_size_tensor] + spatial + [self._num_units]
        else:
            state_shape = [batch_size_tensor, self._num_units] + spatial

        zero_c = tf.fill(dims=state_shape, value=0.0, name='c')
        zero_h = tf.fill(dims=state_shape, value=0.0, name='h')
        return LSTMStateTuple(zero_c, zero_h)
예제 #24
0
    def call(self, inputs, state):
        """Run one LSTM step using the fused gate helper.

        Args:
            inputs: tensor concatenated with `h` along axis 1 before the
                kernel matmul.
            state: an `LSTMStateTuple` `(c, h)` when `state_is_tuple` is set;
                otherwise one tensor that is split in half along axis 1.

        Returns:
            A pair `(new_h, new_state)`; `new_state` is an `LSTMStateTuple` or
            the axis-1 concatenation of `new_c` and `new_h`, depending on
            `state_is_tuple`.
        """
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            # A literal axis is used here: the previous code passed a name
            # `one` that is not defined in this method.
            c, h = tf.split(value=state, num_or_size_splits=2, axis=1)

        gate_inputs = tf.matmul(tf.concat([inputs, h], 1), self._kernel)

        # `fused_lstm_gates` receives the previous cell state and the raw gate
        # pre-activations and returns the new (c, h) pair.
        # NOTE(review): presumably it applies the bias and gate
        # nonlinearities internally -- confirm against its definition.
        new_c, new_h = fused_lstm_gates(c,
                                        gate_inputs,
                                        bias=self._bias,
                                        forget_bias=self._forget_bias)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = tf.concat([new_c, new_h], 1)

        return new_h, new_state
    def __call__(self, inputs, state, scope=None):
        """Apply one step of the convolutional LSTM cell.

        Args:
            inputs: input tensor, passed to `_conv_linear` together with `h`.
            state: `(c, h)` tuple when `state_is_tuple` is set; otherwise one
                tensor split in half along axis 3.
            scope: optional variable scope; defaults to the class name.

        Returns:
            A pair `(new_h, new_state)`; `new_state` is an `LSTMStateTuple` or
            the axis-3 concatenation of `new_c` and `new_h`.
        """
        with tf.variable_scope(scope or type(self).__name__):
            if self._state_is_tuple:
                prev_c, prev_h = state
            else:
                prev_c, prev_h = tf.split(state, 2, 3)

            # One call produces the pre-activations of all four gates.
            gates = _conv_linear([inputs, prev_h], self.filter_size,
                                 self.num_features * 4, True)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = tf.split(gates, 4, 3)

            forget_gate = tf.nn.sigmoid(f + self._forget_bias)
            retained = prev_c * forget_gate
            written = tf.nn.sigmoid(i) * self._activation(j)
            new_c = retained + written
            new_h = self._activation(new_c) * tf.nn.sigmoid(o)

            if self._state_is_tuple:
                return new_h, LSTMStateTuple(new_c, new_h)
            return new_h, tf.concat([new_c, new_h], 3)
예제 #26
0
    def __call__(self, inputs, state, scope=None):
        """Run the wrapped cell, then apply the configured zoneouts to its state.

        Args:
            inputs: input passed through to the wrapped cell.
            state: previous state; unpacked as a `(c, m)` pair when any
                zoneout is enabled.
            scope: forwarded to the wrapped cell.

        Returns:
            A pair `(output, new_state)`. With no zoneout enabled this is the
            wrapped cell's result unchanged; otherwise `new_state` is an
            `LSTMStateTuple` of the zoned-out values and `output` is its `h`.
        """
        output, new_state = self._cell(inputs, state, scope)

        if not (self._has_hidden_state_zoneout or self._has_memory_cell_zoneout):
            return output, new_state

        def _zone(previous, current, keep_prob, enabled):
            # Mix the previous and freshly computed value of one state part.
            if not enabled:
                return current
            if not self._is_training:
                # Outside training, use the fixed keep_prob-weighted blend.
                return ((1. - keep_prob) * previous) + (keep_prob * current)
            # dropout() scales kept entries by 1 / keep_prob; multiplying by
            # keep_prob again leaves a 0/1 mask.
            mask = nn_ops.dropout(
                array_ops.ones_like(current), keep_prob, seed=self._seed
            ) * keep_prob
            return ((1. - mask) * previous) + (mask * current)

        c_old, m_old = state
        c_new, m_new = new_state

        zoned_c = _zone(c_old, c_new, self._memory_cell_keep_prob,
                        self._has_memory_cell_zoneout)
        zoned_m = _zone(m_old, m_new, self._hidden_state_keep_prob,
                        self._has_hidden_state_zoneout)

        new_state = LSTMStateTuple(zoned_c, zoned_m)
        return new_state.h, new_state
예제 #27
0
    def __call__(self, input, state, scope=None):
        """Apply one step of the convolutional LSTM cell (ConvLSTM).

        Args:
            input: flattened input; reshaped to
                `[batch, height, width, num_units]`.
            state: pair `(memory, output)` of flattened tensors, each reshaped
                to the same 4-D layout.
            scope: optional variable scope; defaults to 'ConvLSTMCell'.

        Returns:
            A pair `(output, LSTMStateTuple(memory, output))`, with both
            tensors flattened back to `[-1, height * width * num_units]`.
        """
        # NOTE(review): `concat(3, ...)` and `split(3, 4, ...)` below pass the
        # axis first, which looks like the pre-1.0 TensorFlow argument order --
        # confirm against the imported functions.
        with variable_scope(scope or 'ConvLSTMCell'):
            previous_memory, previous_output = state

            with variable_scope('Expand'):
                # The batch size is read from the static shape.
                batch_size = int(previous_memory.get_shape()[0])
                grid_shape = [
                    batch_size, self._height, self._width, self._num_units
                ]
                grid_input = reshape(input, grid_shape)
                grid_memory = reshape(previous_memory, grid_shape)
                grid_output = reshape(previous_output, grid_shape)

            with variable_scope('Convolve'):
                stacked = concat(3, [grid_input, grid_output])
                kernel = get_variable(
                    'Weights',
                    self._kernel + [2 * self._num_units, 4 * self._num_units])
                bias = get_variable('Biases', [4 * self._num_units],
                                    initializer=constant_initializer(0.0))
                pre_activations = conv2d(stacked, kernel, [1, 1, 1, 1], 'SAME') + bias
                input_gate, new_input, forget_gate, output_gate = split(
                    3, 4, pre_activations)

            with variable_scope('LSTM'):
                retained = grid_memory * sigmoid(forget_gate + self._forget_bias)
                written = sigmoid(input_gate) * self._activation(new_input)
                memory = retained + written
                output = self._activation(memory) * sigmoid(output_gate)

            with variable_scope('Flatten'):
                flat_shape = [-1, self._height * self._width * self._num_units]
                output = reshape(output, flat_shape)
                memory = reshape(memory, flat_shape)

            return output, LSTMStateTuple(memory, output)
예제 #28
0
    def encoder(self):
        """Build the encoder RNN and store its outputs and final state on `self`.

        When `self.Bidirection` is falsy a single `BasicLSTMCell` is unrolled
        with `tf.nn.dynamic_rnn`. Otherwise a forward and a backward
        `LSTMCell` are unrolled with `tf.nn.bidirectional_dynamic_rnn` and
        their results concatenated.

        Sets:
            self.encoder_outputs: the RNN outputs; in the bidirectional case
                the forward and backward outputs concatenated along axis 2.
            self.encoder_final_state: the final state; in the bidirectional
                case an `LSTMStateTuple` whose `c` and `h` are concatenated
                along axis 1.
            self.hidden_units: `num_units`, or `2 * num_units` when
                bidirectional.
        """
        with tf.variable_scope(self.model_name + "encoder_model"):
            if not self.Bidirection:
                encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(self.num_units)
                self.encoder_outputs, self.encoder_final_state = tf.nn.dynamic_rnn(
                    cell=encoder_cell,
                    inputs=self.encoder_inputs_embedded,
                    sequence_length=self.encoder_inputs_length,
                    time_major=False,
                    dtype=tf.float32)
                self.hidden_units = self.num_units
            else:
                encoder_cell_fw = LSTMCell(self.num_units)
                encoder_cell_bw = LSTMCell(self.num_units)
                ((encoder_fw_outputs, encoder_bw_outputs),
                 (encoder_fw_final_state,
                  encoder_bw_final_state)) = (tf.nn.bidirectional_dynamic_rnn(
                      cell_fw=encoder_cell_fw,
                      cell_bw=encoder_cell_bw,
                      inputs=self.encoder_inputs_embedded,
                      sequence_length=self.encoder_inputs_length,
                      dtype=tf.float32,
                      time_major=False))

                # Store the concatenated outputs on the instance, as the
                # unidirectional branch does; they were previously discarded.
                self.encoder_outputs = tf.concat(
                    (encoder_fw_outputs, encoder_bw_outputs), 2)

                encoder_final_state_c = tf.concat(
                    (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)
                encoder_final_state_h = tf.concat(
                    (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)

                # TF Tuple used by LSTM Cells for state_size, zero_state, and output state.
                self.encoder_final_state = LSTMStateTuple(
                    c=encoder_final_state_c, h=encoder_final_state_h)
                # Both directions are concatenated, so the width doubles.
                self.hidden_units = 2 * self.num_units
예제 #29
0
    def call(self, inputs, state):
        """Run one step of this layer's cell, gated by the states of all layers.

        Args:
            inputs: input tensor for this step.
            state: sequence with one `(c, h)` entry per layer; this cell uses
                the entry at `self._layer_pos` as its own state and the last
                element of every entry for the scalar gates.

        Returns:
            A pair `(new_h, LSTMStateTuple(new_c, new_h))`.

        Raises:
            ValueError: if `state` is not a sequence.
        """
        if not nest.is_sequence(state):
            raise ValueError("Expected state to be a tuple of length %d, but receive: %s" % (len(self.state_size), state))

        # Initializer arguments shared by every `_linear` call below.
        init_kwargs = dict(bias_initializer=self._bias_initializer,
                           kernel_initializer=self._kernel_initializer)

        n_layer = len(state)
        c, h = state[self._layer_pos]
        all_h = array_ops.concat([layer_state[-1] for layer_state in state], axis=1)

        with variable_scope('input-forget-output-gates'):
            gate_pre = _linear([inputs, h], 3 * self._num_units, True, **init_kwargs)
            i, f, o = array_ops.split(gate_pre, 3, axis=1)

        with variable_scope('scalar-gates'):
            # One scalar gate per layer, computed from the inputs and all h's.
            gates = _linear([inputs, all_h], n_layer, True, **init_kwargs)

        with variable_scope('gated-inputs'):
            # Scale this cell's h by each scalar gate and flatten the result
            # to (-1, n_layer * num_units) before projecting it.
            scaled_h = array_ops.expand_dims(gates, axis=2) * array_ops.expand_dims(h, axis=1)
            flat_scaled_h = array_ops.reshape(scaled_h, (-1, n_layer * self._num_units))
            gated_h = _linear(flat_scaled_h, self._num_units, True, **init_kwargs)

        with variable_scope('new-inputs'):
            projected_inputs = _linear(inputs, self._num_units, True, **init_kwargs)
            new_inputs = self._activation(projected_inputs + gated_h)

        # The activation is applied to the cell state itself, so new_h is the
        # output gate times the already-activated new_c.
        retained = c * math_ops.sigmoid(f + self._forget_bias)
        written = math_ops.sigmoid(i) * new_inputs
        new_c = self._activation(retained + written)
        new_h = new_c * math_ops.sigmoid(o)
        return (new_h, LSTMStateTuple(new_c, new_h))
예제 #30
0
    def __call__(self, inputs, state, scope=None):
        """Apply one step of the ConvLSTM cell, passing each projection through `ln`.

        Args:
            inputs: input tensor for this step.
            state: `(c, h)` tuple when `state_is_tuple` is set; otherwise one
                tensor that is split into `c` and `h`.
            scope: optional variable scope; defaults to the class name.

        Returns:
            A pair `(new_h, new_state)`; `new_state` is an `LSTMStateTuple` or
            the concatenation of `new_c` and `new_h`.
        """
        # NOTE(review): `array_ops.split(3, ...)` and `array_ops.concat(3, ...)`
        # pass the axis first, which looks like the pre-1.0 TensorFlow argument
        # order -- confirm against the installed version.
        with vs.variable_scope(scope or type(self).__name__):
            if self._state_is_tuple:
                prev_c, prev_h = state
            else:
                prev_c, prev_h = array_ops.split(3, 2, state)

            # Scale (s*) and shift (b*) parameters handed to `ln`: one pair for
            # the input projection and one for the state projection.
            # NOTE(review): `ln` is presumably layer normalization -- confirm.
            param_shape = [self._height, self._width, 4 * self._num_units]
            s1 = vs.get_variable("s1", initializer=tf.ones(param_shape), dtype=tf.float32)
            s2 = vs.get_variable("s2", initializer=tf.ones(param_shape), dtype=tf.float32)
            b1 = vs.get_variable("b1", initializer=tf.zeros(param_shape), dtype=tf.float32)
            b2 = vs.get_variable("b2", initializer=tf.zeros(param_shape), dtype=tf.float32)

            input_term = _conv([inputs], 4 * self._num_units, self._k_size, False,
                               initializer=self._initializer, scope="out_1")
            input_term = ln(input_term, s1, b1)
            state_term = _conv([prev_h], 4 * self._num_units, self._k_size, False,
                               initializer=self._initializer, scope="out_2")
            state_term = ln(state_term, s2, b2)
            lstm_matrix = tf.add(input_term, state_term)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = array_ops.split(3, 4, lstm_matrix)

            retained = prev_c * sigmoid(f + self._forget_bias)
            written = sigmoid(i) * self._activation(j)
            new_c = retained + written
            new_h = self._activation(new_c) * sigmoid(o)

            if self._state_is_tuple:
                new_state = LSTMStateTuple(new_c, new_h)
            else:
                new_state = array_ops.concat(3, [new_c, new_h])
            return new_h, new_state