Example 1
    def __call__(self, inputs, state, scope=None):
        def replace_w(x):
            if x.op.name.endswith('Matrix'):
                return bit_utils.quantize_w(tf.tanh(x), bit=self._w_bit)
            else:
                return x

        with bit_utils.replace_variable(replace_w):
            with tf.variable_scope(scope or type(self).__name__):
                with tf.variable_scope("Gates"):
                    # old tf.split signature: tf.split(axis, num_splits, value);
                    # r = reset gate, u = update gate
                    r, u = tf.split(
                        1, 2,
                        tf.nn.rnn_cell._linear([inputs, state],
                                               2 * self._num_units, True, 1.0))
                    r, u = tf.sigmoid(r), tf.sigmoid(u)
                with tf.variable_scope("Candidate"):
                    c = self._activation(
                        tf.nn.rnn_cell._linear([
                            inputs,
                            bit_utils.round_bit(r * state, bit=self._f_bit)
                        ], self._num_units, True))
                    c = bit_utils.round_bit(c, bit=self._f_bit)
                new_h = bit_utils.round_bit(u * state + (1 - u) * c,
                                            bit=self._f_bit)
        return new_h, new_h
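All of the examples below lean on `bit_utils.round_bit` and `bit_utils.quantize_w`. As a point of reference only, here is a minimal sketch of what such helpers typically look like, assuming DoReFa-style uniform fixed-point rounding with a straight-through gradient; the repository's actual `bit_utils` may differ.

import tensorflow as tf

def round_bit(x, bit):
    # Uniform rounding of x in [0, 1] to 2^bit - 1 levels.
    # gradient_override_map makes round() act as identity in the backward
    # pass (straight-through estimator), so training can still proceed.
    scale = 2.0 ** bit - 1.0
    with tf.get_default_graph().gradient_override_map({'Round': 'Identity'}):
        return tf.round(x * scale) / scale

def quantize_w(w, bit):
    # Weight quantization for a tanh-squashed kernel w in [-1, 1]:
    # rescale to [0, 1], round to `bit` bits, and map back to [-1, 1].
    y = w / (2.0 * tf.maximum(tf.reduce_max(tf.abs(w)), 1e-8)) + 0.5
    return 2.0 * round_bit(y, bit) - 1.0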
Example 2
    def call(self, inputs, state):
        def replace_w(x):
            if x.op.name.endswith('kernel'):
                return bit_utils.quantize_w(tf.tanh(x), bit=self._w_bit)
            else:
                return x

        with bit_utils.replace_variable(replace_w):
            sigmoid = tf.sigmoid
            # Parameters of gates are concatenated into one multiply for
            # efficiency.
            if self._state_is_tuple:
                c, h = state
            else:
                c, h = tf.split(value=state, num_or_size_splits=2, axis=1)

            if self._linear is None:
                # self._linear = rnn_cell_impl._Linear(
                self._linear = core_rnn_cell._Linear(
                    [inputs, h], 4 * self._num_units, True)
            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = tf.split(
                value=self._linear([inputs, h]), num_or_size_splits=4, axis=1)

            new_c = (
                c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j))
            new_h = bit_utils.round_bit(self._activation(
                new_c) * sigmoid(o), bit=self._f_bit)

            if self._state_is_tuple:
                new_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)
            else:
                new_state = tf.concat([new_c, new_h], 1)
        return new_h, new_state
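Every example wraps variable access in `with bit_utils.replace_variable(replace_w):`. One plausible way such a context manager can be built in TF 1.x, shown purely as an illustration and not the repository's actual implementation, is a custom getter installed on the current variable scope:

import contextlib
import tensorflow as tf

@contextlib.contextmanager
def replace_variable(fn):
    # Apply fn to every variable fetched via tf.get_variable inside the block,
    # e.g. to hand the cell a quantized view of its kernel while the optimizer
    # keeps updating the underlying full-precision variable.
    def custom_getter(getter, *args, **kwargs):
        return fn(getter(*args, **kwargs))
    with tf.variable_scope(tf.get_variable_scope(), custom_getter=custom_getter):
        yield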
Example 3
    def __call__(self, inputs, state, scope=None):
        def replace_w(x):
            if x.op.name.endswith('Matrix'):
                return bit_utils.quantize_w(tf.tanh(x), bit=self._w_bit)
            else:
                return x

        with bit_utils.replace_variable(replace_w):
            with tf.variable_scope(scope or type(self).__name__):
                if self._state_is_tuple:
                    c, h = state
                else:
                    c, h = tf.split(1, 2, state)
                concat = tf.nn.rnn_cell._linear([inputs, h],
                                                4 * self._num_units, True)

                # i = input_gate, j = new_input, f = forget_gate, o = output_gate
                i, j, f, o = tf.split(1, 4, concat)

                new_c = (c * tf.sigmoid(f + self._forget_bias) +
                         tf.sigmoid(i) * self._activation(j))
                new_h = bit_utils.round_bit(self._activation(new_c) *
                                            tf.sigmoid(o),
                                            bit=self._f_bit)

                if self._state_is_tuple:
                    new_state = tf.nn.rnn_cell.LSTMStateTuple(new_c, new_h)
                else:
                    new_state = tf.concat(1, [new_c, new_h])
        return new_h, new_state
Example 4
    def call(self, inputs, state):
        """Long short-term memory cell (LSTM).
        Args:
          inputs: `2-D` tensor with shape `[batch_size, input_size]`.
          state: An `LSTMStateTuple` of state tensors, each shaped
            `[batch_size, self.state_size]`, if `state_is_tuple` has been set to
            `True`.  Otherwise, a `Tensor` shaped
            `[batch_size, 2 * self.state_size]`.
        Returns:
          A pair containing the new hidden state, and the new state (either a
            `LSTMStateTuple` or a concatenated state, depending on
            `state_is_tuple`).
        """

        B = self._block_size
        # print('state_size')
        # print(state.get_shape().as_list())
        sigmoid = math_ops.sigmoid
        one = constant_op.constant(1, dtype=dtypes.int32)
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(value=state, num_or_size_splits=2, axis=one)

        #gate_inputs = math_ops.matmul(
        #    array_ops.concat([inputs, h], 1), self._kernel)

        gate_inputs = BH_dense(inputs,
                               4 * self._num_units,
                               B,
                               self.transform,
                               kernel_weights=self._kernel)
        # gate_inputs = BH_matmul(
        #    array_ops.concat([inputs, h], 1), self._kernel, B, "Fourier")
        gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(value=gate_inputs,
                                     num_or_size_splits=4,
                                     axis=one)

        forget_bias_tensor = constant_op.constant(self._forget_bias,
                                                  dtype=f.dtype)
        # Note that using `add` and `multiply` instead of `+` and `*` gives a
        # performance improvement. So using those at the cost of readability.
        add = math_ops.add
        multiply = math_ops.multiply
        #multiply = Circ_matmul()
        new_c = add(multiply(c, sigmoid(add(f, forget_bias_tensor))),
                    multiply(sigmoid(i), self._activation(j)))
        new_h = multiply(self._activation(new_c), sigmoid(o))
        new_h = bit_utils.round_bit(new_h, self._f_bit)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)
        return new_h, new_state
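The docstring above states the call contract. A minimal usage sketch for a cell with this interface, with a stock BasicLSTMCell standing in for the quantized cell and purely illustrative shapes:

import tensorflow as tf

batch_size, num_steps, input_size, num_units = 32, 20, 128, 256
inputs = tf.placeholder(tf.float32, [batch_size, num_steps, input_size])

cell = tf.nn.rnn_cell.BasicLSTMCell(num_units, state_is_tuple=True)
state = cell.zero_state(batch_size, tf.float32)   # LSTMStateTuple(c, h)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, initial_state=state)
# outputs: [batch_size, num_steps, num_units]; final_state: LSTMStateTuple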
Example 5
    def reset_lstm_state(self):
        conf = Config()
        s = self.state
        z = tf.zeros_like(s[0].c)
        print("\n==> Zeroing state\n")
        z = bit_utils.round_bit(tf.sigmoid(z), bit=conf.f_bit)
        # print("\nResetting state\n")
        return tf.group(s[0].c.assign(z),
                        s[0].h.assign(z),
                        s[1].c.assign(z),
                        s[1].h.assign(z),
                        name='reset_lstm_state')
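The op returned by reset_lstm_state only takes effect when it is run; a typical pattern, with purely illustrative session and model names, is to run it at every epoch or sequence boundary:

reset_op = model.reset_lstm_state()
sess.run(reset_op)   # resets c and h of both layers before the next sequence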
Example 6
    def call(self, inputs, state):
        def replace_w(x):
            if x.op.name.endswith('kernel'):
                return bit_utils.quantize_w(tf.tanh(x), bit=self._w_bit)
            else:
                return x

        with bit_utils.replace_variable(replace_w):
            if self._gate_linear is None:
                bias_ones = self._bias_initializer
                if self._bias_initializer is None:
                    bias_ones = tf.constant_initializer(
                        1.0, dtype=inputs.dtype)
                with tf.variable_scope("gates"):  # Reset gate and update gate.
                    # self._gate_linear = rnn_cell_impl._Linear(
                    self._gate_linear = core_rnn_cell._Linear(
                        [inputs, state],
                        2 * self._num_units,
                        True,
                        bias_initializer=bias_ones,
                        kernel_initializer=self._kernel_initializer)

            value = tf.sigmoid(self._gate_linear([inputs, state]))
            r, u = tf.split(value=value, num_or_size_splits=2, axis=1)

            r_state = bit_utils.round_bit(r * state, bit=self._f_bit)
            if self._candidate_linear is None:
                with tf.variable_scope("candidate"):
                    # self._candidate_linear = rnn_cell_impl._Linear(
                    self._candidate_linear = core_rnn_cell._Linear(
                        [inputs, r_state],
                        self._num_units,
                        True,
                        bias_initializer=self._bias_initializer,
                        kernel_initializer=self._kernel_initializer)
            c = self._activation(self._candidate_linear([inputs, r_state]))
            c = bit_utils.round_bit(c, bit=self._f_bit)
            new_h = bit_utils.round_bit(
                u * state + (1 - u) * c, bit=self._f_bit)
        return new_h, new_h
Example 7
    def _build_graph(self, inputs):
        conf = Config()

        is_training = get_current_tower_context().is_training
        input, nextinput = inputs
        initializer = tf.uniform_unit_scaling_initializer()

        # initializer = tf.random_uniform_initializer(-0.05, 0.05)

        def get_basic_cell():
            # cell = rnn.BasicLSTMCell(num_units=conf.hidden_size, forget_bias=0.0, reuse=tf.get_variable_scope().reuse)
            cell = bit_rnn.BitLSTMCell(
                num_units=conf.hidden_size,
                w_bit=conf.w_bit,
                f_bit=conf.f_bit,
                forget_bias=0.0,
                reuse=tf.get_variable_scope().reuse)
            if is_training and conf.keep_prob < 1:
                cell = bit_rnn.DropoutWrapper(cell,
                                              output_keep_prob=conf.keep_prob)
            return cell

        cell = rnn.MultiRNNCell(
            [get_basic_cell() for _ in range(conf.num_layers)])

        def get_v(n):
            return tf.get_variable(
                n,
                [conf.batch_size, conf.hidden_size],
                trainable=False,
                initializer=tf.constant_initializer())

        self.state = state_var = \
            (rnn.LSTMStateTuple(get_v('c0'), get_v('h0')),
             rnn.LSTMStateTuple(get_v('c1'), get_v('h1')))
        embeddingW = tf.get_variable(
            'embedding', [conf.vocab_size, conf.hidden_size],
            initializer=initializer)  #tf.random_uniform_initializer)
        input_feature = tf.nn.embedding_lookup(
            embeddingW, input)  # B x seqlen x hiddensize

        print("\n-> Input Rounding")
        input_feature = bit_utils.round_bit(tf.nn.relu(input_feature),
                                            bit=conf.f_bit)

        if is_training and conf.keep_prob < 1:
            input_feature = Dropout(input_feature, conf.keep_prob)

        # print("\n\nThe STATE:")
        # print(self.state)

        with tf.variable_scope('LSTM', initializer=initializer):
            input_list = tf.unstack(input_feature, num=conf.num_steps,
                                    axis=1)  # seqlen x (Bxhidden)
            outputs, last_state = rnn.static_rnn(cell,
                                                 input_list,
                                                 state_var,
                                                 scope='rnn')

        update_state_ops = []
        for k in range(conf.num_layers):
            update_state_ops.extend([
                tf.assign(state_var[k].c, last_state[k].c),
                tf.assign(state_var[k].h, last_state[k].h)
            ])

        def replace_w(x):
            # if x.op.name.endswith('Matrix'):
            if x.op.name.endswith('W'):
                print("\nKERNEL Before quantize name: " + x.op.name)
                return bit_utils.quantize_w(tf.tanh(x), bit=conf.w_bit)
            elif x.op.name.endswith('b'):
                print("\nbias Before round name: " + x.op.name)
                # tf.summary.histogram(x.name, x)
                # bias rounding is currently disabled; re-enable it by returning:
                # return bit_utils.round_bit_whist(x, bit=conf.w_bit)
                return x
            else:
                print("\nNOT Quantizing:" + x.op.name)
                tf.summary.histogram(x.name, x)
                return x

        # seqlen x (Bxrnnsize)
        output = tf.reshape(tf.concat(outputs, 1),
                            [-1, conf.hidden_size])  # (Bxseqlen) x hidden
        with bit_utils.replace_variable(replace_w):
            # lambda x: bit_utils.quantize_w(tf.tanh(x), bit=conf.w_bit)):
            logits = FullyConnected('fc',
                                    output,
                                    conf.vocab_size,
                                    nl=tf.identity,
                                    W_init=initializer,
                                    b_init=initializer)
        # logits = FullyConnected('fc', output, conf.vocab_size, nl=tf.identity, W_init=initializer, b_init=initializer)
        xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=tf.reshape(nextinput, [-1]))

        with tf.control_dependencies(update_state_ops):
            self.cost = tf.truediv(tf.reduce_sum(xent_loss),
                                   tf.cast(conf.batch_size, tf.float32),
                                   name='cost')  # log-perplexity

        perpl = tf.exp(self.cost / conf.num_steps, name='perplexity')
        summary.add_moving_summary(perpl, self.cost)
Example 8
    def __init__(self, is_training, config):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        if 'cell_type' not in dir(config) or config.cell_type == 'gru':
            cell = BitGRUCell(size, w_bit=config.w_bit, f_bit=config.f_bit)
        elif config.cell_type == 'lstm':
            cell = BitLSTMCell(size, w_bit=config.w_bit, f_bit=config.f_bit)
        if is_training and config.keep_prob < 1:
            cell = tf.nn.rnn_cell.DropoutWrapper(
                cell, output_keep_prob=config.keep_prob)
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)
        self._initial_state = bit_utils.round_bit(tf.sigmoid(
            self._initial_state),
                                                  bit=config.f_bit)

        embedding = tf.get_variable(
            "embedding", [vocab_size, size],
            initializer=tf.random_uniform_initializer())
        inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        inputs = bit_utils.round_bit(tf.nn.relu(inputs), bit=config.f_bit)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        inputs = [
            tf.squeeze(input_, [1])
            for input_ in tf.split(1, num_steps, inputs)
        ]
        outputs, state = tf.nn.rnn(cell,
                                   inputs,
                                   initial_state=self._initial_state)

        output = tf.reshape(tf.concat(1, outputs), [-1, size])
        with bit_utils.replace_variable(
                lambda x: bit_utils.quantize_w(tf.tanh(x), bit=config.w_bit)):
            softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        logits = tf.matmul(output, softmax_w) + softmax_b
        loss = tf.nn.seq2seq.sequence_loss_by_example(
            [logits], [tf.reshape(self._targets, [-1])],
            [tf.ones([batch_size * num_steps])])
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = state

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
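A minimal feed-based training loop for this model, assuming batches of shape [batch_size, num_steps] from a PTB-style reader; `m` and `reader_iterator` are placeholders, and the tensors are read from the private attributes defined in the snippet above:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for x_batch, y_batch in reader_iterator:   # int32 arrays, [batch_size, num_steps]
        cost, _ = sess.run([m._cost, m._train_op],
                           feed_dict={m._input_data: x_batch, m._targets: y_batch})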