Example #1
    def build(self, inputs_shape):
        B = self._block_size
        if inputs_shape[1].value is None:
            raise ValueError(
                "Expected inputs.shape[-1] to be known, saw shape: %s" %
                inputs_shape)

        input_depth = inputs_shape[1].value
        h_depth = self._num_units
        self._kernel = self.add_variable(_WEIGHTS_VARIABLE_NAME,
                                         shape=[(input_depth + h_depth),
                                                4 * self._num_units])

        self._bias = self.add_variable(
            _BIAS_VARIABLE_NAME,
            shape=[4 * self._num_units],
            initializer=init_ops.zeros_initializer(dtype=self.dtype))

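        # Swap in quantized views of the freshly created parameters
        # according to self.quant ("bit" mode quantizes only the kernel).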
        if self.quant == "binary":
            self._kernel = binarize(self._kernel)
            self._bias = binarize(self._bias)
        elif self.quant == "ternary":
            self._kernel = ternarize(self._kernel)
            self._bias = ternarize(self._bias)
        elif self.quant == "bit":
            self._kernel = bit_utils.quantize_w(self._kernel, self._w_bit)

        self.built = True
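
The binarize and ternarize helpers are imported from elsewhere in the repository and are not shown here. As a rough sketch of what such helpers typically compute (the exact thresholds and any straight-through gradient handling are assumptions, not taken from this code):

    import numpy as np

    def binarize(w):
        # Collapse every weight to -1 or +1 by its sign (zeros map to +1).
        return np.where(w >= 0, 1.0, -1.0)

    def ternarize(w, threshold=0.05):
        # Collapse weights to {-1, 0, +1}; small-magnitude weights become 0.
        # The fixed threshold is an illustrative assumption.
        return np.sign(w) * (np.abs(w) > threshold)

    w = np.array([0.3, -0.02, -0.7, 0.01])
    print(binarize(w))   # -> [ 1. -1. -1.  1.]
    print(ternarize(w))  # -> [ 1. -0. -1.  0.]
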
Example #2
 def replace_w(x):
     # if x.op.name.endswith('Matrix'):
     if x.op.name.endswith('W'):
         print("\nKERNEL Before quantize name: " + x.op.name)
         return bit_utils.quantize_w(tf.tanh(x), bit=conf.w_bit)
     elif x.op.name.endswith('b'):
         print("\nbias Before round name: " + x.op.name)
         # tf.summary.histogram(x.name, x)
         # alternative: round the bias to w_bit levels instead of passing it through
         # return bit_utils.round_bit_whist(x, bit=conf.w_bit)
         return x
     else:
         print("\nNOT Quantizing:" + x.op.name)
         tf.summary.histogram(x.name, x)
         return x
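
Functions like replace_w are typically hooked into variable creation through a custom getter, so every tf.get_variable call hands back the quantized tensor instead of the raw variable. A minimal TF 1.x sketch of that mechanism (the tf.sign stand-in and the scope name are illustrative assumptions; bit_utils.replace_variable in Example #3 presumably wraps something similar):

    import tensorflow as tf

    def quantizing_getter(getter, name, *args, **kwargs):
        # Create (or fetch) the variable normally, then decide by name
        # whether to return a quantized view of it.
        var = getter(name, *args, **kwargs)
        if name.endswith('W'):
            return tf.sign(var)   # stand-in for bit_utils.quantize_w
        return var

    with tf.variable_scope("rnn", custom_getter=quantizing_getter):
        w = tf.get_variable("W", shape=[4, 4])   # returned already quantized
        b = tf.get_variable("b", shape=[4])      # returned untouched
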
Example #3
    def __init__(self, is_training, config):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        if not hasattr(config, 'cell_type') or config.cell_type == 'gru':
            cell = BitGRUCell(size, w_bit=config.w_bit, f_bit=config.f_bit)
        elif config.cell_type == 'lstm':
            cell = BitLSTMCell(size, w_bit=config.w_bit, f_bit=config.f_bit)
        if is_training and config.keep_prob < 1:
            cell = tf.nn.rnn_cell.DropoutWrapper(
                cell, output_keep_prob=config.keep_prob)
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * config.num_layers)

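        # Initial state: squash with sigmoid, then round to f_bit levels.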
        self._initial_state = cell.zero_state(batch_size, tf.float32)
        self._initial_state = bit_utils.round_bit(
            tf.sigmoid(self._initial_state), bit=config.f_bit)

        embedding = tf.get_variable(
            "embedding", [vocab_size, size],
            initializer=tf.random_uniform_initializer())
        inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        inputs = bit_utils.round_bit(tf.nn.relu(inputs), bit=config.f_bit)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        inputs = [
            tf.squeeze(input_, [1])
            for input_ in tf.split(1, num_steps, inputs)
        ]
        outputs, state = tf.nn.rnn(cell,
                                   inputs,
                                   initial_state=self._initial_state)

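        # Output projection: softmax_w is created under a w_bit-quantizing
        # variable getter, so the logits use quantized weights as well.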
        output = tf.reshape(tf.concat(1, outputs), [-1, size])
        with bit_utils.replace_variable(
                lambda x: bit_utils.quantize_w(tf.tanh(x), bit=config.w_bit)):
            softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        logits = tf.matmul(output, softmax_w) + softmax_b
        loss = tf.nn.seq2seq.sequence_loss_by_example(
            [logits], [tf.reshape(self._targets, [-1])],
            [tf.ones([batch_size * num_steps])])
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = state

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
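
bit_utils.round_bit is used above to keep activations (the ReLU-ed embeddings and the sigmoid-squashed initial state) on a uniform f_bit grid in [0, 1]. A small NumPy sketch of that kind of fixed-point rounding (the exact scheme, and any gradient pass-through used during training, are assumptions):

    import numpy as np

    def round_bit(x, bit):
        # Snap values in [0, 1] to the nearest of 2**bit evenly spaced levels.
        scale = 2 ** bit - 1
        return np.round(x * scale) / scale

    acts = np.array([0.13, 0.5, 0.92])
    print(round_bit(acts, bit=2))   # snapped to {0, 1/3, 2/3, 1}
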
Example #4
 def replace_w(x):
     if x.op.name.endswith('kernel'):
         return bit_utils.quantize_w(tf.tanh(x), bit=self._w_bit)
     else:
         return x
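
In Examples #2-#4 the weights are squashed with tf.tanh before being handed to bit_utils.quantize_w. A DoReFa-style NumPy sketch of what a w_bit weight quantizer of that shape might do (the normalization by the maximum magnitude and the uniform grid are assumptions about bit_utils, not taken from its source):

    import numpy as np

    def quantize_w(w, bit):
        # Expects w already squashed into (-1, 1), e.g. by tanh.
        # Shift into [0, 1], round to 2**bit uniform levels, shift back to [-1, 1].
        w01 = w / (2 * np.max(np.abs(w))) + 0.5
        scale = 2 ** bit - 1
        return 2 * (np.round(w01 * scale) / scale) - 1

    w = np.tanh(np.random.randn(3, 3))
    print(quantize_w(w, bit=2))   # entries drawn from {-1, -1/3, 1/3, 1}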