def _depthwise_separable_conv(inputs, num_pwc_filters, sc, kernel_size, stride, layer_no, act_max):
    """Build an expansion / depthwise / projection convolution stack.

    Each convolution output is optionally passed through an 8-bit fake
    quantisation op and then through ReLU6.

    Args:
        inputs: Input tensor; ``inputs.shape[3].value`` is read as the
            channel count.
        num_pwc_filters: Number of output channels of the 1x1 projection.
        sc: Scope prefix; '/exp_conv', '/dw_conv', '/pj_conv' and
            '/res_conv' are appended to it.
        kernel_size: Kernel size of the depthwise convolution.
        stride: Stride handed to the expansion and the depthwise convolution.
        layer_no: Layer index; selects ``act_max[2 * layer_no]`` and
            ``act_max[2 * layer_no + 1]``.
        act_max: Indexable of activation limits. An entry > 0 enables fake
            quantisation over ``[-a, a - a / 128]`` for that activation.

    Returns:
        The output tensor of the block.
    """
    expansion_factor = 6

    def _maybe_quantize(tensor, act_idx, op_name):
        # Quantise only when a positive limit is configured for this slot.
        if act_max[act_idx] > 0:
            return tf.fake_quant_with_min_max_vars(
                tensor,
                min=-act_max[act_idx],
                max=act_max[act_idx] - (act_max[act_idx] / 128.0),
                num_bits=8,
                name=op_name)
        return tensor

    odd_idx = 2 * layer_no + 1
    even_idx = 2 * layer_no
    odd_suffix = str(layer_no + 1)

    # 1x1 expansion to `expansion_factor` times the input channel count.
    # NOTE(review): `stride` is applied here and again in the depthwise
    # convolution below -- confirm that striding twice is intended.
    expanded = slim.conv2d(inputs=inputs,
                           num_outputs=inputs.shape[3].value * expansion_factor,
                           stride=stride,
                           kernel_size=[1, 1],
                           scope=sc + '/exp_conv')
    expanded = _maybe_quantize(expanded, odd_idx, 'quant_exp_conv' + odd_suffix)
    activated = tf.nn.relu6(expanded)

    # Depthwise stage only: the pointwise part is skipped via num_outputs=None.
    depthwise = slim.separable_conv2d(activated,
                                      num_outputs=None,
                                      stride=stride,
                                      depth_multiplier=1,
                                      kernel_size=kernel_size,
                                      scope=sc + '/dw_conv')
    depthwise = _maybe_quantize(depthwise, even_idx, 'quant_dw_conv' + str(layer_no))
    activated = tf.nn.relu6(depthwise)

    # 1x1 projection down to the requested channel count.
    projected = slim.conv2d(activated,
                            num_outputs=num_pwc_filters,
                            kernel_size=[1, 1],
                            scope=sc + '/pj_conv',
                            activation_fn=None)
    projected = _maybe_quantize(projected, odd_idx, 'quant_pj_conv' + odd_suffix)
    activated = tf.nn.relu6(projected)

    if stride == 2:
        return activated

    if inputs.shape[3].value != num_pwc_filters:
        residual = slim.conv2d(inputs=activated,
                               num_outputs=num_pwc_filters,
                               kernel_size=[1, 1],
                               scope=sc + '/res_conv')
        # NOTE(review): the '/res_conv' output only reaches the result when
        # quantisation is enabled for this slot; otherwise it is created but
        # not used. No skip-connection add is performed here -- confirm.
        if act_max[odd_idx] > 0:
            activated = tf.fake_quant_with_min_max_vars(
                residual,
                min=-act_max[odd_idx],
                max=act_max[odd_idx] - (act_max[odd_idx] / 128.0),
                num_bits=8,
                name='quant_res_conv' + odd_suffix)
    return activated
def create_dnn_model(fingerprint_input, model_settings, model_size_info, act_max, is_training):
    """Build a stack of fully-connected layers with optional fake quantisation.

    Args:
        fingerprint_input: Input tensor fed to the first matmul.
        model_settings: Dict providing 'fingerprint_size' and 'label_count'.
        model_size_info: Sequence whose length is the number of hidden layers
            and whose elements are the hidden layer widths.
        act_max: Indexable of activation limits. Entry 0 covers the input,
            entry ``i`` hidden layer ``i`` and entry ``num_layers + 1`` the
            logits; a non-zero entry enables 8-bit fake quantisation over
            ``[-a, a - a / 128]``.
        is_training: When truthy a 'dropout_prob' placeholder is created,
            applied after every hidden layer and returned with the logits.

    Returns:
        ``(logits, dropout_prob)`` when ``is_training`` is truthy, otherwise
        ``logits`` alone.
    """

    def _quantize_if_enabled(tensor, act_idx):
        # A zero limit means "leave this activation in floating point".
        if act_max[act_idx] != 0:
            return tf.fake_quant_with_min_max_vars(
                tensor,
                min=-act_max[act_idx],
                max=act_max[act_idx] - (act_max[act_idx] / 128.0),
                num_bits=8)
        return tensor

    if is_training:
        dropout_prob = tf.placeholder(tf.float32, name='dropout_prob')

    input_width = model_settings['fingerprint_size']
    num_classes = model_settings['label_count']
    hidden_count = len(model_size_info)
    widths = [input_width] + list(model_size_info)

    activations = _quantize_if_enabled(fingerprint_input, 0)

    width_pairs = zip(widths, widths[1:])
    for layer_idx, (fan_in, fan_out) in enumerate(width_pairs, start=1):
        with tf.variable_scope('fc' + str(layer_idx)):
            kernel = tf.get_variable(
                'W',
                shape=[fan_in, fan_out],
                initializer=tf.contrib.layers.xavier_initializer())
            offset = tf.get_variable('b', shape=[fan_out])
            activations = tf.matmul(activations, kernel) + offset
            activations = _quantize_if_enabled(activations, layer_idx)
            activations = tf.nn.relu(activations)
            if is_training:
                activations = tf.nn.dropout(activations, dropout_prob)

    out_kernel = tf.get_variable(
        'final_fc',
        shape=[widths[-1], num_classes],
        initializer=tf.contrib.layers.xavier_initializer())
    out_bias = tf.Variable(tf.zeros([num_classes]))
    logits = tf.matmul(activations, out_kernel) + out_bias
    logits = _quantize_if_enabled(logits, hidden_count + 1)

    if is_training:
        return logits, dropout_prob
    return logits
def _conv2d(self, num_bits, x, filters, kernels, strides=1, bias=False, padding='SAME', name='conv2d'):
    """Apply a square 2-D convolution with optionally fake-quantised weights.

    Args:
        num_bits: Bit width passed to the fake quantisation op.
        x: Input tensor; index 3 of its static shape is used as the number
            of input channels.
        filters: Number of output channels.
        kernels: Side length of the square kernel.
        strides: Stride used for both middle dimensions.
        bias: When truthy a 'bias' variable is added to the output.
        padding: Padding mode forwarded to ``tf.nn.conv2d``.
        name: Variable scope that holds the layer variables.

    Returns:
        The convolution output tensor.
    """
    with tf.variable_scope(name):
        in_channels = x.get_shape().as_list()[3]
        kernel_shape = [kernels, kernels, in_channels, filters]
        kernel = tf.get_variable(
            "weight",
            kernel_shape,
            initializer=tf.keras.initializers.RandomNormal(mean=0., stddev=1.),
            regularizer=self.conv2d_regularizer)

        if self.add_fake_quant:
            # Quantise over the current extent of the weight tensor.
            lowest = tf.reduce_min(kernel)
            highest = tf.reduce_max(kernel)
            kernel = tf.fake_quant_with_min_max_vars(kernel, lowest, highest, num_bits)

        stride_spec = [1, strides, strides, 1]
        result = tf.nn.conv2d(x, kernel, stride_spec, padding)

        if bias:
            offset = tf.get_variable('bias', [filters])
            result = tf.nn.bias_add(result, offset)
        return result
def _depthwise_conv2d(self, num_bits, x, filters, kernels, strides=1, bias=False, padding='SAME', name='depthwise_conv2d'):
    """Apply a depthwise 2-D convolution with optionally fake-quantised weights.

    The weight variable has shape ``[kernels, kernels, filters, 1]``, i.e. a
    channel multiplier of 1, so ``filters`` has to equal the number of input
    channels of ``x``.

    Args:
        num_bits: Bit width passed to the fake quantisation op.
        x: Input tensor; index 3 of its static shape is the channel count.
        filters: Number of channels (input and output).
        kernels: Side length of the square kernel.
        strides: Stride used for both middle dimensions.
        bias: When truthy a 'bias' variable is added to the output.
        padding: Padding mode forwarded to ``tf.nn.depthwise_conv2d``.
        name: Variable scope that holds the layer variables.

    Returns:
        The depthwise convolution output tensor.

    Raises:
        ValueError: If the static channel count of ``x`` is known and does
            not match ``filters``.
    """
    with tf.variable_scope(name):
        n_input_plane = x.get_shape().as_list()[3]
        # Fail early with a readable message instead of a shape-inference
        # error from the convolution op; skipped when the channel dimension
        # is not statically known.
        if n_input_plane is not None and n_input_plane != filters:
            raise ValueError(
                'depthwise_conv2d "%s": filters (%s) must equal the number of '
                'input channels (%s)' % (name, filters, n_input_plane))

        w_dim = [kernels, kernels, filters, 1]
        w = tf.get_variable(
            "weight",
            w_dim,
            initializer=tf.contrib.layers.xavier_initializer_conv2d())
        if self.add_fake_quant:
            # Quantise over the current extent of the weight tensor.
            w_min = tf.reduce_min(w)
            w_max = tf.reduce_max(w)
            w = tf.fake_quant_with_min_max_vars(w, w_min, w_max, num_bits)

        output = tf.nn.depthwise_conv2d(x, w, [1, strides, strides, 1], padding)
        if bias:
            b = tf.get_variable('bias', [filters])
            output = tf.nn.bias_add(output, b)
        return output
def make_minmax_quantization(config, data):
    """Fake-quantise ``config['variable']`` between the bounds given in ``data``.

    Args:
        config: Dict with 'variable', 'sign_passthrough', 'num_bits' and
            'zero_passthrough'.
        data: Dict with 'min_bound' and 'max_bound'.

    Returns:
        The quantised tensor. With sign passthrough the absolute value is
        quantised with ``int(sign_passthrough)`` fewer bits and multiplied by
        the sign of the variable; otherwise, with zero passthrough, entries
        of the variable equal to 0.0 are masked to zero in the result.
    """
    source = config['variable']
    keep_sign = config['sign_passthrough']
    bit_width = config['num_bits']
    keep_zero = config['zero_passthrough']
    lower = data['min_bound']
    upper = data['max_bound']

    if keep_sign:
        to_quantize = tf.abs(source)
    else:
        to_quantize = source

    result = tf.fake_quant_with_min_max_vars(
        to_quantize,
        lower,
        upper,
        num_bits=bit_width - int(keep_sign),
        narrow_range=keep_zero,
        name='quantized')

    if keep_sign:
        # Sign passthrough implies zero passthrough: sign(0) == 0.
        signs = tf.sign(source, name='var_sign')
        return tf.multiply(result, signs, name='quantized_with_sign')

    if keep_zero:
        nonzero = tf.not_equal(source, 0.0, name='sparse_mask')
        nonzero_as_float = tf.cast(nonzero, result.dtype)
        return tf.multiply(result, nonzero_as_float, name='quantized_masked')

    return result
def _get_quantized_weights(shape, dtype):  # pylint: disable=unused-argument
    """Return ``self.weights`` fake-quantised to 8 bits over [-6, 6].

    ``self`` is taken from the enclosing scope; the requested shape must
    match the stored weights.
    """
    stored = self.weights
    assert tuple(shape) == stored.shape
    # Default values used in TFLiteRegistry.
    quantized = tf.fake_quant_with_min_max_vars(
        stored, -6.0, 6.0, num_bits=8, narrow_range=True)
    return quantized
def _get_quantized_weights(shape, dtype):  # pylint: disable=unused-argument
    """Return ``self.weights`` fake-quantised over [-6, 6].

    ``self`` is taken from the enclosing scope. The bit width and the
    narrow-range flag are read from ``self.quant_params``; the requested
    shape must match the stored weights.
    """
    stored = self.weights
    assert tuple(shape) == stored.shape
    quantized = tf.fake_quant_with_min_max_vars(
        stored,
        -6.0,
        6.0,
        num_bits=self.quant_params['num_bits'],
        narrow_range=self.quant_params['narrow_range'])
    return quantized
def quantize_op(inputs, is_training=True, is_quantized=True, default_min=0, default_max=6, ema_decay=0.999, scope='quant'):
    """Inserts a fake quantization op after inputs.

    In a training graph the quantization range is tracked as an exponential
    moving average of the observed min/max of ``inputs``; in an eval graph
    the values stored in the range variables are used directly.

    Args:
        inputs: A tensor of size [batch_size, height, width, channels].
        is_training: true if the graph is a training graph.
        is_quantized: flag to enable/disable quantization.
        default_min: default min value for fake quant op.
        default_max: default max value for fake quant op.
        ema_decay: the moving average decay for the quantization variables.
        scope: Optional scope for variable_scope.

    Returns:
        Tensor resulting from quantizing the input tensors, or ``inputs``
        unchanged when ``is_quantized`` is false.
    """
    if not is_quantized:
        return inputs

    with tf.variable_scope(scope):
        min_var = _quant_var('min', default_min)
        max_var = _quant_var('max', default_max)

        if not is_training:
            # Eval graph: just use the values held by the variables.
            return tf.fake_quant_with_min_max_vars(inputs, min_var, max_var)

        # TFLite requires that 0.0 is always in the [min; max] range, so the
        # observed extremes are clamped before they feed the moving averages
        # (same treatment as in quantizable_concat).
        range_min = tf.minimum(tf.reduce_min(inputs), 0.0,
                               name='SafeQuantRangeMin')
        range_max = tf.maximum(tf.reduce_max(inputs), 0.0,
                               name='SafeQuantRangeMax')
        min_val = moving_averages.assign_moving_average(
            min_var, range_min, ema_decay, name='AssignMinEma')
        max_val = moving_averages.assign_moving_average(
            max_var, range_max, ema_decay, name='AssignMaxEma')
        return tf.fake_quant_with_min_max_vars(inputs, min_val, max_val)
def _depthwise_separable_conv(inputs, num_pwc_filters, sc, kernel_size, stride, layer_no, act_max):
    """Build a depthwise conv followed by a 1x1 pointwise conv.

    Each convolution output is optionally fake-quantised to 8 bits and then
    passed through ReLU.

    Args:
        inputs: Input tensor.
        num_pwc_filters: Number of output channels of the pointwise conv.
        sc: Scope prefix; '/dw_conv' and '/pw_conv' are appended to it.
        kernel_size: Kernel size of the depthwise convolution.
        stride: Stride of the depthwise convolution.
        layer_no: Layer index; ``act_max[2 * layer_no]`` governs the
            depthwise output and ``act_max[2 * layer_no + 1]`` the pointwise
            output.
        act_max: Indexable of activation limits; an entry > 0 enables fake
            quantisation over ``[-a, a - a / 128]``.

    Returns:
        The activated pointwise output tensor.
    """

    def _quantize_then_relu(tensor, act_idx, op_name):
        # Quantise only when a positive limit is configured for this slot.
        if act_max[act_idx] > 0:
            tensor = tf.fake_quant_with_min_max_vars(
                tensor,
                min=-act_max[act_idx],
                max=act_max[act_idx] - (act_max[act_idx] / 128.0),
                num_bits=8,
                name=op_name)
        return tf.nn.relu(tensor)

    # Depthwise stage only: the pointwise part is skipped via num_outputs=None.
    dw_out = slim.separable_convolution2d(inputs,
                                          num_outputs=None,
                                          stride=stride,
                                          depth_multiplier=1,
                                          kernel_size=kernel_size,
                                          scope=sc + '/dw_conv',
                                          reuse=tf.AUTO_REUSE)
    # batch-norm weights folded into depthwise conv
    dw_act = _quantize_then_relu(dw_out, 2 * layer_no,
                                 'quant_ds_conv' + str(layer_no))

    pw_out = slim.convolution2d(dw_act,
                                num_pwc_filters,
                                kernel_size=[1, 1],
                                scope=sc + '/pw_conv',
                                reuse=tf.AUTO_REUSE)
    # batch-norm weights folded into pointwise conv
    return _quantize_then_relu(pw_out, 2 * layer_no + 1,
                               'quant_pw_conv' + str(layer_no + 1))
def quantLayer(x, clipRange, numBits):
    """Lambda-layer function that simulates quantisation between layers.

    The tensor is fake-quantised to ``numBits`` bits within
    ``[clipRange[0], clipRange[1]]``. An integer right shift (cast to int32,
    shift, cast back to float) was considered but is not used: per the
    original author's note, TensorFlow does not use fixed-point arithmetic
    inside the layers, unlike ARM, so not shifting works best.
    """
    lower_bound = clipRange[0]
    upper_bound = clipRange[1]
    return tf.fake_quant_with_min_max_vars(
        x, min=lower_bound, max=upper_bound, num_bits=numBits)
def _fake_quant_with_min_max_vars(self, inputs, min_var, max_var, per_channel, num_bits, narrow_range):
    """Adds a fake quantization operation.

    Dispatches to the per-channel op when ``per_channel`` is truthy (the
    range variables must then be rank 1) and to the per-tensor op otherwise
    (the range variables must then be scalars).
    """
    if per_channel:
        assert len(min_var.get_shape()) == 1
        assert len(max_var.get_shape()) == 1
        quant_fn = tf.fake_quant_with_min_max_vars_per_channel
    else:
        assert min_var.get_shape() == []  # pylint: disable=g-explicit-bool-comparison
        assert max_var.get_shape() == []  # pylint: disable=g-explicit-bool-comparison
        quant_fn = tf.fake_quant_with_min_max_vars

    return quant_fn(inputs, min_var, max_var,
                    num_bits=num_bits, narrow_range=narrow_range)
def testQuantizesOutputsFromLayer(self):
    """A wrapped ReLU must match fake-quantising a plain ReLU's output."""
    # TODO(pulkitb): Increase coverage by adding other output quantize layers
    # such as AveragePooling etc.
    lookup_layer = layers.ReLU()

    wrapped_relu = layers.ReLU()
    provider = self.quantize_registry.get_quantize_provider(lookup_layer)
    wrapper = QuantizeWrapper(wrapped_relu, quantize_provider=provider)
    quantized_model = keras.Sequential([wrapper])

    reference_model = keras.Sequential([layers.ReLU()])

    sample = np.random.rand(1, 2, 1)

    # Quantise the reference output by hand with the range the test expects.
    reference_output = reference_model.predict(sample)
    expected_output = tf.fake_quant_with_min_max_vars(
        reference_output, -6.0, 6.0, num_bits=8, narrow_range=False)

    actual_output = quantized_model.predict(sample)
    self.assertAllClose(expected_output, actual_output)
def testQuantizesWeights_KerasLayers(self, layer_type, input_shape, kwargs):
    """A wrapped layer must match a plain layer fed pre-quantised weights."""
    self.weights = None

    def _get_random_weights(shape, dtype):  # pylint: disable=unused-argument
        # Remember the drawn kernel so the reference model can reuse it.
        drawn = np.random.rand(*shape)
        self.weights = drawn
        return self.weights

    def _get_quantized_weights(shape, dtype):  # pylint: disable=unused-argument
        stored = self.weights
        assert tuple(shape) == stored.shape
        # Default values used in TFLiteRegistry.
        return tf.fake_quant_with_min_max_vars(
            stored, -6.0, 6.0, num_bits=8, narrow_range=True)

    layer = layer_type(kernel_initializer=_get_random_weights, **kwargs)
    provider = self.quantize_registry.get_quantize_provider(layer)
    wrapper = QuantizeWrapper(layer=layer,
                              quantize_provider=provider,
                              input_shape=input_shape)
    quantized_model = keras.Sequential([wrapper])

    # The reference model is built with the same parameters, but its kernel
    # is the externally quantised copy of the kernel used by the wrapped
    # layer, so both models should produce the same results.
    reference_layer = layer_type(input_shape=input_shape,
                                 kernel_initializer=_get_quantized_weights,
                                 **kwargs)
    reference_model = keras.Sequential([reference_layer])

    sample = np.random.rand(1, *input_shape)

    # The wrapper should apply FakeQuant to its output as well; apply it
    # explicitly to the reference result before comparing.
    reference_output = reference_model.predict(sample)
    expected_output = tf.fake_quant_with_min_max_vars(
        reference_output, -6.0, 6.0, num_bits=8, narrow_range=False)

    actual_output = quantized_model.predict(sample)
    self.assertAllClose(expected_output, actual_output)
def _relu6(self, num_bits, x):
    """Apply ReLU6 inside the 'act' scope, fake-quantised over [0, 6] if enabled.

    Args:
        num_bits: Bit width passed to the fake quantisation op.
        x: Input tensor.

    Returns:
        The activated (and, when ``self.add_fake_quant`` is truthy,
        fake-quantised) tensor.
    """
    with tf.variable_scope("act"):
        activated = tf.nn.relu6(x)
        if not self.add_fake_quant:
            return activated
        return tf.fake_quant_with_min_max_vars(activated, 0.0, 6.0, num_bits)
def quantizable_concat(inputs, axis, is_training, is_quantized=True, default_min=0, default_max=6, ema_decay=0.999, scope='quantized_concat'):
    """Concat replacement with quantization option.

    Allows concat inputs to share the same min max ranges, from
    experimental/gazelle/synthetic/model/tpu/utils.py.

    Args:
        inputs: list of tensors to concatenate.
        axis: dimension along which to concatenate.
        is_training: true if the graph is a training graph.
        is_quantized: flag to enable/disable quantization.
        default_min: default min value for fake quant op.
        default_max: default max value for fake quant op.
        ema_decay: the moving average decay for the quantization variables.
        scope: Optional scope for variable_scope.

    Returns:
        Tensor resulting from concatenation of input tensors
    """
    if not is_quantized:
        return tf.concat(inputs, axis=axis)

    with tf.variable_scope(scope):
        tf.logging.info('inputs: {}'.format(inputs))
        for tensor in inputs:
            tf.logging.info(tensor)

        min_var = _quant_var('min', default_min)
        max_var = _quant_var('max', default_max)

        if is_training:
            # Track moving averages of the joint min / max over all inputs so
            # that every input is quantised with the same range.
            joined = tf.concat(inputs, axis=axis)
            tf.logging.info('concat_tensors: {}'.format(joined))
            # TFLite requires that 0.0 is always in the [min; max] range.
            range_min = tf.minimum(tf.reduce_min(joined), 0.0,
                                   name='SafeQuantRangeMin')
            range_max = tf.maximum(tf.reduce_max(joined), 0.0,
                                   name='SafeQuantRangeMax')
            min_val = moving_averages.assign_moving_average(
                min_var, range_min, ema_decay, name='AssignMinEma')
            max_val = moving_averages.assign_moving_average(
                max_var, range_max, ema_decay, name='AssignMaxEma')
            tf.logging.info('min_val: {}'.format(min_val))
            tf.logging.info('max_val: {}'.format(max_val))
            quant_inputs = [
                tf.fake_quant_with_min_max_vars(tensor, min_val, max_val)
                for tensor in inputs
            ]
        else:
            # Eval graph: just use the values held by the variables.
            quant_inputs = [
                tf.fake_quant_with_min_max_vars(tensor, min_var, max_var)
                for tensor in inputs
            ]
            tf.logging.info('min_val: {}'.format(min_var))
            tf.logging.info('max_val: {}'.format(max_var))

        tf.logging.info('quant_inputs: {}'.format(quant_inputs))
        outputs = tf.concat(quant_inputs, axis=axis)
        tf.logging.info('outputs: {}'.format(outputs))
        return outputs
def create_ds_cnn_model(fingerprint_input, model_settings, model_size_info, act_max, is_training):
    """Builds a depthwise separable convolutional neural network model.

    Model definition is based on https://arxiv.org/abs/1704.04861 and
    Tensorflow implementation: https://github.com/Zehaos/MobileNet

    Args:
        fingerprint_input: Input tensor; reshaped to
            [-1, spectrogram_length, dct_coefficient_count, 1].
        model_settings: Dict providing 'label_count', 'dct_coefficient_count'
            and 'spectrogram_length'.
        model_size_info: Defines number of layers, followed by the DS-Conv
            layer parameters in the order {number of conv features, conv
            filter height, width and stride in y,x dir.} for each of the
            layers. Note that first layer is always regular convolution, but
            the remaining layers are all depthwise separable convolutions.
        act_max: Indexable of activation limits, or None to disable fake
            quantisation everywhere. Index 0 covers the input, index 1 the
            first convolution, indices 2*k and 2*k+1 the depthwise and
            pointwise outputs of layer k, index 2*num_layers the pooled
            output and index 2*num_layers+1 the logits. An entry > 0 enables
            8-bit fake quantisation over [-a, a - a/128].
        is_training: When truthy a 'dropout_prob' placeholder is created and
            returned alongside the logits.

    Returns:
        ``(logits, dropout_prob)`` when ``is_training`` is truthy, otherwise
        ``logits`` alone.
    """

    # NOTE(review): this helper is defined but not called anywhere inside
    # this function.
    def ds_cnn_arg_scope(weight_decay=0):
        """Defines the default ds_cnn argument scope.

        Args:
          weight_decay: The weight decay to use for regularizing the model.

        Returns:
          An `arg_scope` to use for the DS-CNN model.
        """
        with slim.arg_scope(
            [slim.convolution2d, slim.separable_convolution2d],
            weights_initializer=slim.initializers.xavier_initializer(),
            biases_initializer=slim.init_ops.zeros_initializer(),
            weights_regularizer=slim.l2_regularizer(weight_decay)) as sc:
            return sc

    def _depthwise_separable_conv(inputs, num_pwc_filters, sc, kernel_size, stride, layer_no, act_max):
        """Helper function to build the depth-wise separable convolution layer.

        Builds a depthwise convolution followed by a 1x1 pointwise
        convolution; each output is optionally fake-quantised (when the
        matching ``act_max`` entry is > 0) and then passed through ReLU.
        """
        # skip pointwise by setting num_outputs=None
        depthwise_conv = slim.separable_convolution2d(inputs,
                                                      num_outputs=None,
                                                      stride=stride,
                                                      depth_multiplier=1,
                                                      kernel_size=kernel_size,
                                                      scope=sc + '/dw_conv',
                                                      reuse=tf.AUTO_REUSE)
        if (act_max[2 * layer_no] > 0):
            depthwise_conv = tf.fake_quant_with_min_max_vars(
                depthwise_conv,
                min=-act_max[2 * layer_no],
                max=act_max[2 * layer_no] - (act_max[2 * layer_no] / 128.0),
                num_bits=8,
                name='quant_ds_conv' + str(layer_no))
        bn = tf.nn.relu(depthwise_conv)

        # batch-norm weights folded into depthwise conv
        # bn = slim.batch_norm(depthwise_conv, scope=sc+'/dw_conv/batch_norm')
        pointwise_conv = slim.convolution2d(bn,
                                            num_pwc_filters,
                                            kernel_size=[1, 1],
                                            scope=sc + '/pw_conv',
                                            reuse=tf.AUTO_REUSE)
        if (act_max[2 * layer_no + 1] > 0):
            pointwise_conv = tf.fake_quant_with_min_max_vars(
                pointwise_conv,
                min=-act_max[2 * layer_no + 1],
                max=act_max[2 * layer_no + 1] - (act_max[2 * layer_no + 1] / 128.0),
                num_bits=8,
                name='quant_pw_conv' + str(layer_no + 1))
        bn = tf.nn.relu(pointwise_conv)

        # batch-norm weights folded into pointwise conv
        # bn = slim.batch_norm(pointwise_conv, scope=sc+'/pw_conv/batch_norm')
        return bn

    # The placeholder is only created and returned here; no dropout op is
    # applied inside this function.
    if is_training:
        dropout_prob = tf.placeholder(tf.float32, name='dropout_prob')

    label_count = model_settings['label_count']
    input_frequency_size = model_settings['dct_coefficient_count']
    input_time_size = model_settings['spectrogram_length']
    fingerprint_4d = tf.reshape(fingerprint_input, [-1, input_time_size, input_frequency_size, 1])

    # Running spatial size of the feature map; updated per layer below and
    # used as the kernel of the final average pool.
    t_dim = input_time_size
    f_dim = input_frequency_size

    #Extract model dimensions from model_size_info
    num_layers = model_size_info[0]
    conv_feat = [None] * num_layers
    conv_kt = [None] * num_layers
    conv_kf = [None] * num_layers
    conv_st = [None] * num_layers
    conv_sf = [None] * num_layers
    # Five consecutive entries per layer, starting right after the count.
    i = 1
    for layer_no in range(0, num_layers):
        conv_feat[layer_no] = model_size_info[i]
        i += 1
        conv_kt[layer_no] = model_size_info[i]
        i += 1
        conv_kf[layer_no] = model_size_info[i]
        i += 1
        conv_st[layer_no] = model_size_info[i]
        i += 1
        conv_sf[layer_no] = model_size_info[i]
        i += 1

    # All-zero limits disable every fake quantisation op below.
    if act_max is None:
        act_max = [0] * (num_layers * 2 + 2)

    scope = 'DS-CNN'
    with tf.variable_scope(scope) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope(
            [slim.convolution2d, slim.separable_convolution2d],
            activation_fn=None,
            weights_initializer=slim.initializers.xavier_initializer(),
            biases_initializer=slim.init_ops.zeros_initializer(),
            outputs_collections=[end_points_collection]):
            # Every slim.batch_norm call in this function is commented out,
            # so these defaults are not exercised by the visible code.
            with slim.arg_scope([slim.batch_norm],
                                is_training=is_training,
                                decay=0.96,
                                updates_collections=None,
                                activation_fn=tf.nn.relu):
                if act_max[0] > 0:
                    fingerprint_4d = tf.fake_quant_with_min_max_vars(
                        fingerprint_4d,
                        min=-act_max[0],
                        max=act_max[0] - (act_max[0] / 128.0),
                        num_bits=8,
                        name='quant_input')
                for layer_no in range(0, num_layers):
                    if layer_no == 0:
                        # First layer is a regular convolution.
                        net = slim.convolution2d(fingerprint_4d, conv_feat[layer_no],\
                                  [conv_kt[layer_no], conv_kf[layer_no]], stride=[conv_st[layer_no], conv_sf[layer_no]], padding='SAME', scope='conv_1', reuse=tf.AUTO_REUSE)
                        if act_max[1] > 0:
                            net = tf.fake_quant_with_min_max_vars(
                                net,
                                min=-act_max[1],
                                max=act_max[1] - (act_max[1] / 128.0),
                                num_bits=8,
                                name='quant_conv1')
                        net = tf.nn.relu(net)
                        #net = slim.batch_norm(net, scope='conv_1/batch_norm')
                    else:
                        net = _depthwise_separable_conv(net, conv_feat[layer_no], \
                                  kernel_size = [conv_kt[layer_no],conv_kf[layer_no]], \
                                  stride = [conv_st[layer_no],conv_sf[layer_no]], sc='conv_ds_'+str(layer_no), layer_no = layer_no, act_max = act_max)
                    # Shrink the tracked spatial size by this layer's strides.
                    t_dim = math.ceil(t_dim / float(conv_st[layer_no]))
                    f_dim = math.ceil(f_dim / float(conv_sf[layer_no]))

                # Pool over the whole remaining feature map.
                net = slim.avg_pool2d(net, [t_dim, f_dim], scope='avg_pool')
                if act_max[2 * num_layers] > 0:
                    net = tf.fake_quant_with_min_max_vars(
                        net,
                        min=-act_max[2 * num_layers],
                        max=act_max[2 * num_layers] - (act_max[2 * num_layers] / 128.0),
                        num_bits=8,
                        name='quant_pool')

        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
        logits = slim.fully_connected(net,
                                      label_count,
                                      activation_fn=None,
                                      scope='fc1',
                                      reuse=tf.AUTO_REUSE)
        if act_max[2 * num_layers + 1] > 0:
            logits = tf.fake_quant_with_min_max_vars(
                logits,
                min=-act_max[2 * num_layers + 1],
                max=act_max[2 * num_layers + 1] - (act_max[2 * num_layers + 1] / 128.0),
                num_bits=8,
                name='quant_fc')

    if is_training:
        return logits, dropout_prob
    else:
        return logits
def main(_):
    """Train a fake-quantised softmax regression on MNIST and export it.

    Builds a single fully-connected layer whose input, weights, bias and
    output each pass through a fake quantisation op, trains it for 1000
    steps, prints the test accuracy, and writes the graph, a checkpoint, a
    summary and several ``.npy`` arrays next to this file.

    Args:
        _: Unused positional argument.
    """
    # dir base
    dirname = os.path.dirname(os.path.abspath(__file__))
    exportbase = os.path.join(dirname, "export")
    ckpt_dir = os.path.join(dirname, "ckpts")

    # The checkpoint and the .npy exports are written into these directories
    # at the end of the run; create them up front so a missing directory
    # fails before training rather than after it.
    for out_dir in (exportbase, ckpt_dir):
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    # Create fake quant the model
    x = tf.placeholder(tf.float32, [None, 784])
    x_min = tf.Variable(0.0, name='x_min')
    x_max = tf.Variable(6.0, name='x_max')
    x_q = tf.fake_quant_with_min_max_vars(x, x_min, x_max, name='x_q')

    W = tf.Variable(tf.zeros([784, 10]), name='W')
    W_min = tf.Variable(0.0, name='W_min')
    W_max = tf.Variable(6.0, name='W_max')
    W_q = tf.fake_quant_with_min_max_vars(W, W_min, W_max, name='W_q')

    b = tf.Variable(tf.zeros([10]), name='b')
    b_min = tf.Variable(0.0, name='b_min')
    b_max = tf.Variable(6.0, name='b_max')
    b_q = tf.fake_quant_with_min_max_vars(b, b_min, b_max, name='b_q')

    # y_fc = tf.matmul(x, W) + b
    y_fc = tf.matmul(x_q, W_q) + b_q

    # fake_quant
    y_min = tf.Variable(0.0, name='q_min')
    y_max = tf.Variable(6.0, name='q_max')
    y = tf.fake_quant_with_min_max_vars(y_fc, y_min, y_max, name='y_q')

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])

    # The raw formulation of cross-entropy,
    #
    #   tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)),
    #                                 reduction_indices=[1]))
    #
    # can be numerically unstable.
    #
    # So here we use tf.nn.softmax_cross_entropy_with_logits on the raw
    # outputs of 'y', and then average across the batch.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    # summary
    summary_writer = tf.summary.FileWriter(os.path.join(dirname, "summary"),
                                           graph=tf.get_default_graph())

    # Train
    for _ in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

    # Test trained model
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("YMK: accuracy")
    print(
        sess.run(accuracy,
                 feed_dict={
                     x: mnist.test.images,
                     y_: mnist.test.labels
                 }))

    print("YMK: save to pb and ckpt")
    pb_path = tf.train.write_graph(sess.graph_def, dirname, "mnist.pb", False)
    print(" GraphDef saved in file: %s" % pb_path)
    ckpt_path = saver.save(sess, os.path.join(ckpt_dir, "model.ckpt"))
    print(" Model saved in file: %s" % ckpt_path)

    # print W
    print("YMK: print details")
    print(" W:")
    print(W.eval())
    print(" W_min:")
    print(W_min.eval())
    print(" W_max:")
    print(W_max.eval())
    print(" b:")
    print(b.eval())

    # Save a batch 10
    batch_xs = mnist.test.images[0:10]
    batch_ys = mnist.test.labels[0:10]
    # print("YMK: print mnist test first 10")
    # print(" batch_xs:")
    # print(batch_xs)

    # run test
    ys = sess.run(y, feed_dict={x: batch_xs, y_: batch_ys})
    # print(" y:")
    # print(ys)

    # save to npy
    np.save(os.path.join(exportbase, 'W.npy'), W.eval())
    np.save(os.path.join(exportbase, 'b.npy'), b.eval())
    np.save(os.path.join(exportbase, 'batch_xs.npy'), batch_xs)
    np.save(os.path.join(exportbase, 'batch_ys.npy'), batch_ys)
    np.save(os.path.join(exportbase, 'ys.npy'), ys)

    # Release the resources held by the interactive session.
    sess.close()
def quant_layer(x, clip_range, bits):
    """Fake-quantise ``x`` to ``bits`` bits within ``[clip_range[0], clip_range[1]]``.

    TensorFlow is imported inside the function body rather than relying on a
    module-level import.
    """
    import tensorflow as tf

    lower_bound = clip_range[0]
    upper_bound = clip_range[1]
    return tf.fake_quant_with_min_max_vars(
        x, min=lower_bound, max=upper_bound, num_bits=bits)
def fake_quant(inputs, clamp, num_bits):
    """Fake-quantise ``inputs`` over ``[-clamp, clamp - clamp / 2**(num_bits - 1)]``.

    Args:
        inputs: Tensor to quantise.
        clamp: Magnitude of the lower bound of the range.
        num_bits: Bit width of the quantisation.

    Returns:
        The fake-quantised tensor.
    """
    # Number of steps on one side of zero, as a float.
    half_levels = 2 ** (num_bits - 1) * 1.0
    lower_bound = -clamp
    upper_bound = clamp - clamp / half_levels
    return tf.fake_quant_with_min_max_vars(
        inputs, lower_bound, upper_bound, num_bits=num_bits)
def FakeQuantWithMinMaxVars(x, min_value, max_value):
    """Fake-quantise ``x`` between ``min_value`` and ``max_value``.

    The bit width is taken from the name ``bits`` in the enclosing scope.
    """
    quantized = tf.fake_quant_with_min_max_vars(
        x,
        min_value,
        max_value,
        num_bits=bits)
    return quantized
def fix_pt(_in, int_L, deci_L, _min, _max):
    """Fake-quantise ``_in`` between ``_min`` and ``_max``.

    The bit width passed to the op is ``int_L + deci_L + 1``.
    """
    total_bits = int_L + deci_L + 1
    return tf.fake_quant_with_min_max_vars(_in, _min, _max, total_bits)