def get_op_input_indices(graph: tf.Graph, ops_with_param_names: List) -> List[int]:
    """
    Get the weight-input indices of the given ops
    :param graph: TensorFlow graph as tf.Graph
    :param ops_with_param_names: List of op names with params to insert quantize ops for
    :return: List of indices of parameters for each op
    """
    query = core.OpQuery(graph, ops_to_ignore=None)
    ops_with_params = [graph.get_operation_by_name(op_name) for op_name in ops_with_param_names]
    input_indices = query.get_weight_inputs(ops_with_params)
    if len(ops_with_param_names) != len(input_indices):
        _logger.error("Length of ops with params and input indices differ")
        raise AssertionError
    return input_indices
def _get_internal_ops_to_quantize_params_for(graph: tf.Graph, internal_ops: List[tf.Operation]) \
        -> Tuple[List[str], List[int]]:
    """
    Fetches op names with param input indices for ops with quantizable params
    :param graph: TensorFlow graph as tf.Graph
    :param internal_ops: List of TensorFlow ops within a module
    :return: Tuple consisting of the list of op names with params to insert quantize ops for,
             and the list of indices of parameters for each op, within the recurrent block
    """
    query = core.OpQuery(graph, ops_to_ignore=None)
    valid_tf_ops = list(query.get_weight_ops(ops=internal_ops))

    # Collect the unique names of ops that carry quantizable parameters
    ops_with_param_names = set()
    for tf_op in valid_tf_ops:
        ops_with_param_names.add(tf_op.name)

    input_indices = []
    if ops_with_param_names:
        input_indices = get_op_input_indices(graph, ops_with_param_names)
    else:
        _logger.info("No ops with params detected in this recurrent module")

    return list(ops_with_param_names), input_indices
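# Hedged usage sketch (illustrative only, not part of the original module): it shows how the
# (op names, input indices) pair returned above is typically consumed, by zipping the two lists
# so each op name is matched with the input slot holding its weight tensor. The op names and
# indices below are made up.
def _example_pair_ops_with_weight_indices():
    ops_with_param_names = ['lstm/MatMul', 'lstm/MatMul_1']   # hypothetical op names
    input_indices = [1, 1]                                    # hypothetical weight-input slots
    return list(zip(ops_with_param_names, input_indices))     # [('lstm/MatMul', 1), ('lstm/MatMul_1', 1)]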
def _create_layer_attributes_list(ops_to_use, sess):
    """
    Creates list of layer attributes given a set of TF ops
    :param ops_to_use: TF ops to collect layer attributes for
    :param sess: tf.compat.v1.Session to use
    :return: Created list of layer attributes
    """
    query = core.OpQuery(sess.graph)
    layer_attributes_list = []
    for op in ops_to_use:
        weight_shape = query.get_weights_for_op(op).eval(session=sess).shape
        if op.type == 'MatMul':
            # Represent FC weights as a 4-D (1, 1, n, c) shape so the cost computation
            # is uniform with Conv2D
            n, c = weight_shape
            weight_shape = (1, 1, n, c)
        output_dims = op.outputs[0].shape
        cost = Svd._compute_layer_cost(weight_shape, output_dims, op.type)
        layer_attributes_list.append(LayerAttributes(op, cost, weight_shape))
    return layer_attributes_list
def _compute_compression_ratio(self, sess, cost_metric):
    """
    Compute compression ratio
    :param sess: tf.compat.v1.Session
    :param cost_metric: Cost metric to use (memory or mac)
    :return: Computed compression ratio
    """
    query = core.OpQuery(sess.graph)
    compressible_ops = query.get_weight_ops()
    compressible_ops = [op for op in compressible_ops if op.type in _SVD_SUPPORTED_LAYER_TYPES]
    layer_attributes_list = Svd._create_layer_attributes_list(compressible_ops, sess)

    # Compute the cost of the layers that were not selected for compression
    selected_layers_ops = [layer.layer_ref.name for layer in self._selected_layers]
    layer_attributes_list = [layer for layer in layer_attributes_list
                             if layer.layer_ref.name not in selected_layers_ops]
    compressed_network_cost = Svd._compute_network_cost(layer_attributes_list)

    if cost_metric is CostMetric.memory:
        savings = self._networkCost[0] - compressed_network_cost[0]
        ratio = savings / self._networkCost[0]
    else:
        savings = self._networkCost[1] - compressed_network_cost[1]
        ratio = savings / self._networkCost[1]

    return ratio
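# Hedged numeric sketch of the savings/total arithmetic above. The costs are made up for
# illustration and do not come from the module: _networkCost holds the full-network cost, and
# the "compressed" cost is the cost of the layers left uncompressed.
def _example_compression_ratio():
    total_cost = 10.0        # e.g. full-network memory cost
    remaining_cost = 7.5     # cost of the layers that were not selected for compression
    savings = total_cost - remaining_cost
    return savings / total_cost   # 0.25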
def _prepare_graph_for_quantization(self, collect_stats=True):
    """
    Inserts the appropriate quantization ops and prequantizes the params depending upon the
    configuration parameters. Operations are inserted in the current default graph.
    Raises:
        RuntimeError: Thrown when there was an error inserting operations
    :param collect_stats: If True, stats are collected
    :return:
    """
    # Get the op query module
    query = core.OpQuery(self._sess.graph, op_map=self._op_map, ops_to_ignore=self._ops_to_ignore)

    # Query the known op groups and insert quantization nodes after the ops
    # Should we also be including quantization ops starting with labels? No for now...
    activation_ops = query.get_known_ops(inputs=self._input_tensor_names)

    # Query all ops with weights and quantize the input weights
    weight_ops = query.get_weight_ops(skip_bias_op=self._skip_bias)
    input_indices = query.get_weight_inputs(weight_ops)

    # Instantiate DlQuantization object
    quant_node_names = [self._get_quantized_name(op.name) for op in activation_ops]
    libpytrext.ResetQuantizer()
    libpytrext.InitQuantizer(quant_node_names, self._comp_mode, [], self._quant_mode)

    # Add quantization ops/data
    self._insert_weight_quantization_ops(weight_ops, input_indices)
    if not self._skip_output:
        self._insert_activation_quantization_ops(activation_ops, collect_stats)
def _split_fc_layer(self, sess, svd_ranks, op_name, bias_op_name=None):
    """
    Split a given fully-connected layer given a rank
    :param sess: tf.compat.v1.Session
    :param svd_ranks: Rank to split the layer with (two ranks in case of SSVD)
    :param op_name: Name of the op to split
    :param bias_op_name: Name of the corresponding bias op (if any)
    :return: Per-layer compression ratio of the split layer
    """
    # pylint: disable=too-many-statements, too-many-locals
    logger.info('Splitting fully connected op: %s', op_name)

    # Retrieve the op(s) from the current graph
    op = sess.graph.get_operation_by_name(op_name)
    bias_op = None
    if bias_op_name:
        bias_op = sess.graph.get_operation_by_name(bias_op_name)

    # Log the current FC weight shape
    query = core.OpQuery(sess.graph)
    w_shape = query.get_weights_for_op(op).get_shape().as_list()
    logger.debug('Original %s weight shape: %s', op.name, str(w_shape))
    split_weights, weight_sizes = [], []
    split_biases, bias_sizes = [], []

    # FC a weights are [w_shape[0], svd_ranks[0]] in [I,O] order.
    # They are allocated in SVD format [O,I] and transposed back to [I,O] after the split.
    split_fc_a_w_shape = (svd_ranks[0], w_shape[0])
    fc_a_weights = np.zeros(split_fc_a_w_shape)
    fc_a_bias = np.zeros(svd_ranks[0])
    split_weights.append(fc_a_weights.flatten().tolist())
    weight_sizes.append(fc_a_weights.size)
    if bias_op:
        split_biases.append(fc_a_bias.flatten().tolist())
        bias_sizes.append(fc_a_bias.size)

    # FC b weights are [svd_ranks[0], w_shape[1]] in [I,O] order.
    # They are allocated in SVD format [O,I] and transposed back to [I,O] after the split.
    split_fc_b_w_shape = (w_shape[1], svd_ranks[0])
    fc_b_weights = np.zeros(split_fc_b_w_shape)
    split_weights.append(fc_b_weights.flatten().tolist())
    weight_sizes.append(fc_b_weights.size)
    if bias_op:
        fc_b_bias = np.zeros(w_shape[1])
        split_biases.append(fc_b_bias.flatten().tolist())
        bias_sizes.append(fc_b_bias.size)

    # Split the weights and biases according to the number of layers and ranks
    split_weights = self._svd.SplitLayerWeights(op.name, split_weights, weight_sizes, svd_ranks)
    split_biases = self._svd.SplitLayerBiases(op.name, split_biases, bias_sizes, svd_ranks)

    if split_weights:
        # Create fc_a
        fc_a_name = op.name + '_a'
        fc_a_weights = np.array(split_weights[0]).reshape(split_fc_a_w_shape).transpose(1, 0)
        fc_a_w = tf.Variable(initial_value=fc_a_weights, name=fc_a_name + '_w', dtype=tf.float32)
        logger.debug('%s weight shape: %s', fc_a_name, str(fc_a_weights.shape))
        fc_acts = tf.matmul(op.inputs[0], fc_a_w, name=fc_a_name)
        if bias_op:
            fc_a_bias = tf.Variable(initial_value=split_biases[0], name=fc_a_name + '_bias', dtype=tf.float32)
            fc_acts = fc_acts + fc_a_bias

        if len(split_weights) > 1:
            # Create fc_b
            fc_b_name = op.name + '_b'
            fc_b_weights = np.array(split_weights[1]).reshape(split_fc_b_w_shape).transpose(1, 0)
            fc_b_w = tf.Variable(initial_value=fc_b_weights, name=fc_b_name + '_w', dtype=tf.float32)
            logger.debug('%s weight shape: %s', fc_b_name, str(fc_b_weights.shape))
            fc_acts = tf.matmul(fc_acts, fc_b_w, name=fc_b_name)
            if bias_op:
                fc_b_bias = tf.Variable(initial_value=split_biases[1], name=fc_b_name + '_bias', dtype=tf.float32)
                fc_acts = fc_acts + fc_b_bias
            ratio = self._compute_per_layer_compression_ratio([fc_a_w.shape, fc_b_w.shape], fc_acts.shape,
                                                              w_shape, 'MatMul')

        # Reroute the downstream consumers of the original (bias) op to the new split ops
        consumers = []
        rerouted_inputs = [bias_op.outputs[0]] if bias_op else [op.outputs[0]]
        for inp in rerouted_inputs:
            for consumer in inp.consumers():
                consumers.append(consumer)
        _ = ge.reroute_ts(fc_acts, rerouted_inputs, can_modify=consumers)
    return ratio
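# Hedged NumPy sketch of the rank-r factorization that the FC split above realizes. Instead of
# the MO library call (SplitLayerWeights), plain numpy.linalg.svd is used here: a weight matrix
# W of shape [I, O] is replaced by two matmuls with weights of shape [I, r] and [r, O] whose
# product approximates W. Dimensions and rank are made up for illustration.
def _example_fc_rank_factorization(input_dim=64, output_dim=32, rank=8):
    w = np.random.randn(input_dim, output_dim)       # original FC weight, [I, O]
    u, s, vt = np.linalg.svd(w, full_matrices=False)
    fc_a_w = u[:, :rank] * s[:rank]                  # first matmul weight, [I, r]
    fc_b_w = vt[:rank, :]                            # second matmul weight, [r, O]
    approx = fc_a_w @ fc_b_w                         # low-rank approximation of W
    return fc_a_w.shape, fc_b_w.shape, np.linalg.norm(w - approx)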
def _split_conv_layer(self, sess, svd_ranks, attr, op_name, bias_op_name=None):
    """
    Split a given conv layer given a rank
    :param sess: tf.compat.v1.Session
    :param svd_ranks: Rank to split the layer with (two ranks in case of SSVD)
    :param attr: Reference to the corresponding layer attribute
    :param op_name: Name of the op to split
    :param bias_op_name: Name of the corresponding bias op (if any)
    :return: Per-layer compression ratio of the split layer
    """
    # pylint: disable=too-many-statements,too-many-branches,too-many-locals
    logger.info('Splitting conv op: %s', op_name)

    # Retrieve the op(s) from the current graph
    op = sess.graph.get_operation_by_name(op_name)
    bias_op = None
    if bias_op_name:
        bias_op = sess.graph.get_operation_by_name(bias_op_name)

    # Attributes needed to create the new split conv layers
    pad_mode = op.get_attr('padding')
    data_format = op.get_attr('data_format').decode('utf-8')
    strides = op.get_attr('strides')

    # Log the current conv weight shape
    query = core.OpQuery(sess.graph)
    w_shape = query.get_weights_for_op(op).get_shape().as_list()
    logger.debug('Original %s weight shape: %s', op.name, str(w_shape))
    split_weights, weight_sizes = [], []
    split_biases, bias_sizes = [], []

    # TF weights are in [H,W,I,O] order. We must reshape the split weights to SVD format [O,I,H,W]
    # and then transpose back.
    # Conv a weights are: [1, 1, w_shape[2], svd_ranks[0]]
    split_conv_a_w_shape = (svd_ranks[0], w_shape[2], 1, 1)
    conv_a_weights = np.zeros(split_conv_a_w_shape)
    split_weights.append(conv_a_weights.flatten().tolist())
    weight_sizes.append(conv_a_weights.size)
    if bias_op:
        conv_a_bias = np.zeros(svd_ranks[0])
        split_biases.append(conv_a_bias.flatten().tolist())
        bias_sizes.append(conv_a_bias.size)

    num_filters = w_shape[3]
    if len(svd_ranks) >= 2 and attr.mode == pymo.TYPE_SUCCESSIVE:
        # Output channels = output_rank (s)
        num_filters = svd_ranks[1]

    # Conv b weights are: [w_shape[0], w_shape[1], svd_ranks[0], num_filters]
    split_conv_b_w_shape = (num_filters, svd_ranks[0], w_shape[0], w_shape[1])
    conv_b_weights = np.zeros(split_conv_b_w_shape)
    conv_b_bias = np.zeros(num_filters)
    split_weights.append(conv_b_weights.flatten().tolist())
    weight_sizes.append(conv_b_weights.size)
    if bias_op:
        split_biases.append(conv_b_bias.flatten().tolist())
        bias_sizes.append(conv_b_bias.size)

    # Only create a third conv layer when performing successive SVD
    if len(svd_ranks) >= 2 and attr.mode == pymo.TYPE_SUCCESSIVE:
        # Conv c weights are: [1, 1, num_filters, w_shape[3]]
        split_conv_c_w_shape = (w_shape[3], num_filters, 1, 1)
        conv_c_weights = np.zeros(split_conv_c_w_shape)
        conv_c_bias = np.zeros(w_shape[3])
        split_weights.append(conv_c_weights.flatten().tolist())
        weight_sizes.append(conv_c_weights.size)
        if bias_op:
            split_biases.append(conv_c_bias.flatten().tolist())
            bias_sizes.append(conv_c_bias.size)

    # Split the weights and biases according to the number of layers and ranks
    split_weights = self._svd.SplitLayerWeights(op.name, split_weights, weight_sizes, svd_ranks)
    split_biases = self._svd.SplitLayerBiases(op.name, split_biases, bias_sizes, svd_ranks)

    if split_weights:
        conv_a_name = op.name + '_a'
        conv_a_weights = np.array(split_weights[0]).reshape(split_conv_a_w_shape).transpose(2, 3, 1, 0)
        conv_a_w = tf.Variable(initial_value=conv_a_weights, name=conv_a_name + '_w', dtype=tf.float32)
        logger.debug('%s weight shape: %s', conv_a_name, str(conv_a_weights.shape))

        # Create conv_a using default strides (1,1); the original op's dilation_rate is not propagated
        # pylint: disable=no-member
        conv_acts = tf.nn.conv2d(op.inputs[0], conv_a_w, strides=[1, 1, 1, 1], data_format=data_format,
                                 padding=pad_mode, name=op.name + '_a')
        if bias_op:
            conv_a_bias = tf.Variable(initial_value=split_biases[0], name=conv_a_name + '_bias', dtype=tf.float32)
            conv_acts = conv_acts + conv_a_bias

        if len(split_weights) > 1:
            # Create conv_b using the original op's strides
            conv_b_name = op.name + '_b'
            conv_b_weights = np.array(split_weights[1]).reshape(split_conv_b_w_shape).transpose(2, 3, 1, 0)
            conv_b_w = tf.Variable(initial_value=conv_b_weights, name=conv_b_name + '_w', dtype=tf.float32)
            logger.debug('%s weight shape: %s', conv_b_name, str(conv_b_weights.shape))
            # pylint: disable=no-member
            conv_acts = tf.nn.conv2d(conv_acts, conv_b_w, strides=strides, data_format=data_format,
                                     padding=pad_mode, name=conv_b_name)
            if bias_op:
                conv_b_bias = tf.Variable(initial_value=split_biases[1], name=conv_b_name + '_bias', dtype=tf.float32)
                conv_acts = conv_acts + conv_b_bias
            ratio = self._compute_per_layer_compression_ratio([conv_a_w.shape, conv_b_w.shape], conv_acts.shape,
                                                              w_shape, "Conv2D")

        # Only create a third conv layer when performing successive SVD
        if len(split_weights) > 2 and len(svd_ranks) >= 2 and attr.mode == pymo.TYPE_SUCCESSIVE:
            # Create conv_c, using default strides (1,1)
            conv_c_name = op.name + '_c'
            conv_c_weights = np.array(split_weights[2]).reshape(split_conv_c_w_shape).transpose(2, 3, 1, 0)
            conv_c_w = tf.Variable(initial_value=conv_c_weights, name=conv_c_name + '_w', dtype=tf.float32)
            logger.debug('%s weight shape: %s', conv_c_name, str(conv_c_weights.shape))
            # pylint: disable=no-member
            conv_acts = tf.nn.conv2d(conv_acts, conv_c_w, strides=[1, 1, 1, 1], data_format=data_format,
                                     padding=pad_mode, name=conv_c_name)
            if bias_op:
                conv_c_bias = tf.Variable(initial_value=split_biases[2], name=conv_c_name + '_bias', dtype=tf.float32)
                conv_acts = conv_acts + conv_c_bias

        # Reroute the downstream consumers of the original (bias) op to the new split ops
        consumers = []
        rerouted_inputs = [bias_op.outputs[0]] if bias_op else [op.outputs[0]]
        for inp in rerouted_inputs:
            for consumer in inp.consumers():
                consumers.append(consumer)
        _ = ge.reroute_ts(conv_acts, rerouted_inputs, can_modify=consumers)
    return ratio
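# Hedged shape sketch (illustrative sizes only) of the single-SVD conv split realized above: a
# k x k conv with TF weights [H, W, I, O] becomes a 1x1 conv with weights [1, 1, I, r] followed
# by a k x k conv with weights [H, W, r, O], so the parameter count drops from H*W*I*O to
# I*r + H*W*r*O.
def _example_conv_split_param_counts(h=3, w=3, in_ch=64, out_ch=128, rank=16):
    original_params = h * w * in_ch * out_ch                          # 73728 with the defaults
    split_params = (1 * 1 * in_ch * rank) + (h * w * rank * out_ch)   # 19456 with the defaults
    return original_params, split_params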
def _store_net_stats(self, sess):
    """
    Store layer attributes in the PyMo library instance
    :param sess: tf.compat.v1.Session
    :return: None
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    if self._metric == CostMetric.memory:
        pymo_metric = pymo.COST_TYPE_MEMORY
    else:
        pymo_metric = pymo.COST_TYPE_MAC
    self._svd.SetCostMetric(pymo_metric)

    # Layer selection
    if self._layers_to_compress:
        selected_layers, network_cost = self._pick_compression_layers(sess, self._metric,
                                                                      self.LayerSelectionScheme.manual,
                                                                      layers_to_compress=self._layers_to_compress)
    elif self._num_layers > 0:
        selected_layers, network_cost = self._pick_compression_layers(sess, self._metric,
                                                                      self.LayerSelectionScheme.top_n_layers,
                                                                      num_layers=self._num_layers)
    else:
        percent_thresh = self._layer_selection_threshold * 100
        selected_layers, network_cost = self._pick_compression_layers(sess, self._metric,
                                                                      self.LayerSelectionScheme.top_x_percent,
                                                                      percent_thresh=percent_thresh)
    self._networkCost = network_cost

    print("Selected Layers:")
    for layer in selected_layers:
        print(layer.layer_ref.name)
    self._selected_layers = selected_layers

    # Get the op query module and query for all Conv/FC layers
    query = core.OpQuery(sess.graph)
    self._compressible_ops = query.get_weight_ops()

    # Set up the layer attributes for each Conv/FC layer (this also checks for trailing bias adds)
    for i, op in enumerate(self._compressible_ops):

        # If op is not a selected layer, skip
        if not any(op is layer.layer_ref for layer in selected_layers):
            continue

        attr = pymo.LayerAttributes()
        layerName = op.name
        output_dims = op.outputs[0].shape    # TF uses dims [N,H,W,C]
        attr.layerType = self._get_layer_type(op)

        if self.svd_type == pymo.TYPE_SINGLE:
            attr.mode = self._svd.GetCompressionType(attr.layerType, 'single')
        else:
            attr.mode = self._svd.GetCompressionType(attr.layerType, 'successive')

        if op.type == 'Conv2D' or op.type == 'MatMul':
            logger.info('Setting layer attributes for: %s', layerName + '(' + op.type + ')')

            # Get weights
            weights = query.get_weights_for_op(op).eval(session=sess)
            w_shape = weights.shape
            logger.debug('Got weight shape: %s', w_shape)

            # Check for bias op
            bias = None
            if (i + 1) < len(self._compressible_ops):
                bias = query.get_bias_for_op(self._compressible_ops[i + 1])
                if bias is not None:
                    bias = bias.eval(session=sess)
                    logger.debug('Got %s w/bias. Shape: %s', op.type, str(bias.shape))

            if op.type == 'Conv2D':
                # TF Conv weight order is [KH,KW,ID,OD]; SVD expects [OD,ID,KH,KW]
                attr.shape = [w_shape[3], w_shape[2], w_shape[0], w_shape[1]]
                attr.activation_dims = (output_dims[1], output_dims[2])    # (H,W)
                # CONV weights are stored in the order {H,W,I,O} in TensorFlow;
                # re-order them to the form {O,I,H,W}
                weights = np.transpose(weights, (3, 2, 0, 1))
            elif op.type == 'MatMul':
                # TF FC weight order is [ID,OD]; SVD expects [OD,ID]
                attr.shape = [w_shape[1], w_shape[0], 1, 1]
                attr.activation_dims = (1, 1)
                weights = np.transpose(weights, (1, 0))

            # blobs is a numpy array... add to list then set
            params = [weights.flatten()]
            if bias is not None:
                params.append(bias.flatten())
            attr.blobs = params

            # Save the attributes for this layer
            self._svd.StoreLayerAttributes(layerName, attr)
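# Hedged sketch (illustrative shapes only) of the weight re-ordering performed above: TF stores
# Conv2D weights as [H, W, I, O] while the SVD library expects [O, I, H, W]; FC weights go from
# [I, O] to [O, I].
def _example_weight_layout_conversion():
    conv_w = np.zeros((3, 3, 16, 32))                  # TF layout [H, W, I, O]
    conv_w_svd = np.transpose(conv_w, (3, 2, 0, 1))    # -> [O, I, H, W], i.e. (32, 16, 3, 3)
    fc_w = np.zeros((128, 10))                         # TF layout [I, O]
    fc_w_svd = np.transpose(fc_w, (1, 0))              # -> [O, I], i.e. (10, 128)
    return conv_w_svd.shape, fc_w_svd.shape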
def _pick_compression_layers(sess, cost_metric, layer_select_scheme, **kwargs):
    """
    Pick layers for SVD compression given parameters
    :param sess: tf.compat.v1.Session
    :param cost_metric: Metric to use for evaluating layer cost (either in terms of memory or mac)
    :param layer_select_scheme: Layer selection scheme to use
    :param kwargs: Keyword arguments that depend on which layer selection scheme is specified
        top_n_layers:
            num_layers: Number of layers to pick
        top_x_percent:
            percent_thresh: Top layers up to this cumulative percentage of network cost are selected
        manual:
            layers_to_compress: List of layers (names) to compress
    :return: Tuple of (list of selected layer attributes, network cost)
    """
    # pylint: disable=too-many-locals,too-many-branches
    if not isinstance(cost_metric, CostMetric):
        raise TypeError("cost_metric is not of type CostMetric")

    if not isinstance(layer_select_scheme, Svd.LayerSelectionScheme):
        raise TypeError("layer_select_scheme is not of type Svd.LayerSelectionScheme")

    # Find all compressible ops
    query = core.OpQuery(sess.graph)
    compressible_ops = query.get_weight_ops()
    compressible_ops = [op for op in compressible_ops if op.type in _SVD_SUPPORTED_LAYER_TYPES]

    layer_attributes_list = Svd._create_layer_attributes_list(compressible_ops, sess)
    network_cost = Svd._compute_network_cost(layer_attributes_list)

    # Heuristic 1: Reject any ops whose param shape does not meet a base criterion
    pruned_list = []
    for layer_attributes in layer_attributes_list:
        h, w, n, c = layer_attributes.weight_shape
        if (n >= _MIN_LAYER_DIM_FOR_SVD) and ((c * h * w) >= _MIN_LAYER_DIM_FOR_SVD):
            pruned_list.append(layer_attributes)
        else:
            print("Pruning out {}: shape is {}".format(layer_attributes.layer_ref.name,
                                                       layer_attributes.weight_shape))

    # Reset layer_attributes_list for the next phase
    layer_attributes_list = pruned_list
    pruned_list = []

    # Sort the attribute list based on cost
    if cost_metric == CostMetric.memory:
        layer_attributes_list.sort(key=lambda x: x.cost[0], reverse=True)
    else:
        layer_attributes_list.sort(key=lambda x: x.cost[1], reverse=True)

    if layer_select_scheme == Svd.LayerSelectionScheme.top_n_layers:
        num_layers = kwargs['num_layers']
        pruned_list = layer_attributes_list[:num_layers]

    elif layer_select_scheme == Svd.LayerSelectionScheme.top_x_percent:
        percent_thresh = kwargs['percent_thresh']
        accum_cost = 0.
        total_cost = network_cost[0] if (cost_metric == CostMetric.memory) else network_cost[1]
        for layer in layer_attributes_list:
            cost = layer.cost[0] if (cost_metric == CostMetric.memory) else layer.cost[1]
            if (100 * (cost + accum_cost) / total_cost) < percent_thresh:
                pruned_list.append(layer)
                accum_cost += cost

    elif layer_select_scheme == Svd.LayerSelectionScheme.manual:
        layers_to_compress = kwargs['layers_to_compress']
        for layer in layer_attributes_list:
            if layer.layer_ref.name in layers_to_compress:
                pruned_list.append(layer)

    if not pruned_list:
        raise RuntimeError('No suitable layers found in the model.')

    return pruned_list, network_cost
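# Hedged sketch (made-up layer costs) of the top_x_percent rule above: layers are visited in
# descending cost order and a layer is kept whenever adding it keeps the running share of the
# total network cost below the threshold.
def _example_top_x_percent_selection(percent_thresh=60.0):
    layer_costs = {'conv1': 50.0, 'conv2': 30.0, 'fc1': 15.0, 'fc2': 5.0}   # hypothetical costs
    total_cost = sum(layer_costs.values())
    selected, accum_cost = [], 0.0
    for name, cost in sorted(layer_costs.items(), key=lambda kv: kv[1], reverse=True):
        if 100 * (cost + accum_cost) / total_cost < percent_thresh:
            selected.append(name)
            accum_cost += cost
    return selected   # ['conv1', 'fc2']: conv2 and fc1 would push the share past 60%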