Example #1
def get_op_input_indices(graph: tf.Graph, ops_with_param_names: List[str]) -> List[int]:
    """
    Get input indices of ops
    :param graph: Tensorflow graph as tf.Graph
    :param ops_with_param_names: List of op names with params to insert quantize ops for
    :return: list of indices of parameters for each op
    """

    query = core.OpQuery(graph, ops_to_ignore=None)
    ops_with_params = [graph.get_operation_by_name(op_name) for op_name in ops_with_param_names]
    input_indices = query.get_weight_inputs(ops_with_params)
    if len(ops_with_param_names) != len(input_indices):
        _logger.error("Length of ops with params and input indices differ")
        raise AssertionError("Length of ops with params and input indices differ")
    return input_indices
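
A minimal usage sketch, assuming the module context above (the core.OpQuery helper) and TF 1.x graph mode; the toy graph and op name are hypothetical:

import tensorflow as tf

tf.compat.v1.disable_eager_execution()
graph = tf.Graph()
with graph.as_default():
    x = tf.compat.v1.placeholder(tf.float32, shape=(1, 4), name='input')
    w = tf.compat.v1.get_variable('w', shape=(4, 8))
    _ = tf.matmul(x, w, name='fc')

# For a MatMul op, input 0 is the activation and input 1 the weight,
# so the expected index for 'fc' is 1
print(get_op_input_indices(graph, ['fc']))  # e.g. [1]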
Example #2
def _get_internal_ops_to_quantize_params_for(graph: tf.Graph, internal_ops: List[tf.Operation]) \
        -> Tuple[List[str], List[int]]:
    """
    Fetches op names with param input indices for ops with quantizable params
    :param graph: TensorFlow graph as tf.Graph
    :param internal_ops: List of TensorFlow ops within a module
    :return: Tuple consisting of list of op names with params to insert quantize ops for as well as list of indices
    of parameters for each op, within recurrent block
    """

    query = core.OpQuery(graph, ops_to_ignore=None)
    valid_tf_ops = query.get_weight_ops(ops=internal_ops)
    # De-duplicate names once, then keep a stable list so the returned names
    # stay aligned with the input indices computed for them
    ops_with_param_names = list({tf_op.name for tf_op in valid_tf_ops})
    input_indices = []
    if ops_with_param_names:
        input_indices = get_op_input_indices(graph, ops_with_param_names)
    else:
        _logger.info("No ops with params detected in this recurrent module")
    return ops_with_param_names, input_indices
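
A hedged sketch of how this helper might be driven for a recurrent module; the scope name is illustrative and `graph` is assumed to already hold an LSTM cell:

# Hypothetical: collect the ops inside one recurrent cell scope
internal_ops = [op for op in graph.get_operations()
                if op.name.startswith('rnn/basic_lstm_cell')]
op_names, indices = _get_internal_ops_to_quantize_params_for(graph, internal_ops)
for name, idx in zip(op_names, indices):
    print('quantize param at input %d of %s' % (idx, name))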
Example #3
    def _create_layer_attributes_list(ops_to_use, sess):
        """
        Creates list of layer attributes given a set of TF ops
        :param ops_to_use: TF ops to collect layer attributes for
        :param sess: tf.compat.v1.Session to use
        :return: Created list of layer attributes
        """
        query = core.OpQuery(sess.graph)
        layer_attributes_list = []
        for op in ops_to_use:
            weight_shape = query.get_weights_for_op(op).eval(session=sess).shape
            if op.type == 'MatMul':
                n, c = weight_shape
                weight_shape = (1, 1, n, c)
            output_dims = op.outputs[0].shape
            cost = Svd._compute_layer_cost(weight_shape, output_dims, op.type)
            layer_attributes_list.append(LayerAttributes(op, cost, weight_shape))

        return layer_attributes_list
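
The MatMul branch above reuses the conv cost model by viewing an [n, c] FC weight as a 1x1 convolution kernel; a quick numpy check of that equivalence (the shapes are made up):

import numpy as np

fc_weights = np.zeros((512, 1000))   # FC weight: [input_dim, output_dim]
n, c = fc_weights.shape
conv_like_shape = (1, 1, n, c)       # [H, W, I, O] of an equivalent 1x1 conv
assert np.prod(conv_like_shape) == fc_weights.size  # same parameter count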
Example #4
    def _compute_compression_ratio(self, sess, cost_metric):
        """
        Compute compression ratio
        :param sess: tf.compat.v1.Session
        :param cost_metric: Cost metric to use for computing savings (memory or mac)
        :return: Computed compression ratio
        """
        query = core.OpQuery(sess.graph)
        compressible_ops = query.get_weight_ops()
        compressible_ops = [op for op in compressible_ops if op.type in _SVD_SUPPORTED_LAYER_TYPES]

        layer_attributes_list = Svd._create_layer_attributes_list(compressible_ops, sess)
        selected_layers_ops = [layer.layer_ref.name for layer in self._selected_layers]
        layer_attributes_list = [layer for layer in layer_attributes_list
                                 if layer.layer_ref.name not in selected_layers_ops]
        compressed_network_cost = Svd._compute_network_cost(layer_attributes_list)

        if cost_metric is CostMetric.memory:
            savings = self._networkCost[0] - compressed_network_cost[0]
            ratio = savings / self._networkCost[0]
        else:
            savings = self._networkCost[1] - compressed_network_cost[1]
            ratio = savings / self._networkCost[1]

        return ratio
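
The returned ratio is the relative savings against the original network cost; a worked example with hypothetical memory costs:

original_cost = 10_000_000    # e.g. total parameter memory before compression
compressed_cost = 3_500_000   # network cost recomputed after SVD splitting
ratio = (original_cost - compressed_cost) / original_cost
print(ratio)  # 0.65: 65% of the original cost is saved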
Example #5
    def _prepare_graph_for_quantization(self, collect_stats=True):
        """
        Inserts the appropriate quantization ops and prequantizes the params depending upon the
        configuration parameters. Operations are inserted in the current default graph.
        Raises:
            RuntimeError: Thrown when there was an error inserting operations
        :param collect_stats: If True, stats are collected
        :return:
        """

        # Get the op query module
        query = core.OpQuery(self._sess.graph,
                             op_map=self._op_map,
                             ops_to_ignore=self._ops_to_ignore)

        # Query the known op groups and insert quantization nodes after the ops
        # Should we also be including quantization ops starting with labels? No for now...
        activation_ops = query.get_known_ops(inputs=self._input_tensor_names)

        # Query all ops with weights and quantize the input weights
        weight_ops = query.get_weight_ops(skip_bias_op=self._skip_bias)
        input_indices = query.get_weight_inputs(weight_ops)

        # Instantiate DlQuantization object
        quant_node_names = [
            self._get_quantized_name(op.name) for op in activation_ops
        ]
        libpytrext.ResetQuantizer()
        libpytrext.InitQuantizer(quant_node_names, self._comp_mode, [],
                                 self._quant_mode)

        # Add quantization ops/data
        self._insert_weight_quantization_ops(weight_ops, input_indices)
        if not self._skip_output:
            self._insert_activation_quantization_ops(activation_ops,
                                                     collect_stats)
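
`weight_ops` and `input_indices` above are aligned positionally; a hedged sketch of how an insertion loop might consume the pairing (illustrative only, not the actual _insert_weight_quantization_ops body):

for op, idx in zip(weight_ops, input_indices):
    weight_tensor = op.inputs[idx]   # the tensor feeding the op's param input
    # A quantization op would be spliced between weight_tensor and op here
    print('would quantize input %d (%s) of %s' % (idx, weight_tensor.name, op.name))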
Example #6
    def _split_fc_layer(self, sess, svd_ranks, op_name, bias_op_name=None):
        """
        Split a given fully connected (FC) layer given a rank
        :param sess: tf.compat.v1.Session
        :param svd_ranks: Rank to split the layer with (two ranks in case of SSVD)
        :param op_name: Name of the op to split
        :param bias_op_name: Name of the corresponding bias op (if any)
        :return: Per-layer compression ratio
        """
        # pylint: disable=too-many-statements, too-many-locals

        logger.info('Splitting fully connected op: %s', op_name)

        # Retrieve the op(s) from the current graph
        op = sess.graph.get_operation_by_name(op_name)
        bias_op = None
        if bias_op_name:
            bias_op = sess.graph.get_operation_by_name(bias_op_name)

        # Log the current FC weight shape
        query = core.OpQuery(sess.graph)
        w_shape = query.get_weights_for_op(op).get_shape().as_list()
        logger.debug('Original %s weight shape: %s', op.name, str(w_shape))
        split_weights, weight_sizes = [], []
        split_biases, bias_sizes = [], []

        # FC a weights are: [w_shape[0], svd_ranks[0]] in [I,O] order.
        # We build the split weights in SVD format [O,I] and later transpose back to [I,O]
        split_fc_a_w_shape = (svd_ranks[0], w_shape[0])
        fc_a_weights = np.zeros(split_fc_a_w_shape)
        fc_a_bias = np.zeros(svd_ranks[0])
        split_weights.append(fc_a_weights.flatten().tolist())
        weight_sizes.append(fc_a_weights.size)
        if bias_op:
            split_biases.append(fc_a_bias.flatten().tolist())
            bias_sizes.append(fc_a_bias.size)

        # FC b weights are: [svd_ranks[0], w_shape[1]] in [I,O] order.
        # We build the split weights in SVD format [O,I] and later transpose back to [I,O]
        split_fc_b_w_shape = (w_shape[1], svd_ranks[0])
        fc_b_weights = np.zeros(split_fc_b_w_shape)
        split_weights.append(fc_b_weights.flatten().tolist())
        weight_sizes.append(fc_b_weights.size)
        if bias_op:
            fc_b_bias = np.zeros(w_shape[1])
            split_biases.append(fc_b_bias.flatten().tolist())
            bias_sizes.append(fc_b_bias.size)

        # Split the weights and biases according to the number of layers and ranks
        split_weights = self._svd.SplitLayerWeights(op.name, split_weights, weight_sizes, svd_ranks)
        split_biases = self._svd.SplitLayerBiases(op.name, split_biases, bias_sizes, svd_ranks)

        if split_weights:
            fc_a_name = op.name+'_a'
            fc_a_weights = np.array(split_weights[0]).reshape(split_fc_a_w_shape).transpose(1, 0)
            fc_a_w = tf.Variable(initial_value=fc_a_weights, name=fc_a_name+'_w', dtype=tf.float32)
            logger.debug('%s weight shape: %s', fc_a_name, str(fc_a_weights.shape))

            # Create fc_a
            fc_acts = tf.matmul(op.inputs[0], fc_a_w, name=fc_a_name)
            if bias_op:
                fc_a_bias = tf.Variable(initial_value=split_biases[0], name=fc_a_name+'_bias', dtype=tf.float32)
                fc_acts = fc_acts + fc_a_bias

        if len(split_weights) > 1:
            # Create fc_b
            fc_b_name = op.name+'_b'
            fc_b_weights = np.array(split_weights[1]).reshape(split_fc_b_w_shape).transpose(1, 0)
            fc_b_w = tf.Variable(initial_value=fc_b_weights, name=fc_b_name+'_w', dtype=tf.float32)
            logger.debug('%s weight shape: %s', fc_b_name, str(fc_b_weights.shape))
            fc_acts = tf.matmul(fc_acts, fc_b_w, name=fc_b_name)
            if bias_op:
                fc_b_bias = tf.Variable(initial_value=split_biases[1], name=fc_b_name+'_bias', dtype=tf.float32)
                fc_acts = fc_acts + fc_b_bias
        ratio = self._compute_per_layer_compression_ratio([fc_a_w.shape, fc_b_w.shape],
                                                          fc_acts.shape, w_shape, 'MatMul')
        consumers = []
        rerouted_inputs = [bias_op.outputs[0]] if bias_op else [op.outputs[0]]
        for inp in rerouted_inputs:
            for consumer in inp.consumers():
                consumers.append(consumer)
        _ = ge.reroute_ts(fc_acts, rerouted_inputs, can_modify=consumers)
        return ratio
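
For reference, the factorization behind the two stacked MatMuls: a rank-r truncated SVD approximates a weight W [I, O] as W_a @ W_b with W_a [I, r] and W_b [r, O]. A self-contained numpy sketch (dimensions are made up):

import numpy as np

in_dim, out_dim, rank = 512, 1000, 64
W = np.random.randn(in_dim, out_dim)

U, s, Vt = np.linalg.svd(W, full_matrices=False)
W_a = U[:, :rank] * s[:rank]   # fc_a weights: [in_dim, rank]
W_b = Vt[:rank, :]             # fc_b weights: [rank, out_dim]

x = np.random.randn(1, in_dim)
print(np.linalg.norm(x @ W - (x @ W_a) @ W_b))  # reconstruction error of the split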
Example #7
    def _split_conv_layer(self, sess, svd_ranks, attr, op_name, bias_op_name=None):
        """
        Split a given conv layer given a rank
        :param sess: tf.compat.v1.Session
        :param svd_ranks: Rank to split the layer with (two ranks in case of SSVD)
        :param attr: Reference to the corresponding layer attribute
        :param op_name: Name of the op to split
        :param bias_op_name: Name of the corresponding bias op (if any)
        :return: Per-layer compression ratio
        """
        # pylint: disable=too-many-statements,too-many-branches,too-many-locals

        logger.info('Splitting conv op: %s', op_name)

        # Retrieve the op(s) from the current graph
        op = sess.graph.get_operation_by_name(op_name)

        bias_op = None
        if bias_op_name:
            bias_op = sess.graph.get_operation_by_name(bias_op_name)

        # Create new 'conv_a' layer
        pad_mode = op.get_attr('padding')
        data_format = op.get_attr('data_format').decode('utf-8')
        strides = op.get_attr('strides')

        # Log the current conv weight shape
        query = core.OpQuery(sess.graph)
        w_shape = query.get_weights_for_op(op).get_shape().as_list()
        logger.debug('Original %s weight shape: %s', op.name, str(w_shape))
        split_weights, weight_sizes = [], []
        split_biases, bias_sizes = [], []

        # TF weights are in [H,W,I,O] order. We must reshape the split weights to SVD format [O,I,H,W]
        # and then transpose back
        # Conv a weights are: [1, 1, w_shape[2], svd_ranks[0]]
        split_conv_a_w_shape = (svd_ranks[0], w_shape[2], 1, 1)
        conv_a_weights = np.zeros(split_conv_a_w_shape)     # transposed back to TF order via (2, 3, 1, 0) below
        split_weights.append(conv_a_weights.flatten().tolist())
        weight_sizes.append(conv_a_weights.size)
        if bias_op:
            conv_a_bias = np.zeros(svd_ranks[0])
            split_biases.append(conv_a_bias.flatten().tolist())
            bias_sizes.append(conv_a_bias.size)

        num_filters = w_shape[3]
        if len(svd_ranks) >= 2 and attr.mode == pymo.TYPE_SUCCESSIVE:
            # Output channels = output_rank (s)
            num_filters = svd_ranks[1]

        # Conv b weights are: [w_shape[0],w_shape[1],svd_ranks[0],num_filters]
        split_conv_b_w_shape = (num_filters, svd_ranks[0], w_shape[0], w_shape[1])
        conv_b_weights = np.zeros(split_conv_b_w_shape)
        conv_b_bias = np.zeros(num_filters)
        split_weights.append(conv_b_weights.flatten().tolist())
        weight_sizes.append(conv_b_weights.size)
        if bias_op:
            split_biases.append(conv_b_bias.flatten().tolist())
            bias_sizes.append(conv_b_bias.size)

        # Only create a third conv layer when performing successive SVD
        if len(svd_ranks) >= 2 and attr.mode == pymo.TYPE_SUCCESSIVE:
            # Conv c weights are: [1,1,num_filters,w_shape[3]]
            split_conv_c_w_shape = (w_shape[3], num_filters, 1, 1)
            conv_c_weights = np.zeros(split_conv_c_w_shape)
            conv_c_bias = np.zeros(w_shape[3])
            split_weights.append(conv_c_weights.flatten().tolist())
            weight_sizes.append(conv_c_weights.size)
            if bias_op:
                split_biases.append(conv_c_bias.flatten().tolist())
                bias_sizes.append(conv_c_bias.size)

        # Split the weights and biases according to the number of layers and ranks
        split_weights = self._svd.SplitLayerWeights(op.name, split_weights, weight_sizes, svd_ranks)
        split_biases = self._svd.SplitLayerBiases(op.name, split_biases, bias_sizes, svd_ranks)
        if split_weights:
            conv_a_name = op.name+'_a'
            conv_a_weights = np.array(split_weights[0]).reshape(split_conv_a_w_shape).transpose(2, 3, 1, 0)
            conv_a_w = tf.Variable(initial_value=conv_a_weights, name=conv_a_name+'_w', dtype=tf.float32)
            logger.debug('%s weight shape: %s', conv_a_name, str(conv_a_weights.shape))

            # Create conv_a using default strides (1,1)
            # pylint: disable=no-member
            conv_acts = tf.nn.conv2d(op.inputs[0], conv_a_w, strides=[1, 1, 1, 1], data_format=data_format,
                                     padding=pad_mode, name=conv_a_name)
            if bias_op:
                conv_a_bias = tf.Variable(initial_value=split_biases[0], name=conv_a_name+'_bias', dtype=tf.float32)
                conv_acts = conv_acts + conv_a_bias     # tf.nn.bias_add(conv_acts, split_biases[0])

        if len(split_weights) > 1:
            # Create conv_b
            conv_b_name = op.name+'_b'
            conv_b_weights = np.array(split_weights[1]).reshape(split_conv_b_w_shape).transpose(2, 3, 1, 0)
            conv_b_w = tf.Variable(initial_value=conv_b_weights, name=conv_b_name+'_w', dtype=tf.float32)
            logger.debug('%s weight shape: %s', conv_b_name, str(conv_b_weights.shape))

            # pylint: disable=no-member
            conv_acts = tf.nn.conv2d(conv_acts, conv_b_w, strides=strides, data_format=data_format,
                                     padding=pad_mode, name=conv_b_name)
            if bias_op:
                conv_b_bias = tf.Variable(initial_value=split_biases[1], name=conv_b_name+'_bias', dtype=tf.float32)
                conv_acts = conv_acts + conv_b_bias     # tf.nn.bias_add(conv_acts, split_biases[1])
        ratio = self._compute_per_layer_compression_ratio([conv_a_w.shape, conv_b_w.shape],
                                                          conv_acts.shape, w_shape, 'Conv2D')
        # Only create a third conv layer when performing successive SVD
        if len(split_weights) > 2 and len(svd_ranks) >= 2 and attr.mode == pymo.TYPE_SUCCESSIVE:
            # Create conv_c, using default strides (1,1)
            conv_c_name = op.name+'_c'
            conv_c_weights = np.array(split_weights[2]).reshape(split_conv_c_w_shape).transpose(2, 3, 1, 0)
            conv_c_w = tf.Variable(initial_value=conv_c_weights, name=conv_c_name+'_w', dtype=tf.float32)
            logger.debug('%s weight shape: %s', conv_c_name, str(conv_c_weights.shape))

            # pylint: disable=no-member
            conv_acts = tf.nn.conv2d(conv_acts, conv_c_w, strides=[1, 1, 1, 1], data_format=data_format,
                                     padding=pad_mode, name=conv_c_name)
            if bias_op:
                conv_c_bias = tf.Variable(initial_value=split_biases[2], name=conv_c_name+'_bias', dtype=tf.float32)
                conv_acts = conv_acts + conv_c_bias     # tf.nn.bias_add(conv_acts, split_biases[2])

        consumers = []
        rerouted_inputs = [bias_op.outputs[0]] if bias_op else [op.outputs[0]]
        for inp in rerouted_inputs:
            for consumer in inp.consumers():
                consumers.append(consumer)
        _ = ge.reroute_ts(conv_acts, rerouted_inputs, can_modify=consumers)

        return ratio
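
A shape-level check of the single-SVD conv split above, with hypothetical dimensions: conv_a is a 1x1 conv reducing input channels to the rank, and conv_b keeps the original spatial kernel:

import numpy as np

kh, kw, in_ch, out_ch, rank = 3, 3, 64, 128, 16   # original [H, W, I, O] kernel
conv_a = np.zeros((1, 1, in_ch, rank))            # 1x1 conv: in_ch -> rank
conv_b = np.zeros((kh, kw, rank, out_ch))         # 3x3 conv: rank -> out_ch

orig_params = kh * kw * in_ch * out_ch
split_params = conv_a.size + conv_b.size
print(split_params / orig_params)  # < 1 when the rank is small enough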
Example #8
    def _store_net_stats(self, sess):
        """
        Store layer attributes in the PyMo library instance
        :param sess: tf.compat.v1.Session
        :return: None
        """
        # pylint: disable=too-many-locals,too-many-branches,too-many-statements

        if self._metric == CostMetric.memory:
            pymo_metric = pymo.COST_TYPE_MEMORY
        else:
            pymo_metric = pymo.COST_TYPE_MAC

        self._svd.SetCostMetric(pymo_metric)

        # Layer-selection
        if self._layers_to_compress:
            selected_layers, network_cost = self._pick_compression_layers(sess,
                                                                          self._metric,
                                                                          self.LayerSelectionScheme.manual,
                                                                          layers_to_compress=self._layers_to_compress)
        elif self._num_layers > 0:
            selected_layers, network_cost = self._pick_compression_layers(sess,
                                                                          self._metric,
                                                                          self.LayerSelectionScheme.top_n_layers,
                                                                          num_layers=self._num_layers)
        else:
            percent_thresh = self._layer_selection_threshold * 100
            selected_layers, network_cost = self._pick_compression_layers(sess,
                                                                          self._metric,
                                                                          self.LayerSelectionScheme.top_x_percent,
                                                                          percent_thresh=percent_thresh)

        self._networkCost = network_cost

        print("Selected Layers:")
        for layer in selected_layers:
            print(layer.layer_ref.name)

        self._selected_layers = selected_layers

        # Get the op query module and query for all Conv/FC layers
        query = core.OpQuery(sess.graph)
        self._compressible_ops = query.get_weight_ops()

        # Set up the layer attributes for each Conv/FC layer (this also checks for trailing
        # bias adds)
        for i, op in enumerate(self._compressible_ops):

            # If op is not a selected layer, skip
            if not any(op is layer.layer_ref for layer in selected_layers):
                continue

            attr = pymo.LayerAttributes()
            layerName = op.name
            output_dims = op.outputs[0].shape # TF uses dims [N,H,W,C]
            attr.layerType = self._get_layer_type(op)
            if self.svd_type == pymo.TYPE_SINGLE:
                attr.mode = self._svd.GetCompressionType(attr.layerType, 'single')
            else:
                attr.mode = self._svd.GetCompressionType(attr.layerType, 'successive')

            if op.type in ('Conv2D', 'MatMul'):
                logger.info('Setting layer attributes for: %s', layerName+'('+op.type+')')

                # Get weights
                weights = query.get_weights_for_op(op).eval(session=sess)
                w_shape = weights.shape
                logger.debug('Got weight shape: %s', w_shape)

                # Check for bias op
                bias = None
                if (i+1) < len(self._compressible_ops):
                    bias = query.get_bias_for_op(self._compressible_ops[i+1])
                    if bias is not None:
                        bias = bias.eval(session=sess)
                        logger.debug('Got %s w/bias. Shape: %s', op.type, str(bias.shape))

                if op.type == 'Conv2D':
                    attr.shape = [w_shape[3], w_shape[2], w_shape[0], w_shape[1]]   # TF Conv weight order [KH,KW,ID,OD]
                    attr.activation_dims = (output_dims[1], output_dims[2])         # (H,W)

                    # CONV weights are stored in the order {H,W,I,O} in Tensorflow
                    # Re-order them to the form {O,I,H,W}
                    weights = np.transpose(weights, (3, 2, 0, 1))

                elif op.type == 'MatMul':
                    attr.shape = [w_shape[1], w_shape[0], 1, 1]   # TF FC weight order [ID,OD], SVD expects [OD,ID]
                    attr.activation_dims = (1, 1)
                    weights = np.transpose(weights, (1, 0))

                # blobs is a numpy array... add to list then set
                params = [weights.flatten()]
                if bias is not None:
                    params.append(bias.flatten())
                attr.blobs = params

                # Save the attributes for this layer
                self._svd.StoreLayerAttributes(layerName, attr)
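
The transpose in the Conv2D branch converts TF's [H, W, I, O] kernel layout to the [O, I, H, W] order the PyMo library expects; a quick numpy check with a made-up shape:

import numpy as np

w_tf = np.zeros((3, 3, 64, 128))            # TF Conv2D kernel: [H, W, I, O]
w_pymo = np.transpose(w_tf, (3, 2, 0, 1))   # reorder to [O, I, H, W]
print(w_pymo.shape)  # (128, 64, 3, 3)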
Example #9
    def _pick_compression_layers(sess, cost_metric, layer_select_scheme, **kwargs):
        """
        Pick layers for SVD compression given parameters
        :param sess: tf.compat.v1.Session
        :param cost_metric: Metric to use for evaluating layer cost (either in terms of memory or mac)
        :param layer_select_scheme: Layer selection scheme to use
        :param kwargs: Keyword arguments that depend on which layer selection scheme is specified
            top_n_layers:: num_layers: Number of layers to pick
            top_x_percent:: percent_thresh: Top layers up to this parameter will be selected
            manual:: layers_to_compress: List of layers (names) to compress
        :return: Tuple of (list of selected layer attributes, network cost)
        """
        # pylint: disable=too-many-locals,too-many-branches

        if not isinstance(cost_metric, CostMetric):
            raise TypeError("cost_metric is not of type CostMetric")

        if not isinstance(layer_select_scheme, Svd.LayerSelectionScheme):
            raise TypeError("layer_selection_scheme is not of type Svd.LayerSelectionScheme")

        # Find all compressible ops
        query = core.OpQuery(sess.graph)
        compressible_ops = query.get_weight_ops()
        compressible_ops = [op for op in compressible_ops if op.type in _SVD_SUPPORTED_LAYER_TYPES]

        layer_attributes_list = Svd._create_layer_attributes_list(compressible_ops, sess)
        network_cost = Svd._compute_network_cost(layer_attributes_list)

        # Heuristic 1: Reject any ops whose param shape does not meet a base criterion
        pruned_list = []
        for layer_attributes in layer_attributes_list:
            h, w, n, c = layer_attributes.weight_shape
            if (n >= _MIN_LAYER_DIM_FOR_SVD) and ((c * h * w) >= _MIN_LAYER_DIM_FOR_SVD):
                pruned_list.append(layer_attributes)
            else:
                print("Pruning out {}: shape is {}".format(layer_attributes.layer_ref.name,
                                                           layer_attributes.weight_shape))

        # Reset layer_attributes_list for the next phase
        layer_attributes_list = pruned_list
        pruned_list = []

        # Sort the attribute list based on cost
        if cost_metric == CostMetric.memory:
            layer_attributes_list.sort(key=lambda x: x.cost[0], reverse=True)
        else:
            layer_attributes_list.sort(key=lambda x: x.cost[1], reverse=True)

        if layer_select_scheme == Svd.LayerSelectionScheme.top_n_layers:
            num_layers = kwargs['num_layers']
            pruned_list = layer_attributes_list[:num_layers]

        elif layer_select_scheme == Svd.LayerSelectionScheme.top_x_percent:
            percent_thresh = kwargs['percent_thresh']
            accum_cost = 0.
            total_cost = network_cost[0] if (cost_metric == CostMetric.memory) else network_cost[1]

            for layer in layer_attributes_list:
                cost = layer.cost[0] if (cost_metric == CostMetric.memory) else layer.cost[1]

                if (100 * (cost + accum_cost)/total_cost) < percent_thresh:
                    pruned_list.append(layer)
                    accum_cost += cost

        elif layer_select_scheme == Svd.LayerSelectionScheme.manual:
            layers_to_compress = kwargs['layers_to_compress']
            for layer in layer_attributes_list:
                if layer.layer_ref.name in layers_to_compress:
                    pruned_list.append(layer)

            if not pruned_list:
                raise RuntimeError('No suitable layers found in the model.')
        return pruned_list, network_cost
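
The top_x_percent branch accumulates the sorted per-layer costs greedily and stops before the running total crosses the threshold; a toy illustration with made-up costs:

costs = [50, 30, 15, 5]   # per-layer costs, already sorted in descending order
total = sum(costs)
percent_thresh = 85.0

selected, accum = [], 0.0
for cost in costs:
    if 100 * (cost + accum) / total < percent_thresh:
        selected.append(cost)
        accum += cost
print(selected)  # [50, 30]: adding the next layer would reach 95% > 85%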