def initialize_model_with_bias(sess: tf.compat.v1.Session, input_op_names: List[str], output_op_names: List[str]) \
        -> tf.compat.v1.Session:
    """
    Give every conv/linear op of the model a bias parameter.

    Each Conv2D, DepthwiseConv2dNative or MatMul op returned by get_valid_ops that currently
    has no bias gets a zero-valued bias inserted. The graph is then saved and reloaded into a
    new session, and the session that was passed in is closed.

    :param sess: model to be updated as tf.compat.v1.Session; closed before this function returns
    :param input_op_names: input op names, forwarded to get_valid_ops
    :param output_op_names: output op names, forwarded to get_valid_ops
    :return: updated session as tf.compat.v1.Session
    """
    assert sess is not None

    layer_types_with_bias = ['Conv2D', 'DepthwiseConv2dNative', 'MatMul']

    with sess.graph.as_default():
        for candidate in get_valid_ops(sess.graph, input_op_names, output_op_names):
            # gradient ops are never candidates
            if candidate.name.startswith('gradients/'):
                continue
            if candidate.type not in layer_types_with_bias:
                continue
            # ops that already carry a bias are left untouched
            if not BiasUtils.is_bias_none(candidate):
                continue

            shape_of_bias = BiasUtils._get_bias_shape_from_weights(candidate)
            zeros = tf.Variable(initial_value=np.zeros(shape_of_bias), dtype=tf.float32)
            BiasUtils._create_bias_add_op_and_insert(sess, candidate, zeros)

    # the graph was edited, so reload it into a fresh session and release the old one
    reloaded_sess = save_and_load_graph('./temp', sess)
    sess.close()

    return reloaded_sess
def test_bias_add_with_conv(self):
    """
    Test bias add on conv op.

    Builds Conv2D (without bias) -> BatchNormalization -> Relu, writes a random bias for the
    conv through BiasUtils.update_bias_for_op, reloads the graph with save_and_load_graph and
    checks that the conv in the reloaded graph reports a bias.
    """
    tf.compat.v1.reset_default_graph()
    inputs = tf.keras.Input(shape=(32, 32, 3,), name="inputs")

    # create a conv without bias param
    conv_op = tf.keras.layers.Conv2D(32, (3, 3), use_bias=False)(inputs)
    bn_op = tf.keras.layers.BatchNormalization(fused=True)(conv_op)
    # pylint: disable=no-member
    _ = tf.nn.relu(bn_op)

    init = tf.compat.v1.global_variables_initializer()
    sess = tf.compat.v1.Session()
    new_sess = None
    try:
        sess.run(init)

        conv_op = sess.graph.get_operation_by_name('conv2d/Conv2D')
        self.assertTrue(BiasUtils.is_bias_none(conv_op))

        shape = BiasUtils.get_shape(conv_op)
        numpy_data = np.random.rand(shape[0])
        BiasUtils.update_bias_for_op(sess, conv_op, bias_as_numpy_array=numpy_data)

        # graph edits only become visible after a save/reload round trip
        new_sess = save_and_load_graph('./temp_bn_fold', sess)
        conv_op = new_sess.graph.get_operation_by_name('conv2d/Conv2D')

        # unittest assertion instead of a bare assert, which is stripped under `python -O`
        self.assertFalse(BiasUtils.is_bias_none(conv_op))

        # reading the bias back must work on the reloaded graph
        bias_as_numpy_data = BiasUtils.get_bias_as_numpy_data(new_sess, conv_op)
        self.assertIsNotNone(bias_as_numpy_data)
    finally:
        # close BOTH sessions: the original one was previously leaked
        if new_sess is not None:
            new_sess.close()
        sess.close()
def format_info_for_high_bias_fold(sess, layer_pairs, consecutive_layer_list, scaling_factor_list):
    """
    Helper that reshapes the outputs of batchnorm fold and cross layer scaling into the
    inputs expected by high bias fold.

    :param sess: tf.compat.v1.Session type
    :param layer_pairs: info obtained after batchnorm fold; iterable of 3-tuples
           (conv op, bn op with meta, fold-upstream flag)
    :param consecutive_layer_list: info obtained after cross layer scaling
    :param scaling_factor_list: scaling params corresponding to consecutive_layer_list
    :return: tuple of (session reloaded from the saved graph, list of (conv op, bn op) pairs,
             list of cls-set-info elements)
    """
    # keep only the conv op and the raw bn op; the fold-upstream flag is not needed here
    folded_pairs = [(conv, bn_with_meta.op) for conv, bn_with_meta, _ in layer_pairs]

    # One boolean per cls set telling whether relu activations sit between its layers.
    # Note the user is expected to fill in this list manually; it is passed on empty here.
    relu_flags = []

    cls_set_info_list = CrossLayerScaling.create_cls_set_info_list(
        consecutive_layer_list, scaling_factor_list, relu_flags)

    # save and reload the graph so the returned session reflects the scaled model
    after_cls_sess = save_and_load_graph('./temp_cls', sess)

    return after_cls_sess, folded_pairs, cls_set_info_list
def fold_given_batch_norms(
        sess: tf.compat.v1.Session,
        input_op_names: Union[str, List[str]],
        output_op_names: Union[str, List[str]],
        layer_pairs: List[Tuple[tf.Operation, tf.Operation, bool]]
) -> tf.compat.v1.Session:
    """
    Api to fold custom set of bn layers in a model

    :param sess: active tensorflow session; it is not closed by this function
    :param input_op_names: starting op in model or a list of starting ops in the model
    :param output_op_names: output op name or list of output op names of the model, used to help
           ConnectedGraph determine valid ops (to ignore training ops for example).
    :param layer_pairs: List of tuple with conv and bn op layers as tf.Operation and a flag to
           indicate fold upstream or downstream
    :return: updated_session after fold
    :raises KeyError: if a bn op in layer_pairs is not among the FusedBatchNorm / FusedBatchNormV3
            ops found by ConnectedGraph
    """
    # check for valid types (reported through the logger only; processing continues)
    if not isinstance(input_op_names, (str, List)):
        logger.error(
            'start op names must be passed as a string or a List of strings')

    # a single op name is accepted for either argument; normalize both to lists
    if isinstance(input_op_names, str):
        input_op_names = [input_op_names]
    if isinstance(output_op_names, str):
        output_op_names = [output_op_names]

    connected_graph = ConnectedGraph(sess.graph, input_op_names, output_op_names)

    # map each batch norm tf op to its connected graph op
    conn_tf_n_op_map = {
        op.get_module(): op
        for op in connected_graph.get_all_ops().values()
        if op.type in ['FusedBatchNormV3', 'FusedBatchNorm']
    }

    # convert the user supplied pairs to the format used by the internal api
    layer_pairs_internal_format = [
        (conv_op, conn_tf_n_op_map[bn_op].get_tf_op_with_io_tensor(), is_bn_op_second)
        for conv_op, bn_op, is_bn_op_second in layer_pairs
    ]

    # invoke internal api
    new_sess = _fold_given_auto_selected_batch_norms(
        sess, layer_pairs_internal_format)

    # save and load graph
    after_fold_sess = save_and_load_graph('./temp_graph', new_sess)

    # the intermediate session is owned by this function and superseded by the reloaded one
    new_sess.close()

    return after_fold_sess
def find_and_replace_relu6_with_relu(self, sess: tf.compat.v1.Session) -> tf.compat.v1.Session:
    """
    Replace every Relu6 op of the connected graph with Relu.

    :param sess: session whose graph is edited (the active graph)
    :return: updated session, reloaded from the saved graph
    """
    relu6_ops = (conn_op for conn_op in self._connected_graph.get_all_ops().values()
                 if conn_op.type == 'Relu6')

    for relu6_op in relu6_ops:
        # the session is handed over so the update is made on sess.graph
        ReluUtils.replace_relu6_with_relu(sess, relu6_op.get_module())

    # in the end, reload the session so the edits take effect
    return save_and_load_graph('./replace_relu6_with_relu', sess)
def test_reducing_vgg16_slim(self):
    """
    Winnow three input channels of vgg_16/fc7 in the slim VGG16 model, reload the graph and
    check that the reduced model still runs end to end.
    """
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()

    placeholder = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3])
    _ = vgg.vgg_16(placeholder)
    sess.run(tf.compat.v1.global_variables_initializer())

    fc7_conv = tf.compat.v1.get_default_graph().get_operation_by_name(
        "vgg_16/fc7/Conv2D")
    module_zero_channels_list = [(fc7_conv, [2, 3, 4])]

    new_sess, ordered_modules_list = winnow.winnow_tf_model(
        sess, ["Placeholder"], ['vgg_16/fc8/squeezed'],
        module_zero_channels_list,
        reshape=True, in_place=True, verbose=True)

    # Save and reload modified graph to allow changes to take effect
    new_sess = save_and_load_graph('./saver', new_sess)

    with new_sess.graph.as_default():
        random_image = tf.random.uniform(shape=(1, 224, 224, 3)).eval(session=new_sess)

    graph_input = new_sess.graph.get_tensor_by_name("Placeholder:0")
    graph_output = new_sess.graph.get_tensor_by_name("vgg_16/fc8/squeezed:0")

    # run through entire model to check no error is produced
    _ = new_sess.run(graph_output, feed_dict={graph_input: random_image})

    self.assertEqual(4, len(ordered_modules_list))

    new_sess.close()
    sess.close()
def scale_model(sess: tf.compat.v1.Session, input_op_names: Union[str, List[str]],
                output_op_names: Union[str, List[str]])\
        -> (tf.compat.v1.Session, List[ClsSetInfo]):
    """
    Apply cross-layer scaling to all applicable layers of the given model.

    :param sess: Session containing graph to scale
    :param input_op_names: Name, or list of names, of starting ops in the model
    :param output_op_names: Name, or list of names, of output ops of the model, used to help
           ConnectedGraph determine valid ops (to ignore training ops for example).
           If None, all ops in the model are considered valid.
    :return: updated session, CLS information for each CLS set
    """
    # a single op name is accepted; wrap it so downstream code always sees a list
    input_op_names = [input_op_names] if isinstance(input_op_names, str) else input_op_names
    output_op_names = [output_op_names] if isinstance(output_op_names, str) else output_op_names

    # Find layer groups
    searcher = GraphSearchUtils(sess.graph, input_op_names, output_op_names)
    tf_op_to_conn_graph_op_map, layer_groups = searcher.find_layer_groups_to_scale()

    # Flatten the cls sets of all layer groups into one list
    cls_sets = []
    for group in layer_groups:
        cls_sets.extend(searcher.convert_layer_group_to_cls_sets(group))

    # Scale the CLS sets
    scale_factors = CrossLayerScaling.scale_cls_sets(sess, cls_sets)

    # Find if there were relu activations between layers of each cls set
    relu_flags = searcher.is_relu_activation_present_in_cls_sets(cls_sets, tf_op_to_conn_graph_op_map)

    # Convert to a list of cls-set-info elements
    cls_set_info_list = CrossLayerScaling.create_cls_set_info_list(cls_sets, scale_factors, relu_flags)

    # save and load the updated graph after scaling
    return save_and_load_graph('./temp_cls', sess), cls_set_info_list
def prune_model(self, layer_db: LayerDatabase, layer_comp_ratio_list: List[LayerCompRatioPair],
                cost_metric: CostMetric, trainer):
    """
    Prune every layer whose compression ratio is below 1 and reconstruct the pruned layers.

    Works on a deep copy of layer_db. Layers are visited in the order produced by
    _sort_on_occurrence; each selected layer is winnowed in place on the copy's session.
    After all layers are processed the graph is saved and reloaded, the copied database is
    updated with the reloaded session, and the pruned layers are reconstructed.

    :param layer_db: original layer database; used for lookups and for reconstruction
    :param layer_comp_ratio_list: (layer, comp-ratio) pairs to process
    :param cost_metric: not used by this method
    :param trainer: not used by this method
    :return: the compressed copy of the layer database
    """
    # process layers in order of occurrence
    layer_comp_ratio_list = self._sort_on_occurrence(layer_db.model, layer_comp_ratio_list)

    # all edits are made on a copy of the db
    comp_layer_db = copy.deepcopy(layer_db)
    current_sess = comp_layer_db.model

    # original layer name -> most recent pruned layer name and output mask.
    # Masks remain at the original length and specify channels winnowed after each round of winnower.
    orig_layer_name_to_pruned_name_and_mask_dict = {}
    # most recent pruned layer name -> original layer name
    pruned_name_to_orig_name_dict = {}
    # original layers that need reconstruction
    layers_to_reconstruct = []
    detached_op_names = set()

    for pair in layer_comp_ratio_list:
        orig_layer = layer_db.find_layer_by_name(pair.layer.name)

        # only layers with a comp ratio below 1 are pruned
        ratio = pair.comp_ratio
        if ratio is None or not ratio < 1.0:
            continue

        # 1) channel selection
        prune_indices = self._select_inp_channels(orig_layer, ratio)
        if not prune_indices:
            continue

        # 2) Winnowing the model
        current_sess, ordered_modules_list = winnow.winnow_tf_model(
            current_sess, self._input_op_names, self._output_op_names,
            [(orig_layer.module, prune_indices)],
            reshape=self._allow_custom_downsample_ops,
            in_place=True, verbose=False)
        if not ordered_modules_list:
            continue

        layers_to_reconstruct.append(orig_layer)

        # record the newly pruned ops and their masks
        self._update_pruned_ops_and_masks_info(
            ordered_modules_list,
            orig_layer_name_to_pruned_name_and_mask_dict,
            pruned_name_to_orig_name_dict,
            detached_op_names)

    # Save and reload modified graph to allow changes to take effect.
    # Uninitialized variables must be initialized first, since only newly winnowed conv ops are
    # initialized during winnow_tf_model, and all other newly winnowed ops are not.
    with current_sess.graph.as_default():
        initialize_uninitialized_vars(current_sess)
    current_sess = save_and_load_graph('./saver', current_sess)
    comp_layer_db.update_database(current_sess, detached_op_names, update_model=True)

    # Perform reconstruction
    self._reconstruct_layers(
        layers_to_reconstruct, orig_layer_name_to_pruned_name_and_mask_dict,
        layer_db, comp_layer_db)

    return comp_layer_db
def compress_model(
        sess: tf.compat.v1.Session,
        working_dir: str,
        eval_callback: EvalFunction,
        eval_iterations,
        input_shape: Union[Tuple, List[Tuple]],
        compress_scheme: CompressionScheme,
        cost_metric: CostMetric,
        parameters: Union[SpatialSvdParameters, ChannelPruningParameters],
        trainer=None,
        visualization_url=None
) -> Tuple[tf.compat.v1.Session, CompressionStats]:
    """
    Compress a given model using the specified parameters

    :param sess: Model, represented by a tf.compat.v1.Session, to compress
    :param working_dir: File path to save compressed TensorFlow meta file
    :param eval_callback: Evaluation callback. Expected to return an accuracy metric.
    :param eval_iterations: Iterations to run evaluation for
    :param input_shape: tuple or list of tuples of input shapes to the model (channels_last format)
    :param compress_scheme: Compression scheme. See the enum for allowed values
    :param cost_metric: Cost metric to use for the compression-ratio (either mac or memory)
    :param parameters: Compression parameters specific to given compression scheme
    :param trainer: Training Class: Contains a callable, train_model, which takes model, layer which
           is being fine tuned and an optional parameter train_flag as a parameter.
           None: If per layer fine tuning is not required while creating the final compressed model
    :param visualization_url: url the user will need to input where visualizations will appear
    :return: A tuple of the compressed model session, and compression statistics
    :raises ValueError: if parameters.multiplicity is below 1 or compress_scheme is not supported
    """
    # Validate arguments before any external resource (the bokeh session) is created,
    # so that invalid input cannot leave a server session behind.
    if parameters.multiplicity < 1:
        raise ValueError('Rounding Multiplicity should be greater than or equal to 1')

    if compress_scheme not in (CompressionScheme.spatial_svd, CompressionScheme.channel_pruning):
        raise ValueError(
            "Compression scheme not supported: {}".format(compress_scheme))

    # If no url is passed in, then do not create a bokeh server session
    if not visualization_url:
        bokeh_session = None
    else:
        # create a bokeh session to publish visualizations to the server document for compression
        bokeh_session = BokehServerSession(url=visualization_url, session_id="compression")

    if compress_scheme == CompressionScheme.spatial_svd:
        # wrapper_func saves and reloads the graph before evaluation
        # In TF after making changes to the graph you must save and reload, then evaluate
        eval_callback = wrapper_func(eval_callback)
        algo = CompressionFactory.create_spatial_svd_algo(
            sess, working_dir, eval_callback, eval_iterations,
            input_shape, cost_metric, parameters, bokeh_session)
    else:
        # channel pruning; note the factory takes input_shape before eval_iterations here
        algo = CompressionFactory.create_channel_pruning_algo(
            sess, working_dir, eval_callback, input_shape,
            eval_iterations, cost_metric, parameters, bokeh_session)

    compressed_layer_db, stats = algo.compress_model(cost_metric, trainer)

    # TODO: this is a temporary fix, needs to be resolved
    # In TF after making changes to the graph you must save and reload, then evaluate
    updated_model = save_and_load_graph('./saver', compressed_layer_db.model)
    # the database's session is superseded by the reloaded one
    compressed_layer_db.model.close()

    return updated_model, stats
def test_prune_layer(self):
    """
    Pruning single layer with 0.5 comp-ratio in MNIST.

    Splits conv2d/Conv2D with SpatialSvdPruner, checks the two resulting layers, the rewiring
    of the bias op consumers, and that the split weights survive a save/reload of the graph.
    """
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    # create session with its own graph
    sess = tf.compat.v1.Session(graph=tf.Graph(), config=config)

    with sess.graph.as_default():
        # by default, model will be constructed in default graph
        _ = mnist_tf_model.create_model(data_format='channels_last')
        sess.run(tf.compat.v1.global_variables_initializer())

    # Create a layer database
    orig_layer_db = LayerDatabase(model=sess, input_shape=(1, 28, 28, 1), working_dir=None)
    # Copy the db
    comp_layer_db = copy.deepcopy(orig_layer_db)
    conv1 = comp_layer_db.find_layer_by_name('conv2d/Conv2D')

    # before the splitting
    bias_op = get_succeeding_bias_op(conv1.module)
    for consumer in bias_op.outputs[0].consumers():
        self.assertEqual(consumer.name, "conv2d/Relu")

    spatial_svd_pruner = SpatialSvdPruner()
    spatial_svd_pruner._prune_layer(orig_layer_db, comp_layer_db, conv1, 0.5, CostMetric.mac)

    conv2d_a_op = comp_layer_db.model.graph.get_operation_by_name('conv2d_a/Conv2D')
    conv2d_b_op = comp_layer_db.model.graph.get_operation_by_name('conv2d_b/Conv2D')
    conv2d_a_weight = WeightTensorUtils.get_tensor_as_numpy_data(comp_layer_db.model, conv2d_a_op)
    conv2d_b_weight = WeightTensorUtils.get_tensor_as_numpy_data(comp_layer_db.model, conv2d_b_op)

    conv1_a = comp_layer_db.find_layer_by_name('conv2d_a/Conv2D')
    conv1_b = comp_layer_db.find_layer_by_name('conv2d_b/Conv2D')

    # [Noc, Nic, kh, kw]
    self.assertEqual([2, 1, 5, 1], conv1_a.weight_shape)
    self.assertEqual([32, 2, 1, 5], conv1_b.weight_shape)

    # after the splitting
    bias_op = get_succeeding_bias_op(conv1_b.module)
    for consumer in bias_op.outputs[0].consumers():
        self.assertEqual(consumer.name, "conv2d/Relu")

    # original layer should be not there in the database
    self.assertRaises(
        KeyError, lambda: comp_layer_db.find_layer_by_name('conv2d/Conv2D'))

    # check if the layer replacement is done correctly
    orig_conv_op = comp_layer_db.model.graph.get_operation_by_name('conv2d/Conv2D')
    bias_op = get_succeeding_bias_op(orig_conv_op)

    # consumers list should be empty
    consumers = list(bias_op.outputs[0].consumers())
    self.assertEqual(len(consumers), 0)

    # Check that weights loaded during svd pruning will stick after save and load.
    # The ops and weights must be read from the RELOADED session; reading them from
    # comp_layer_db.model again would compare the pre-save session with itself.
    new_sess = save_and_load_graph('./temp_meta/', comp_layer_db.model)
    reloaded_a_op = new_sess.graph.get_operation_by_name('conv2d_a/Conv2D')
    reloaded_b_op = new_sess.graph.get_operation_by_name('conv2d_b/Conv2D')
    conv2d_a_weight_after_save_load = WeightTensorUtils.get_tensor_as_numpy_data(new_sess, reloaded_a_op)
    conv2d_b_weight_after_save_load = WeightTensorUtils.get_tensor_as_numpy_data(new_sess, reloaded_b_op)

    self.assertTrue(np.array_equal(conv2d_a_weight, conv2d_a_weight_after_save_load))
    self.assertTrue(np.array_equal(conv2d_b_weight, conv2d_b_weight_after_save_load))

    tf.compat.v1.reset_default_graph()
    sess.close()
    new_sess.close()

    # delete temp directory
    shutil.rmtree('./temp_meta/')
def _fold_given_auto_selected_batch_norms(
        sess: tf.compat.v1.Session,
        layer_pairs: List[PairType]) -> tf.compat.v1.Session:
    """
    Fold a given set of batch_norm layers into conv layers

    For each pair the folded weights and bias are computed with libpymo.fold, the weights are
    written back in TF layout, the batch norm op is bypassed and the bias of the conv/linear
    op is updated.

    :param sess: tf.compat.v1.Session
    :param layer_pairs: 3-tuples of (conv/linear op, bn op with in/out tensors,
           flag telling whether the batch norm comes second)
    :return: new session with updated graph
    """
    with sess.graph.as_default():
        for conv_linear, batchnorm, is_batch_norm_second in layer_pairs:

            assert conv_linear.type in ['Conv2D', 'DepthwiseConv2dNative', 'MatMul']

            # tells the fold whether the existing bias values are meaningful
            is_bias_valid = not BiasUtils.is_bias_none(conv_linear)

            bn_params = _get_bn_params(sess, batchnorm.op)
            weight_tensor = _get_weight_tensor_transpose_reshape(sess, conv_linear)
            bias_tensor = _get_bias_tensor(sess, conv_linear)

            bias = libpymo.fold(bn_params, weight_tensor, bias_tensor,
                                is_bias_valid, is_batch_norm_second)

            # convert back to the TF layout before updating the weight tensor value
            if conv_linear.type == 'MatMul':
                # o, i - convert to i , o
                two_dim_shape = [weight_tensor.shape[0], weight_tensor.shape[1]]
                numpy_weight_reshaped = np.reshape(weight_tensor.data, two_dim_shape).transpose(1, 0)
            else:
                if conv_linear.type == 'DepthwiseConv2dNative':
                    # Depthwise conv layers in TF have outputs(Noc) set to 1.
                    # we sent in format [Nic, Noc, kh, kw]; TF format is [kh, kw, Nic, Noc]
                    back_to_tf_axes = (2, 3, 0, 1)
                else:
                    # conv2D case: we sent in format [Noc, Nic, kh, kw]
                    back_to_tf_axes = (2, 3, 1, 0)
                numpy_weight_reshaped = np.reshape(
                    weight_tensor.data, weight_tensor.shape).transpose(back_to_tf_axes)

            WeightTensorUtils.update_tensor_for_op(sess, conv_linear, numpy_weight_reshaped)

            # remove bn op
            BNUtils.skip_bn_op(sess, batchnorm.op, batchnorm.in_tensor, batchnorm.out_tensor)

            # update bias tensor, even in case there was no existing bias add op in given conv2D op.
            numpy_bias_reshaped = np.reshape(bias, [weight_tensor.shape[0]])
            BiasUtils.update_bias_for_op(sess, conv_linear, numpy_bias_reshaped)

        # the graph was edited, so save and reload for the metagraph associated with the
        # session to be updated
        after_bn_fold_sess = save_and_load_graph('./temp_bn_fold', sess)

    return after_bn_fold_sess
def test_reducing_inceptionV3(self):
    """
    Winnow input channels of four convs in InceptionV3, reload the graph, check the channel
    counts of the reduced tensors and run the reduced model once.
    """
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    _ = InceptionV3(weights=None)
    sess.run(tf.compat.v1.global_variables_initializer())

    # (conv op name, input channels to winnow)
    winnow_spec = [
        ("conv2d_12/Conv2D", [0, 1, 64, 128, 224]),
        ("conv2d_13/Conv2D", [0, 64, 65, 66, 128, 224]),
        ("conv2d_15/Conv2D", [0, 64, 128, 129, 130, 131, 224]),
        ("conv2d_18/Conv2D", [0, 64, 128, 224, 225, 226, 227, 228]),
    ]
    default_graph = tf.compat.v1.get_default_graph()
    module_zero_channels_list = [(default_graph.get_operation_by_name(op_name), channels)
                                 for op_name, channels in winnow_spec]

    new_sess, ordered_modules_list = winnow.winnow_tf_model(
        sess, ["input_1"], ['predictions/Softmax'], module_zero_channels_list,
        reshape=True, in_place=True, verbose=True)

    # Save and reload modified graph to allow changes to take effect.
    # Uninitialized variables must be initialized first, since only newly winnowed conv ops are
    # initialized during winnow_tf_model, and all other newly winnowed ops are not.
    with new_sess.graph.as_default():
        initialize_uninitialized_vars(new_sess)
    new_sess = save_and_load_graph('./saver', new_sess)

    with new_sess.graph.as_default():
        random_image = tf.random.uniform(shape=(1, 299, 299, 3)).eval(session=new_sess)

    reduced_graph = new_sess.graph
    graph_input = reduced_graph.get_tensor_by_name("input_1:0")
    graph_output = reduced_graph.get_tensor_by_name("predictions/Softmax:0")

    # (reduced op name, expected channel count of its first input)
    expected_input_channels = [
        ("reduced_conv2d_12/Conv2D", 251),
        ("reduced_conv2d_13/Conv2D", 250),
        ("reduced_conv2d_15/Conv2D", 249),
        ("reduced_conv2d_18/Conv2D", 248),
    ]
    # (reduced output tensor name, expected channel count)
    expected_output_channels = [
        ("reduced_conv2d_5/Conv2D:0", 63),
        ("reduced_conv2d_7/Conv2D:0", 63),
        ("reduced_conv2d_10/Conv2D:0", 95),
        ("reduced_conv2d_11/Conv2D:0", 31),
    ]

    # fetch every tensor first, then check the shapes
    input_tensors = [reduced_graph.get_operation_by_name(op_name).inputs[0]
                     for op_name, _ in expected_input_channels]
    output_tensors = [reduced_graph.get_tensor_by_name(tensor_name)
                      for tensor_name, _ in expected_output_channels]

    for tensor, (_, channels) in zip(input_tensors, expected_input_channels):
        self.assertEqual(channels, tensor.shape.as_list()[-1])
    for tensor, (_, channels) in zip(output_tensors, expected_output_channels):
        self.assertEqual(channels, tensor.shape.as_list()[-1])

    self.assertEqual(17, len(ordered_modules_list))

    # run through entire model to check no error is produced
    _ = new_sess.run(graph_output, feed_dict={graph_input: random_image})

    new_sess.close()
    sess.close()
def test_reducing_resnet_50(self):
    """
    Winnow input channels of four convs in ResNet50, reload the graph, check the channel
    counts of the reduced tensors and run the reduced model once.
    """
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()
    _ = ResNet50(weights=None)
    sess.run(tf.compat.v1.global_variables_initializer())

    # (conv op name, input channels to winnow)
    winnow_spec = [
        ("conv2_block1_1_conv/Conv2D", [3, 5, 7]),
        ("conv2_block1_0_conv/Conv2D", [3, 5, 7, 8]),
        ("conv3_block1_1_conv/Conv2D", [3, 5, 7]),
        ("conv3_block1_0_conv/Conv2D", [3, 5, 7, 8]),
    ]
    default_graph = tf.compat.v1.get_default_graph()
    module_zero_channels_list = [(default_graph.get_operation_by_name(op_name), channels)
                                 for op_name, channels in winnow_spec]

    new_sess, ordered_modules_list = winnow.winnow_tf_model(
        sess, ["input_1"], ['probs/Softmax'], module_zero_channels_list,
        reshape=True, in_place=True, verbose=True)

    # Save and reload modified graph to allow changes to take effect.
    # Uninitialized variables must be initialized first, since only newly winnowed conv ops are
    # initialized during winnow_tf_model, and all other newly winnowed ops are not.
    with new_sess.graph.as_default():
        initialize_uninitialized_vars(new_sess)
    new_sess = save_and_load_graph('./saver', new_sess)

    with new_sess.graph.as_default():
        random_image = tf.random.uniform(shape=(1, 224, 224, 3)).eval(session=new_sess)

    reduced_graph = new_sess.graph
    graph_input = reduced_graph.get_tensor_by_name("input_1:0")
    graph_output = reduced_graph.get_tensor_by_name("probs/Softmax:0")

    def first_input_of(op_name):
        return reduced_graph.get_operation_by_name(op_name).inputs[0]

    output_tensor = reduced_graph.get_tensor_by_name

    # (how to fetch the tensor, graph name, expected channel count)
    expectations = [
        (first_input_of, "reduced_conv3_block1_1_conv/Conv2D", 253),
        (first_input_of, "reduced_conv3_block1_0_conv/Conv2D", 252),
        (output_tensor, "reduced_conv2_block3_3_conv/Conv2D:0", 253),
        (first_input_of, "reduced_conv2_block1_1_conv/Conv2D", 61),
        (first_input_of, "reduced_conv2_block1_0_conv/Conv2D", 60),
        (output_tensor, "reduced_conv1_conv/Conv2D:0", 61),
    ]

    # fetch every tensor first, then check the shapes
    fetched = [fetch(graph_name) for fetch, graph_name, _ in expectations]
    for tensor, (_, _, channels) in zip(fetched, expectations):
        self.assertEqual(channels, tensor.shape.as_list()[-1])

    # run through entire model to check no error is produced
    _ = new_sess.run(graph_output, feed_dict={graph_input: random_image})

    self.assertEqual(11, len(ordered_modules_list))

    new_sess.close()
    sess.close()
def correct_bias(reference_model: tf.compat.v1.Session,
                 bias_correct_params: BiasCorrectionParams,
                 quant_params: QuantParams,
                 data_set: tf.data.Dataset,
                 conv_bn_dict: Union[Dict[tf.Operation, ConvBnInfoType], None] = None,
                 perform_only_empirical_bias_corr: bool = True):
    """
    Top level function for bias correction

    The session passed in is first replaced by the one returned from
    BiasUtils.initialize_model_with_bias; that replacement serves as the reference and a
    save/reload copy of it is corrected and returned. The internal reference session is closed
    before returning.
    NOTE(review): initialize_model_with_bias appears to close the session it receives, so
    callers should not reuse the session they passed in - confirm.

    :param reference_model: active tf.compat.v1.Session for the model to be corrected.
    :param bias_correct_params: input params for bias correction
    :param quant_params: QuantParams type with params for quantization simulation for bias correction.
    :param data_set: input data set
    :param conv_bn_dict: Dict of conv and bn with activation info. If None, the function looks for it.
           This can be obtained on the model with bns and convs using
           BiasCorrection.find_all_convs_bn_with_activation() api.
    :param perform_only_empirical_bias_corr: a flag to indicate only empirical bias correction is
           to be performed.
    :return: updated session with corrected bias for given ops
    """
    # one time initialization of all layers with bias param
    reference_model = BiasUtils.initialize_model_with_bias(
        reference_model, bias_correct_params.input_op_names,
        bias_correct_params.output_op_names)

    try:
        # Create a copy of the reference model; the copy is the one being corrected
        corrected_model = save_and_load_graph('./temp_meta_path', reference_model)

        # get all ordered convs/ linears and skip gradient ops
        ordered_conv_linears = get_ordered_conv_linears(
            reference_model, bias_correct_params.input_op_names,
            bias_correct_params.output_op_names)

        # Get conv2D, depthwise with preceding BN ops info for analytical bias correction
        # if user has not passed any dictionary
        if conv_bn_dict is None:
            convs_bn_activation_info_dict = BiasCorrection.find_all_convs_bn_with_activation(
                reference_model, bias_correct_params.input_op_names,
                bias_correct_params.output_op_names)
        else:
            convs_bn_activation_info_dict = BiasCorrection.refresh_op_ref(
                reference_model, conv_bn_dict)

        # Perform analytical bias correction for first conv layer
        # we always perform empirical bias correction for linear layers
        if ordered_conv_linears:
            if not perform_only_empirical_bias_corr and \
                    ordered_conv_linears[0].type not in ['MatMul']:
                # the first conv is handled here and removed from the list processed below
                first_conv = ordered_conv_linears.pop(0)
                BiasCorrection.analytical_bias_correction_per_layer(
                    corrected_model, first_conv, None, quant_params,
                    is_first_conv=True)

        # for each candidate layer in an ordered list of conv/linear ops
        # find the corresponding bn and activation info
        for layer in ordered_conv_linears:

            # if this layer is in selected patterns of convs with preceding BN op and
            # if empirical flag is false
            # perform analytical Bias correction
            if layer in convs_bn_activation_info_dict and not perform_only_empirical_bias_corr:
                preceding_bn_layer_info = convs_bn_activation_info_dict[layer]
                BiasCorrection.analytical_bias_correction_per_layer(
                    corrected_model, layer, preceding_bn_layer_info, quant_params)
            else:
                # stand-alone convs/ linears or when perform_only_empirical_bias_corr is set to True
                # perform empirical bias correction
                BiasCorrection.bias_correction_per_layer(
                    reference_model, corrected_model, bias_correct_params,
                    layer.name, quant_params, data_set)

        logger.info('Completed bias correction')
        return corrected_model
    finally:
        # the reference session was created inside this function and is never handed back
        # to the caller, so release it here instead of leaking it
        reference_model.close()
def bias_fold(sess: tf.compat.v1.Session,
              folded_pairs: List[Tuple[tf.Operation, tf.Operation]],
              cls_set_info_list: List[ClsSetInfo]) -> tf.compat.v1.Session:
    """
    Folds bias values greater than 3 * sigma to next layer's bias

    :param sess: Current session
    :param folded_pairs: pairs of (Conv/Linear layer, corresponding folded BN layer)
    :param cls_set_info_list: List of info elements for each cls set
    :return: updated session after graph updates from hbf; the session passed in is returned
             unchanged when no batch norm layers are available
    """
    with sess.graph.as_default():

        # refresh the references saved during bn fold and cls.
        cls_set_info_list, bn_layers = HighBiasFold._refresh_layer_set_info_before_hbf(
            sess, folded_pairs, cls_set_info_list)

        if not bn_layers:
            logger.error('High Bias folding is not supported for models without BatchNorm Layers')
            return sess

        all_pairs = (pair for cls_set in cls_set_info_list
                     for pair in cls_set.cls_pair_info_list)

        for pair in all_pairs:
            first_layer = pair.layer1

            # a corresponding bn layer is required
            if first_layer.name not in bn_layers:
                logger.info("skipping layer: {%s}", first_layer.name)
                continue

            second_layer = pair.layer2

            # both layers need a bias
            if BiasUtils.is_bias_none(first_layer) or BiasUtils.is_bias_none(second_layer):
                continue

            prev_layer_params = libpymo.LayerParams()
            curr_layer_params = libpymo.LayerParams()

            prev_layer_bn_params = HighBiasFold.get_bn_params_for_bias_fold(
                sess, bn_layers[first_layer.name], pair.scale_factor)

            # previous layer: activation flag, bias and weight shape
            prev_layer_params.activationIsRelu = pair.relu_activation_between_layers
            prev_layer_params.bias = BiasUtils.get_bias_as_numpy_data(sess, first_layer).reshape(-1)
            prev_bias_shape = BiasUtils.get_shape(first_layer)
            prev_weight_shape = WeightTensorUtils.get_tensor_shape(first_layer)
            prev_layer_params.weightShape = [prev_weight_shape[3], prev_weight_shape[2],
                                             prev_weight_shape[0], prev_weight_shape[1]]

            # current layer: bias, weights and weight shape
            curr_layer_params.bias = BiasUtils.get_bias_as_numpy_data(sess, second_layer).reshape(-1)
            curr_bias_shape = BiasUtils.get_shape(second_layer)
            curr_weight_shape = WeightTensorUtils.get_tensor_shape(second_layer)

            # TF format is [kh, kw, Nic, Noc].
            # Depthwise layers (num outputs set to 1 in TF) are sent as [Nic, Noc, kh, kw],
            # all other layers as [Noc, Nic, kh, kw].
            if second_layer.type in ['DepthwiseConv2dNative']:
                axes = (2, 3, 0, 1)
            else:
                axes = (3, 2, 0, 1)

            curr_weights = WeightTensorUtils.get_tensor_as_numpy_data(sess, second_layer).transpose(axes)
            curr_layer_params.weight = curr_weights.reshape(-1)
            curr_layer_params.weightShape = [curr_weight_shape[axis] for axis in axes]

            libpymo.updateBias(prev_layer_params, curr_layer_params, prev_layer_bn_params)

            # write both updated biases back in their original shapes
            BiasUtils.update_bias_for_op(sess, first_layer,
                                         np.reshape(prev_layer_params.bias, prev_bias_shape))
            BiasUtils.update_bias_for_op(sess, second_layer,
                                         np.reshape(curr_layer_params.bias, curr_bias_shape))

        # save and load the updated graph after high bias fold update
        aftr_hbf_sess = save_and_load_graph('./temp_hbf', sess)

    return aftr_hbf_sess
def test_reducing_vgg16(self):
    """ This test winnows a VGG16 model"""
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()

    _ = VGG16(weights=None)
    sess.run(tf.compat.v1.global_variables_initializer())

    # (conv op name, input channels to winnow), in the order they are handed to the winnower
    winnow_requests = (
        ("block5_conv1/Conv2D", [3, 5, 7]),
        ("block3_conv1/Conv2D", [11, 13, 15, 17]),
        ("block2_conv2/Conv2D", [1, 2, 3, 4, 5]),
        ("block2_conv1/Conv2D", [20, 21, 22, 23]),
    )
    default_graph = tf.compat.v1.get_default_graph()
    module_zero_channels_list = [
        (default_graph.get_operation_by_name(op_name), channels)
        for op_name, channels in winnow_requests
    ]

    new_sess, ordered_modules_list = winnow.winnow_tf_model(
        sess,
        ["input_1"],
        ['predictions/Softmax'],
        module_zero_channels_list,
        reshape=True,
        in_place=True,
        verbose=True)

    # Save and reload modified graph to allow changes to take effect
    new_sess = save_and_load_graph('./saver', new_sess)

    # uncomment the following to generate tensorboard viewable file
    # _ = tf.compat.v1.summary.FileWriter('./reduced_graph', new_sess.graph)

    # Check certain weight indices to ensure that weights were reduced correctly
    reloaded_graph = new_sess.graph
    original_kernel = new_sess.run(
        reloaded_graph.get_tensor_by_name("block4_conv3/kernel/Read/ReadVariableOp:0"))
    reduced_kernel = new_sess.run(
        reloaded_graph.get_tensor_by_name("reduced_block4_conv3/kernel/Read/ReadVariableOp:0"))

    self.assertEqual(reduced_kernel.shape, (3, 3, 512, 509))
    self.assertEqual(reduced_kernel[0, 0, 0, 2], original_kernel[0, 0, 0, 2])
    # the tail of the reduced kernel (from index 5) must match the tail of the original
    # kernel (from index 8) - presumably three output channels were dropped in between
    self.assertEqual(np.sum(reduced_kernel[0, 0, 0, 5:]),
                     np.sum(original_kernel[0, 0, 0, 8:]))

    # Build one random input sample inside the reloaded graph
    with reloaded_graph.as_default():
        random_input = tf.random.uniform(shape=(1, 224, 224, 3))
        inp_array = new_sess.run(random_input)

    # run through entire model to check no error is produced
    model_input = reloaded_graph.get_tensor_by_name("input_1:0")
    model_output = reloaded_graph.get_tensor_by_name("predictions/Softmax:0")
    _ = new_sess.run(model_output, feed_dict={model_input: inp_array})

    self.assertEqual(13, len(ordered_modules_list))

    new_sess.close()
    sess.close()
def test_bias_correction_model_tf_with_no_bias(self):
    """
    Test bias correction for custom model

    Builds three conv + relu stages where no conv has a bias param, runs bias correction on a
    saved-and-reloaded copy of the session and checks that 'conv2d_1/Conv2D' has a bias
    afterwards.
    """
    tf.compat.v1.reset_default_graph()

    inputs = tf.keras.Input(shape=(32, 32, 3,))
    conv_op = tf.keras.layers.Conv2D(32, (3, 3), use_bias=False)(inputs)
    relu_1 = tf.nn.relu(conv_op)

    conv2_op = tf.keras.layers.Conv2D(32, (3, 3), use_bias=False)(relu_1)
    relu_2 = tf.nn.relu(conv2_op)

    conv3_op = tf.keras.layers.Conv2D(32, (3, 3), use_bias=False)(relu_2)
    _ = tf.nn.relu(conv3_op)

    init = tf.compat.v1.global_variables_initializer()
    sess = tf.compat.v1.Session()
    sess.run(init)

    # save and reload the graph; bias correction below runs on the reloaded session
    n_sess = save_and_load_graph('./test_update', sess)

    # the model is bounded by the input of the first conv and the last relu
    conv_op = n_sess.graph.get_operation_by_name('conv2d/Conv2D')
    input_op_names = [conv_op.inputs[0].op.name]
    output_op_names = [n_sess.graph.get_operation_by_name('Relu_2').name]

    batch_size = 1
    num_samples = 10

    # seeded random data: num_samples slices, each one a batch of batch_size model inputs
    np.random.seed(0)
    shape = conv_op.inputs[0].shape
    dataset = np.random.rand(num_samples, batch_size, shape[1], shape[2], shape[3])
    dataset = tf.convert_to_tensor(dataset)
    dataset = tf.data.Dataset.from_tensor_slices(dataset)

    quant_params = QuantParams(quant_mode='tf', use_cuda=False)
    bias_correction_params = BiasCorrectionParams(
        batch_size=batch_size,
        num_quant_samples=num_samples,
        num_bias_correct_samples=num_samples,
        input_op_names=input_op_names,
        output_op_names=output_op_names)

    # precondition, checked on the session that is handed to bias correction
    conv_op = n_sess.graph.get_operation_by_name('conv2d_1/Conv2D')
    self.assertTrue(BiasUtils.is_bias_none(conv_op))

    new_sess = BiasCorrection.correct_bias(
        n_sess,
        bias_correction_params,
        quant_params,
        dataset,
        perform_only_empirical_bias_corr=False)

    # the returned session must now hold a bias for the same conv
    conv_op = new_sess.graph.get_operation_by_name('conv2d_1/Conv2D')
    self.assertFalse(BiasUtils.is_bias_none(conv_op))

    sess.close()
    n_sess.close()
    new_sess.close()