def test_bias_correction_empirical_with_config_file(self):
    """Run bias correction with the default (None) config file and a mocked
    empirical-correction call; verify the call count and that conv1's bias
    is unchanged (the correction itself is mocked out)."""
    # Using a dummy extension of MNIST
    torch.manual_seed(10)
    model = mnist_model.Net()
    model = model.eval()
    # Keep a pristine copy to compare biases against after correction
    model_copy = copy.deepcopy(model)
    dataset_size = 2
    batch_size = 1
    data_loader = create_fake_data_loader(dataset_size=dataset_size,
                                          batch_size=batch_size,
                                          image_size=(1, 28, 28))
    # Takes default config file
    params = qsim.QuantParams(weight_bw=4,
                              act_bw=4,
                              round_mode="nearest",
                              quant_scheme=QuantScheme.post_training_tf,
                              config_file=None)
    # Patch out the MO empirical correction so only the orchestration runs
    with unittest.mock.patch(
            'aimet_torch.bias_correction.call_empirical_mo_correct_bias'
    ) as empirical_mock:
        bias_correction.correct_bias(model, params, 2, data_loader, 2)
    # Empirical correction is expected to be invoked 4 times
    # (presumably once per layer with a bias — TODO confirm against Net)
    self.assertEqual(empirical_mock.call_count, 4)
    # conv1 bias must be untouched since the actual correction was mocked
    self.assertTrue(
        np.allclose(model.conv1.bias.detach().cpu().numpy(),
                    model_copy.conv1.bias.detach().cpu().numpy()))
    self.assertTrue(model.conv2.bias.detach().cpu().numpy() is not None)
    self.assertTrue(model.fc1.bias.detach().cpu().numpy() is not None)
def testSpatialSvd(self):
    """End-to-end spatial-SVD compression of the MNIST net with mocked
    eval scores and mocked comp-ratio rounding; checks that both conv
    layers are split and per-layer stats match the mocked inputs."""
    torch.manual_seed(1)
    model = mnist_torch_model.Net()
    # Rounded comp-ratio candidates, consumed in order by the greedy algo
    rounding_algo = unittest.mock.MagicMock()
    rounding_algo.round.side_effect = [
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9
    ]
    # Mocked eval scores, also consumed in order — do not reorder calls
    mock_eval = unittest.mock.MagicMock()
    mock_eval.side_effect = [
        100, 90, 80, 70, 60, 50, 40, 30, 20, 10,
        90, 80, 70, 60, 50, 40, 30, 20, 10, 50
    ]
    layer_db = LayerDatabase(model, input_shape=(1, 1, 28, 28))
    pruner = SpatialSvdPruner()
    comp_ratio_select_algo = GreedyCompRatioSelectAlgo(
        layer_db, pruner, SpatialSvdCostCalculator(), mock_eval, 20,
        CostMetric.mac, Decimal(0.5), 10, True, None, rounding_algo, True,
        bokeh_session=None)
    layer_selector = ConvNoDepthwiseLayerSelector()
    spatial_svd_algo = CompressionAlgo(
        layer_db, comp_ratio_select_algo, pruner, mock_eval, layer_selector,
        modules_to_ignore=[],
        cost_calculator=SpatialSvdCostCalculator(),
        use_cuda=next(model.parameters()).is_cuda)
    compressed_layer_db, stats = spatial_svd_algo.compress_model(
        CostMetric.mac, trainer=None)
    # Spatial SVD replaces each pruned conv with a two-stage Sequential
    self.assertTrue(
        isinstance(compressed_layer_db.model.conv1, torch.nn.Sequential))
    self.assertTrue(
        isinstance(compressed_layer_db.model.conv2, torch.nn.Sequential))
    self.assertTrue(stats.per_layer_stats[0].compression_ratio <= 0.5)
    self.assertEqual(0.3, stats.per_layer_stats[1].compression_ratio)
    print("Compressed model:")
    print(compressed_layer_db.model)
    print(stats)
def test_calculate_spatial_svd_cost_all_layers_given_ranks(self):
    """Compressed MAC cost when explicit SVD ranks are supplied per layer;
    a rank of None leaves that layer uncompressed."""
    model = mnist_model.Net().to("cpu")
    ld = lad.LayerDatabase(model=model, input_shape=(1, 1, 28, 28))
    # Compress all layers by 50%
    # Create a list of tuples of (layer, comp_ratio)
    layer_rank_list = [(ld.find_layer_by_module(model.conv1), 2),
                       (ld.find_layer_by_module(model.conv2), 53),
                       (ld.find_layer_by_module(model.fc1), 385),
                       (ld.find_layer_by_module(model.fc2), 4)]
    compressed_cost = cc.SpatialSvdCostCalculator.calculate_compressed_cost_given_ranks(
        ld, layer_rank_list)
    # fc1 split: 3136*385 + 385*1024; fc2 split: 1024*4 + 4*10
    self.assertEqual(
        5244960 + (3136 * 385 + 385 * 1024) + (1024 * 4 + 4 * 10),
        compressed_cost.mac)
    # Create a list of tuples of (layer, comp_ratio)
    # Same ranks, but fc2 rank None => fc2 keeps its original 1024*10 MACs
    layer_rank_list = [(ld.find_layer_by_module(model.conv1), 2),
                       (ld.find_layer_by_module(model.conv2), 53),
                       (ld.find_layer_by_module(model.fc1), 385),
                       (ld.find_layer_by_module(model.fc2), None)]
    compressed_cost = cc.SpatialSvdCostCalculator.calculate_compressed_cost_given_ranks(
        ld, layer_rank_list)
    self.assertEqual(5244960 + (3136 * 385 + 385 * 1024) + (1024 * 10),
                     compressed_cost.mac)
def test_get_quantized_weight(self):
    """get_quantized_dequantized_weight must return a quant-dequant copy of
    the layer's weight with the original shape preserved."""
    model = mnist_model.Net()
    params = qsim.QuantParams(weight_bw=4,
                              act_bw=4,
                              round_mode="nearest",
                              quant_scheme=QuantScheme.post_training_tf)
    use_cuda = False
    dataset_size = 2
    batch_size = 1
    data_loader = create_fake_data_loader(dataset_size=dataset_size,
                                          batch_size=batch_size)

    def pass_data_through_model(model,
                                early_stopping_iterations=None,
                                use_cuda=False):
        # forward pass for given number of batches for model
        for _, (images_in_one_batch, _) in enumerate(data_loader):
            model(images_in_one_batch)

    quantsim = qsim.QuantizationSimModel(model=model,
                                         quant_scheme=params.quant_scheme,
                                         rounding_mode=params.round_mode,
                                         default_output_bw=params.act_bw,
                                         default_param_bw=params.weight_bw,
                                         in_place=False,
                                         dummy_input=torch.rand(1, 1, 28, 28))
    quantsim.compute_encodings(pass_data_through_model, None)
    layer = quantsim.model.conv2
    quant_dequant_weights = bias_correction.get_quantized_dequantized_weight(
        layer, use_cuda)
    # conv2 weight: 64 out-channels x 32 in-channels x 5x5 kernel
    self.assertEqual(quant_dequant_weights.shape, torch.Size([64, 32, 5, 5]))
def test_calculate_weight_svd_cost_all_layers(self):
    """Weight-SVD compressed MAC cost when every layer is compressed by 50%.

    Fix: the original loop had an if/else on ``isinstance(layer.module,
    nn.Conv2d)`` whose two branches were byte-identical; the dead
    conditional is collapsed into a single comprehension.
    """
    model = mnist_model.Net().to("cpu")
    print(model)
    layer_database = lad.LayerDatabase(model=model,
                                       input_shape=(1, 1, 28, 28))

    # Compress all layers by 50%: one (layer, comp_ratio) pair per layer
    layer_ratio_list = [
        LayerCompRatioPair(layer, Decimal('0.5')) for layer in layer_database
    ]

    compressed_cost = cc.WeightSvdCostCalculator.calculate_compressed_cost(
        layer_database, layer_ratio_list, CostMetric.mac)

    # Expected total MAC count for the 50%-compressed MNIST net
    self.assertEqual(7031800, compressed_cost.mac)
def test_manual_mode(self):
    """Manually wrap conv1 in a QcPostTrainingWrapper before building the
    sim, then compute encodings, run inference, and fine-tune on GPU.

    Fix: the original compared ``module_name is 'conv1'`` — identity
    comparison against a string literal is implementation-dependent and
    raises SyntaxWarning on modern CPython; equality (``==``) is the
    correct comparison.
    """
    torch.cuda.empty_cache()
    net = mnist_model.Net()
    model = net.to(torch.device('cuda'))

    # Adding wrapper to first convolution layer
    for module_name, module_ref in model.named_children():
        if module_name == 'conv1':
            quantized_module = QcPostTrainingWrapper(
                module_ref,
                weight_bw=8,
                activation_bw=8,
                round_mode='nearest',
                quant_scheme=QuantScheme.post_training_tf)
            setattr(model, module_name, quantized_module)

    sim = QuantizationSimModel(model,
                               dummy_input=torch.rand(1, 1, 28, 28).cuda())

    # Quantize the untrained MNIST model
    sim.compute_encodings(self.forward_pass, forward_pass_callback_args=5)

    # Run some inferences
    mnist_torch_model.evaluate(model=sim.model, iterations=100, use_cuda=True)

    # train the model again
    mnist_model.train(model=sim.model,
                      epochs=1,
                      num_batches=3,
                      batch_callback=check_if_layer_weights_are_updating,
                      use_cuda=True)
def test_calculate_spatial_svd_cost_all_layers(self):
    """Whole-model MAC cost, then the spatial-SVD compressed cost with all
    layers at a 50% compression ratio."""
    model = mnist_model.Net().to("cpu")
    print(model)
    layer_database = lad.LayerDatabase(model=model,
                                       input_shape=(1, 1, 28, 28))
    model_cost = cc.SpatialSvdCostCalculator.compute_model_cost(
        layer_database)
    # Per-layer uncompressed MACs: conv1 + conv2 + fc1 + fc2
    self.assertEqual(627200 + 10035200 + 3211264 + 10240, model_cost.mac)
    # Compress all layers by 50%
    # Create a list of tuples of (layer, comp_ratio)
    layer_ratio_list = []
    for layer in layer_database:
        layer_ratio_list.append(LayerCompRatioPair(layer, Decimal(0.5)))
    compressed_cost = cc.SpatialSvdCostCalculator.calculate_compressed_cost(
        layer_database, layer_ratio_list, CostMetric.mac)
    # fc1 split: 3136*385 + 385*1024; fc2 split: 1024*4 + 4*10
    self.assertEqual(
        5244960 + (3136 * 385 + 385 * 1024) + (1024 * 4 + 4 * 10),
        compressed_cost.mac)
def test_prune_layer(self): model = mnist_model.Net() # Create a layer database orig_layer_db = LayerDatabase(model, input_shape=(1, 1, 28, 28)) # Copy the db comp_layer_db = copy.deepcopy(orig_layer_db) conv2 = comp_layer_db.find_layer_by_name('conv2') weight_svd_pruner = WeightSvdPruner() weight_svd_pruner._prune_layer(orig_layer_db, comp_layer_db, conv2, 0.5, aimet_common.defs.CostMetric.mac) conv2_a = comp_layer_db.find_layer_by_name('conv2.0') conv2_b = comp_layer_db.find_layer_by_name('conv2.1') self.assertEqual((1, 1), conv2_a.module.kernel_size) self.assertEqual(32, conv2_a.module.in_channels) self.assertEqual(15, conv2_a.module.out_channels) self.assertEqual((5, 5), conv2_b.module.kernel_size) self.assertEqual(15, conv2_b.module.in_channels) self.assertEqual(64, conv2_b.module.out_channels) self.assertTrue(isinstance(comp_layer_db.model.conv2, nn.Sequential)) for layer in comp_layer_db: print("Layer: " + layer.name) print(" Module: " + str(layer.module)) print(comp_layer_db.model)
def test_prune_layer(self):
    """Input-channel-prune conv2 at 50% and verify the new channel counts.

    Fix: the final checks used ``self.assertTrue(x, 16)`` — unittest treats
    the second argument as a failure *message*, so those asserts were
    vacuous (a non-None module attribute is always truthy). Replaced with
    ``assertEqual`` so the channel counts are actually verified.
    """
    orig_model = mnist_torch_model.Net()
    orig_model.eval()
    # Create a layer database
    orig_layer_db = LayerDatabase(orig_model, input_shape=(1, 1, 28, 28))
    # Copy the db
    comp_layer_db = copy.deepcopy(orig_layer_db)

    dataset_size = 100
    batch_size = 10
    # max out number of batches
    number_of_batches = 10
    samples_per_image = 10
    num_reconstruction_samples = number_of_batches * batch_size * samples_per_image

    # create fake data loader with image size (1, 28, 28)
    data_loader = create_fake_data_loader(dataset_size=dataset_size,
                                          batch_size=batch_size)

    input_channel_pruner = InputChannelPruner(
        data_loader=data_loader,
        input_shape=(1, 1, 28, 28),
        num_reconstruction_samples=num_reconstruction_samples,
        allow_custom_downsample_ops=True)

    conv2 = comp_layer_db.find_layer_by_name('conv2')
    input_channel_pruner._prune_layer(orig_layer_db, comp_layer_db, conv2,
                                      0.5, CostMetric.mac)

    # 50% input-channel pruning: 32 -> 16 in-channels, out-channels intact
    self.assertEqual(16, comp_layer_db.model.conv2.in_channels)
    self.assertEqual(64, comp_layer_db.model.conv2.out_channels)
def models(self):
    """Build a quantized MNIST net and a fresh untouched one; return both
    as (quantized_model, original_model)."""
    quantized = mnist_model.Net().to(torch.device('cpu'))
    quantizer = q.Quantizer(model=quantized, use_cuda=False)

    # Quantize with 8-bit parameters and activations
    quantizer.quantize_net(run_model=mnist_model.evaluate,
                           bw_params=8,
                           bw_acts=8,
                           iterations=10)

    # Run some inferences on the quantized model
    mnist_model.evaluate(quantized, 10)

    # train the model again
    # mnist_model.train(model, 1, num_batches=1, batch_callback=check_if_layer_weights_are_updating)

    original = mnist_model.Net().to(torch.device('cpu'))
    return quantized, original
def test_select_per_layer_comp_ratios_with_spatial_svd_pruner(self):
    """Greedy comp-ratio selection with a real SpatialSvdPruner and mocked
    eval/rounding; verifies the achieved overall compression ratio."""
    pruner = SpatialSvdPruner()
    eval_func = unittest.mock.MagicMock()
    rounding_algo = unittest.mock.MagicMock()
    # Scores/ratios are consumed in call order — do not reorder
    eval_func.side_effect = [
        10, 20, 30, 40, 50, 60, 70, 80, 90,
        11, 21, 31, 35, 40, 45, 50, 55, 60
    ]
    rounding_algo.round.side_effect = [
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9
    ]
    model = mnist_torch_model.Net()
    layer_db = LayerDatabase(model, input_shape=(1, 1, 28, 28))
    # Only conv layers participate in selection
    selected_layers = [
        layer for layer in layer_db if isinstance(layer.module, nn.Conv2d)
    ]
    layer_db.mark_picked_layers(selected_layers)
    # Instantiate child
    greedy_algo = comp_ratio_select.GreedyCompRatioSelectAlgo(
        layer_db, pruner, SpatialSvdCostCalculator(), eval_func, 20,
        CostMetric.mac, Decimal(0.4), 10, True, None, rounding_algo, False,
        bokeh_session=None)
    layer_comp_ratio_list, stats = greedy_algo.select_per_layer_comp_ratios()
    original_cost = SpatialSvdCostCalculator.compute_model_cost(layer_db)
    # Unselected layers are added with ratio None (left uncompressed)
    for layer in layer_db:
        if layer not in selected_layers:
            layer_comp_ratio_list.append(LayerCompRatioPair(layer, None))
    compressed_cost = SpatialSvdCostCalculator.calculate_compressed_cost(
        layer_db, layer_comp_ratio_list, CostMetric.mac)
    actual_compression_ratio = compressed_cost.mac / original_cost.mac
    # NOTE(review): abs_tol=0.8 is very loose — this mostly sanity-checks
    # the pipeline rather than pinning the ratio precisely
    self.assertTrue(
        math.isclose(Decimal(0.3), actual_compression_ratio, abs_tol=0.8))
    print('\n')
    for pair in layer_comp_ratio_list:
        print(pair)
def test_pretrained_mnist_quantize(self):
    """Persist a briefly-trained MNIST net, reload it from disk, and run
    quantization on the reloaded model."""
    checkpoint = 'data/mnist.pth'

    trained = mnist_model.Net().to('cpu')
    mnist_model.train(trained, epochs=1, num_batches=10)
    torch.save(trained, checkpoint)

    reloaded = torch.load(checkpoint)
    quantizer = q.Quantizer(model=reloaded, use_cuda=False)

    # Quantize the reloaded model: 8-bit params and activations
    quantizer.quantize_net(bw_params=8,
                           bw_acts=8,
                           run_model=mnist_model.evaluate,
                           iterations=10)
def test_prune_model_2_layers(self): model = mnist_torch_model.Net() # Create a layer database orig_layer_db = LayerDatabase(model, input_shape=(1, 1, 28, 28)) # Copy the db comp_layer_db = copy.deepcopy(orig_layer_db) conv1 = comp_layer_db.find_layer_by_name('conv1') conv2 = comp_layer_db.find_layer_by_name('conv2') pruner = SpatialSvdPruner() layer_db = pruner.prune_model(orig_layer_db, [ LayerCompRatioPair(conv1, Decimal(0.5)), LayerCompRatioPair(conv2, Decimal(0.5)) ], CostMetric.mac, trainer=None) conv1_a = layer_db.find_layer_by_name('conv1.0') conv1_b = layer_db.find_layer_by_name('conv1.1') self.assertEqual((5, 1), conv1_a.module.kernel_size) self.assertEqual(1, conv1_a.module.in_channels) self.assertEqual(2, conv1_a.module.out_channels) self.assertEqual((1, 5), conv1_b.module.kernel_size) self.assertEqual(2, conv1_b.module.in_channels) self.assertEqual(32, conv1_b.module.out_channels) conv2_a = layer_db.find_layer_by_name('conv2.0') conv2_b = layer_db.find_layer_by_name('conv2.1') self.assertEqual((5, 1), conv2_a.module.kernel_size) self.assertEqual(32, conv2_a.module.in_channels) self.assertEqual(53, conv2_a.module.out_channels) self.assertEqual((1, 5), conv2_b.module.kernel_size) self.assertEqual(53, conv2_b.module.in_channels) self.assertEqual(64, conv2_b.module.out_channels) self.assertTrue(isinstance(layer_db.model.conv1, torch.nn.Sequential)) self.assertTrue(isinstance(layer_db.model.conv2, torch.nn.Sequential)) for layer in layer_db: print("Layer: " + layer.name) print(" Module: " + str(layer.module)) print(layer_db.model)
def test_per_layer_eval_scores(self):
    """Compute per-comp-ratio eval scores for a single picked layer while a
    live bokeh session renders progress; the first candidate (0.1) must get
    the first mocked score (90). Cleans up the bokeh server on exit."""
    url, process = start_bokeh_server_session(8006)
    bokeh_session = BokehServerSession(url=url, session_id="compression")
    pruner = unittest.mock.MagicMock()
    eval_func = unittest.mock.MagicMock()
    model = mnist_torch_model.Net().to('cpu')
    layer_db = LayerDatabase(model, input_shape=(1, 1, 28, 28))
    layer1 = layer_db.find_layer_by_name('conv1')
    layer_db.mark_picked_layers([layer1])
    # One score per comp-ratio candidate, consumed in order
    eval_func.side_effect = [90, 80, 70, 60, 50, 40, 30, 20, 10]
    # Instantiate child
    greedy_algo = comp_ratio_select.GreedyCompRatioSelectAlgo(
        layer_db, pruner, SpatialSvdCostCalculator(), eval_func, 20,
        CostMetric.mac, 0.5, 10, True, None, None, False, bokeh_session=None)
    progress_bar = ProgressBar(1,
                               "eval scores",
                               "green",
                               bokeh_session=bokeh_session)
    data_table = DataTable(num_columns=3,
                           num_rows=1,
                           column_names=[
                               '0.1', '0.2', '0.3', '0.4', '0.5', '0.6',
                               '0.7', '0.8', '0.9'
                           ],
                           row_index_names=[layer1.name],
                           bokeh_session=bokeh_session)
    # Pruner is mocked to return the db unchanged
    pruner.prune_model.return_value = layer_db
    eval_dict = greedy_algo._compute_layerwise_eval_score_per_comp_ratio_candidate(
        data_table, progress_bar, layer1)
    self.assertEqual(90, eval_dict[Decimal('0.1')])
    # Tear down the bokeh server process group
    bokeh_session.server_session.close("test complete")
    os.killpg(os.getpgid(process.pid), signal.SIGTERM)
def test_quantizer_with_ignoring_layers(self):
    """Layers passed via layers_to_ignore must keep their original module
    types after quantization, while non-ignored layers are replaced."""
    torch.cuda.empty_cache()
    net = mnist_model.Net()
    model = net.to(torch.device('cpu'))

    quantizer = q.Quantizer(model=model, use_cuda=False)
    skip_layers = [net.conv1, net.fc2]
    quantizer.quantize_net(bw_params=8,
                           bw_acts=8,
                           run_model=mnist_model.evaluate,
                           iterations=1,
                           layers_to_ignore=skip_layers)

    # Ignored layers stay as plain modules; conv2 must have been replaced
    self.assertTrue(isinstance(net.conv1, nn.Conv2d))
    self.assertFalse(isinstance(net.conv2, nn.Conv2d))
    self.assertTrue(isinstance(net.fc2, nn.Linear))
    print("Quantized Model", model)
def test_prune_layer(self): model = mnist_torch_model.Net() # Create a layer database orig_layer_db = LayerDatabase(model, input_shape=(1, 1, 28, 28)) # Copy the db comp_layer_db = copy.deepcopy(orig_layer_db) conv1 = comp_layer_db.find_layer_by_name('conv1') spatial_svd_pruner = SpatialSvdPruner() spatial_svd_pruner._prune_layer(orig_layer_db, comp_layer_db, conv1, 0.5, CostMetric.mac) conv1_a = comp_layer_db.find_layer_by_name('conv1.0') conv1_b = comp_layer_db.find_layer_by_name('conv1.1') self.assertEqual((5, 1), conv1_a.module.kernel_size) self.assertEqual(1, conv1_a.module.in_channels) self.assertEqual(2, conv1_a.module.out_channels) self.assertEqual((1, 5), conv1_b.module.kernel_size) self.assertEqual(2, conv1_b.module.in_channels) self.assertEqual(32, conv1_b.module.out_channels) self.assertTrue( isinstance(comp_layer_db.model.conv1, torch.nn.Sequential)) for layer in comp_layer_db: print("Layer: " + layer.name) print(" Module: " + str(layer.module)) print(comp_layer_db.model) # check the output shapes of two newly created split layers # first split layer output conv1_a_output = comp_layer_db.model.conv1[0](torch.rand(1, 1, 28, 28)) # second split layer output conv1_b_output = comp_layer_db.model.conv1[1](conv1_a_output) self.assertEqual(conv1_a.output_shape, list(conv1_a_output.shape)) self.assertEqual(conv1_b.output_shape, list(conv1_b_output.shape))
def test_eval_scores_with_spatial_svd_pruner(self):
    """Eval-score dictionary across comp-ratio candidates for both conv
    layers, using a real SpatialSvdPruner and a mocked eval function.

    Fix: the local variable was named ``dict``, shadowing the builtin;
    renamed to ``eval_scores_dict``.
    """
    pruner = SpatialSvdPruner()
    eval_func = unittest.mock.MagicMock()
    # Scores consumed in order: first 9 for conv1, next 9 for conv2
    eval_func.side_effect = [
        90, 80, 70, 60, 50, 40, 30, 20, 10,
        91, 81, 71, 61, 51, 41, 31, 21, 11
    ]
    model = mnist_torch_model.Net()

    # Create a layer database
    layer_db = LayerDatabase(model, input_shape=(1, 1, 28, 28))
    layer1 = layer_db.find_layer_by_name('conv1')
    layer2 = layer_db.find_layer_by_name('conv2')
    layer_db.mark_picked_layers([layer1, layer2])

    # Instantiate child
    greedy_algo = comp_ratio_select.GreedyCompRatioSelectAlgo(
        layer_db, pruner, SpatialSvdCostCalculator(), eval_func, 20,
        CostMetric.mac, 0.5, 10, True, None, None, True, bokeh_session=None)

    eval_scores_dict = greedy_algo._compute_eval_scores_for_all_comp_ratio_candidates()

    print()
    print(eval_scores_dict)

    # Candidates map to the mocked scores in consumption order
    self.assertEqual(90, eval_scores_dict['conv1'][Decimal('0.1')])
    self.assertEqual(51, eval_scores_dict['conv2'][Decimal('0.5')])
    self.assertEqual(21, eval_scores_dict['conv2'][Decimal('0.8')])
def test_calculate_channel_pruning_cost_all_layers(self):
    """Channel-pruning compressed cost: only conv2 gets a 50% ratio (the
    only input-channel-prunable layer in this net); all others pass None."""
    model = mnist_model.Net().to("cpu")
    print(model)
    layer_database = lad.LayerDatabase(model=model,
                                       input_shape=(1, 1, 28, 28))
    # Compress all layers by 50%
    # Create a list of tuples of (layer, comp_ratio)
    layer_ratio_list = []
    # Unfortunately in mnist we can only input channel prune conv2
    for layer in layer_database:
        if layer.module is model.conv2:
            layer_ratio_list.append(LayerCompRatioPair(layer, Decimal('0.5')))
        else:
            layer_ratio_list.append(LayerCompRatioPair(layer, None))
    # Create the Input channel pruner
    dataset_size = 1000
    batch_size = 10
    # create fake data loader with image size (1, 28, 28)
    data_loader = self.create_fake_data_loader(dataset_size=dataset_size,
                                               batch_size=batch_size)
    pruner = InputChannelPruner(data_loader=data_loader,
                                input_shape=(1, 1, 28, 28),
                                num_reconstruction_samples=10,
                                allow_custom_downsample_ops=True)
    cost_calculator = ChannelPruningCostCalculator(pruner)
    compressed_cost = cost_calculator.calculate_compressed_cost(
        layer_database, layer_ratio_list, CostMetric.mac)
    # Expected total MACs after pruning conv2's input channels by half
    self.assertEqual(8552704, compressed_cost.mac)
def test_prune_model_2_layers(self): model = mnist_model.Net() # Create a layer database layer_db = LayerDatabase(model, input_shape=(1, 1, 28, 28)) fc1 = layer_db.find_layer_by_name('fc1') conv2 = layer_db.find_layer_by_name('conv2') pruner = WeightSvdPruner() layer_db = pruner.prune_model(layer_db, [LayerCompRatioPair(fc1, Decimal(0.5)), LayerCompRatioPair(conv2, Decimal(0.5))], aimet_common.defs.CostMetric.mac, trainer=None) fc1_a = layer_db.find_layer_by_name('fc1.0') fc1_b = layer_db.find_layer_by_name('fc1.1') self.assertEqual(3136, fc1_a.module.in_features) self.assertEqual(1024, fc1_b.module.out_features) conv2_a = layer_db.find_layer_by_name('conv2.0') conv2_b = layer_db.find_layer_by_name('conv2.1') self.assertEqual((1, 1), conv2_a.module.kernel_size) self.assertEqual(32, conv2_a.module.in_channels) self.assertEqual(15, conv2_a.module.out_channels) self.assertEqual((5, 5), conv2_b.module.kernel_size) self.assertEqual(15, conv2_b.module.in_channels) self.assertEqual(64, conv2_b.module.out_channels) self.assertTrue(isinstance(layer_db.model.fc1, nn.Sequential)) self.assertTrue(isinstance(layer_db.model.conv2, nn.Sequential)) for layer in layer_db: print("Layer: " + layer.name) print(" Module: " + str(layer.module)) print(layer_db.model)
def test_retraining_on_quantized_model_first_step(self):
    """First step of quantize-then-retrain: compute 4-bit encodings, train a
    few batches (callback checks weights actually update), then save a
    checkpoint of the sim."""
    torch.cuda.empty_cache()
    model = mnist_model.Net().to(torch.device('cuda'))
    sim = QuantizationSimModel(model,
                               default_output_bw=4,
                               default_param_bw=4,
                               dummy_input=torch.rand(1, 1, 28, 28).cuda())
    # Quantize the untrained MNIST model
    sim.compute_encodings(self.forward_pass, forward_pass_callback_args=5)
    # train the model for entire one epoch
    mnist_model.train(model=sim.model,
                      epochs=1,
                      num_batches=3,
                      batch_callback=check_if_layer_weights_are_updating,
                      use_cuda=True)
    # Checkpoint the model
    save_checkpoint(sim, os.path.join(path, 'checkpoint.pt'))
def test_split_conv_layer_with_mo(self):
    """Split conv2 with the deprecated MO-backed SVD pruner at rank 28 and
    verify the shapes/attributes of the two resulting conv layers."""
    logger.debug(self.id())
    model = mnist_model.Net().to("cpu")
    layer_database = LayerDatabase(model=model, input_shape=(1, 1, 28, 28))
    # Patch out layer selection so SvdImpl construction does no real work
    with unittest.mock.patch(
            'aimet_torch.svd.layer_selector_deprecated.LayerSelectorDeprecated'):
        svd = s.SvdImpl(
            model=model,
            run_model=mnist_model.evaluate,
            run_model_iterations=1,
            input_shape=(1, 1, 28, 28),
            compression_type=aimet_torch.svd.svd_intf_defs_deprecated.CompressionTechnique.svd,
            cost_metric=aimet_torch.svd.svd_intf_defs_deprecated.CostMetric.memory,
            layer_selection_scheme=aimet_torch.svd.svd_intf_defs_deprecated.LayerSelectionScheme.top_n_layers,
            num_layers=2)
    conv2 = layer_database.find_layer_by_module(model.conv2)
    # Register conv2 with the MO SVD library before splitting
    pymo_utils.PymoSvdUtils.configure_layers_in_pymo_svd(
        [conv2], aimet_common.defs.CostMetric.mac, svd._svd_lib_ref)
    split_layer = svd_pruner_deprecated.DeprecatedSvdPruner
    # Rank-28 split: conv_a is the 1x1 rank layer, conv_b the 5x5 layer
    seq, conv_a, conv_b = split_layer.prune_layer(conv2, 28, svd._svd_lib_ref)
    print('\n')
    weight_arr = conv_a.module.weight.detach().numpy().flatten()
    weight_arr = weight_arr[0:10]
    print(weight_arr)
    self.assertEqual((28, model.conv2.in_channels, 1, 1),
                     conv_a.module.weight.shape)
    self.assertEqual([28], list(conv_a.module.bias.shape))
    self.assertEqual((model.conv2.out_channels, 28, 5, 5),
                     conv_b.module.weight.shape)
    self.assertEqual([model.conv2.out_channels],
                     list(conv_b.module.bias.shape))
    # Stride is inherited by both halves; padding only by the second
    self.assertEqual(model.conv2.stride, conv_a.module.stride)
    self.assertEqual(model.conv2.stride, conv_b.module.stride)
    self.assertEqual((0, 0), conv_a.module.padding)
    self.assertEqual(model.conv2.padding, conv_b.module.padding)
    self.assertEqual((1, 1), conv_a.module.kernel_size)
    self.assertEqual(model.conv2.kernel_size, conv_b.module.kernel_size)
def test_with_finetuning(self):
    """Quantize on GPU, run inference, then fine-tune the quantized model;
    the batch callback verifies weights keep updating through the sim."""
    torch.cuda.empty_cache()
    model = mnist_model.Net().to(torch.device('cuda'))
    # Baseline evaluation of the unquantized model
    mnist_torch_model.evaluate(model=model, iterations=None, use_cuda=True)
    sim = QuantizationSimModel(model,
                               dummy_input=torch.rand(1, 1, 28, 28).cuda())
    # Quantize the untrained MNIST model
    sim.compute_encodings(self.forward_pass, forward_pass_callback_args=5)
    # Run some inferences
    mnist_torch_model.evaluate(model=sim.model,
                               iterations=None,
                               use_cuda=True)
    # train the model again
    mnist_model.train(sim.model,
                      epochs=1,
                      num_batches=3,
                      batch_callback=check_if_layer_weights_are_updating,
                      use_cuda=True)
def quantize_model(trainer_function):
    """Quantize the MNIST model with an explicit config file, fine-tune it
    with the supplied trainer callable, and export the result.

    :param trainer_function: callable with signature
        (model, epochs, num_batches, use_cuda) used for fine-tuning
    """
    model = mnist_torch_model.Net().to(torch.device('cuda'))
    sim = QuantizationSimModel(
        model,
        default_output_bw=8,
        default_param_bw=8,
        dummy_input=torch.rand(1, 1, 28, 28),
        config_file=
        '../../../TrainingExtensions/common/src/python/aimet_common/quantsim_config/'
        'default_config.json')

    # Quantize the untrained MNIST model
    sim.compute_encodings(forward_pass_callback=evaluate_model,
                          forward_pass_callback_args=5)

    # Fine-tune the model's parameter using training
    trainer_function(model=sim.model,
                     epochs=1,
                     num_batches=100,
                     use_cuda=True)

    # Export the model
    sim.export(path='./',
               filename_prefix='quantized_mnist',
               dummy_input=torch.rand(1, 1, 28, 28))
def test_comp_ratio_select_tar(self):
    """TAR rank-selection over three picked layers with a fully mocked SVD
    library, eval function, and comp-ratio-per-rank mapping; verifies the
    eval score and ratio selected for the third layer."""
    compute_model_cost = unittest.mock.MagicMock()
    pruner = unittest.mock.MagicMock()
    eval_func = unittest.mock.MagicMock()
    # 4 blocks of 20 scores — consumed strictly in call order
    eval_func.side_effect = [
        0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55,
        0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.97, 1.0,
        0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55,
        0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.97, 1.0,
        0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55,
        0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.97, 1.0,
        0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55,
        0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.97, 1.0
    ]
    compute_model_cost.return_value = (500, 500)
    compute_network_cost = unittest.mock.MagicMock()
    compute_network_cost.return_value = (500, 500)

    model = mnist_torch_model.Net().to('cpu')
    layer_db = LayerDatabase(model, input_shape=(1, 1, 28, 28))
    layer1 = layer_db.find_layer_by_name('conv2')
    layer_db.mark_picked_layers([layer1])
    layer2 = layer_db.find_layer_by_name('fc2')
    layer_db.mark_picked_layers([layer2])
    layer3 = layer_db.find_layer_by_name('fc1')
    layer_db.mark_picked_layers([layer3])

    # Instantiate child
    tar_algo = comp_ratio_select.TarRankSelectAlgo(
        layer_db=layer_db,
        pruner=pruner,
        cost_calculator=WeightSvdCostCalculator(),
        eval_func=eval_func,
        eval_iterations=20,
        cost_metric=CostMetric.mac,
        num_rank_indices=20,
        use_cuda=False,
        pymo_utils_lib=pymo_utils)

    # Replace the MO SVD library with an autospec'd mock
    tar_algo._svd_lib_ref = create_autospec(pymo.Svd, instance=True)
    tar_algo._svd_lib_ref.SetCandidateRanks = unittest.mock.MagicMock()
    tar_algo._svd_lib_ref.SetCandidateRanks.return_value = 20
    tar_algo._num_rank_indices = 20

    # Mock the rank -> comp-ratio mapping (6 blocks of 10 values, in order)
    with unittest.mock.patch(
            'aimet_common.cost_calculator.CostCalculator.calculate_comp_ratio_given_rank'
    ) as calculate_comp_ratio_given_rank:
        calculate_comp_ratio_given_rank.side_effect = [
            0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
            0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
            0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
            0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
            0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
            0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0
        ]
        layer_comp_ratio_list, stats = tar_algo.select_per_layer_comp_ratios()
        # Third layer ends up with the 0.97 score / ratio 1.0 candidate
        self.assertEqual(layer_comp_ratio_list[2].eval_score, 0.97)
        self.assertEqual(layer_comp_ratio_list[2].comp_ratio, 1.0)
# Dependency #1 # # Some Acceptance tests depend on a trained MNIST model. # # Check if we need to generate the .pth for CPU or GPU. If not, return cpu_output_files = os.path.join('./', 'data', 'mnist_trained_on_CPU.pth') gpu_output_files = os.path.join('./', 'data', 'mnist_trained_on_GPU.pth') if os.path.isfile(cpu_output_files) or os.path.isfile(gpu_output_files): logger.info('Mnist model .pth generation not needed') else: torch.manual_seed(1) torch.backends.cudnn.deterministic = True if use_cuda: model = mnist_torch_model.Net().to("cuda") else: model = mnist_torch_model.Net().to("cpu") mnist_torch_model.train(model, epochs=1, use_cuda=use_cuda, batch_size=50, batch_callback=None) # create directory if not os.path.isdir('./data'): os.mkdir('./data') if use_cuda: torch.save(model, gpu_output_files)
def test_and_compare_quantizer_no_fine_tuning_CPU_and_GPU(self):
    """Compute encodings for identical models on CPU and GPU and assert the
    per-layer encoding min/max match within a small delta; also checks that
    each model stays on its original device after export."""
    torch.manual_seed(1)
    torch.backends.cudnn.deterministic = True
    dummy_input = torch.rand(1, 1, 28, 28)
    dummy_input_cuda = dummy_input.cuda()
    start_time = time.time()
    # create model on CPU
    model_cpu = mnist_model.Net().to('cpu')
    # GPU model is a deep copy so both start from identical weights
    model_gpu = copy.deepcopy(model_cpu).to('cuda')
    cpu_sim_model = QuantizationSimModel(model_cpu,
                                         quant_scheme='tf',
                                         in_place=True,
                                         dummy_input=dummy_input)
    # Quantize
    cpu_sim_model.compute_encodings(forward_pass, None)
    print("Encodings for cpu model calculated")
    print("Took {} secs".format(time.time() - start_time))
    start_time = time.time()
    # create model on GPU
    gpu_sim_model = QuantizationSimModel(model_gpu,
                                         quant_scheme='tf',
                                         in_place=True,
                                         dummy_input=dummy_input_cuda)
    # Quantize
    gpu_sim_model.compute_encodings(forward_pass, None)
    print("Encodings for gpu model calculated")
    print("Took {} secs".format(time.time() - start_time))
    # check the encodings only min and max
    # Test that first and second are approximately (or not approximately)
    # equal by computing the difference, rounding to the given number of
    # decimal places (default 7), and comparing to zero. Note that these
    # methods round the values to the given number of decimal places
    # (i.e. like the round() function) and not significant digits
    # excluding fc1 since it is part of Matmul->Relu supergroup
    # can't use assertEqual for FC2, so using assertAlmostEquals for FC2
    self.assertAlmostEqual(
        model_gpu.conv1.output_quantizers[0].encoding.min,
        model_cpu.conv1.output_quantizers[0].encoding.min,
        delta=0.001)
    self.assertAlmostEqual(
        model_gpu.conv1.output_quantizers[0].encoding.max,
        model_cpu.conv1.output_quantizers[0].encoding.max,
        delta=0.001)
    self.assertAlmostEqual(
        model_gpu.conv2.output_quantizers[0].encoding.min,
        model_cpu.conv2.output_quantizers[0].encoding.min,
        delta=0.001)
    self.assertAlmostEqual(
        model_gpu.conv2.output_quantizers[0].encoding.max,
        model_cpu.conv2.output_quantizers[0].encoding.max,
        delta=0.001)
    self.assertAlmostEqual(model_gpu.fc2.output_quantizers[0].encoding.min,
                           model_cpu.fc2.output_quantizers[0].encoding.min,
                           delta=0.001)
    self.assertAlmostEqual(model_gpu.fc2.output_quantizers[0].encoding.max,
                           model_cpu.fc2.output_quantizers[0].encoding.max,
                           delta=0.001)
    gpu_sim_model.export("./data/", "quantizer_no_fine_tuning__GPU",
                         dummy_input)
    cpu_sim_model.export("./data/", "quantizer_no_fine_tuning__CPU",
                         dummy_input)
    # in_place=True: each sim mutated its own model, devices must be intact
    self.assertEqual(torch.device('cuda:0'),
                     next(model_gpu.parameters()).device)
    self.assertEqual(torch.device('cpu'),
                     next(model_cpu.parameters()).device)
def test_select_per_layer_comp_ratios(self):
    """Greedy comp-ratio selection with mocked pruner/eval/rounding; runs
    once from scratch (persisting the eval-score dict to a pickle) and a
    second time reusing the saved dict, asserting the same overall ratio."""
    pruner = unittest.mock.MagicMock()
    eval_func = unittest.mock.MagicMock()
    rounding_algo = unittest.mock.MagicMock()
    # Mock outputs are consumed strictly in call order — do not reorder
    rounding_algo.round.side_effect = [
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9
    ]
    eval_func.side_effect = [
        10, 20, 30, 40, 50, 60, 70, 80, 90,
        11, 21, 31, 35, 40, 45, 50, 55, 60
    ]
    model = mnist_torch_model.Net()
    layer_db = LayerDatabase(model, input_shape=(1, 1, 28, 28))
    layer1 = layer_db.find_layer_by_name('conv1')
    layer2 = layer_db.find_layer_by_name('conv2')
    selected_layers = [layer1, layer2]
    layer_db.mark_picked_layers([layer1, layer2])
    # Ensure a stale eval-score pickle from a prior run doesn't interfere
    try:
        os.remove('./data/greedy_selection_eval_scores_dict.pkl')
    except OSError:
        pass
    # Instantiate child
    greedy_algo = comp_ratio_select.GreedyCompRatioSelectAlgo(
        layer_db, pruner, SpatialSvdCostCalculator(), eval_func, 20,
        CostMetric.mac, Decimal(0.6), 10, True, None, rounding_algo, False,
        bokeh_session=None)
    layer_comp_ratio_list, stats = greedy_algo.select_per_layer_comp_ratios()
    original_cost = SpatialSvdCostCalculator.compute_model_cost(layer_db)
    # Unselected layers are appended with ratio None (uncompressed)
    for layer in layer_db:
        if layer not in selected_layers:
            layer_comp_ratio_list.append(LayerCompRatioPair(layer, None))
    compressed_cost = SpatialSvdCostCalculator.calculate_compressed_cost(
        layer_db, layer_comp_ratio_list, CostMetric.mac)
    # Refill the rounding mock for the second selection run below
    rounding_algo.round.side_effect = [
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9
    ]
    actual_compression_ratio = compressed_cost.mac / original_cost.mac
    self.assertTrue(
        math.isclose(Decimal(0.6), actual_compression_ratio, abs_tol=0.05))
    # First run must have persisted the eval-score dictionary
    self.assertTrue(
        os.path.isfile('./data/greedy_selection_eval_scores_dict.pkl'))
    print('\n')
    for pair in layer_comp_ratio_list:
        print(pair)
    # lets repeat with a saved eval_dict
    greedy_algo = comp_ratio_select.GreedyCompRatioSelectAlgo(
        layer_db, pruner, SpatialSvdCostCalculator(), eval_func, 20,
        CostMetric.mac, Decimal(0.6), 10, True,
        './data/greedy_selection_eval_scores_dict.pkl', rounding_algo, False,
        bokeh_session=None)
    layer_comp_ratio_list, stats = greedy_algo.select_per_layer_comp_ratios()
    original_cost = SpatialSvdCostCalculator.compute_model_cost(layer_db)
    for layer in layer_db:
        if layer not in selected_layers:
            layer_comp_ratio_list.append(LayerCompRatioPair(layer, None))
    compressed_cost = SpatialSvdCostCalculator.calculate_compressed_cost(
        layer_db, layer_comp_ratio_list, CostMetric.mac)
    actual_compression_ratio = compressed_cost.mac / original_cost.mac
    # Cached run must reach the same overall compression ratio
    self.assertTrue(
        math.isclose(Decimal(0.6), actual_compression_ratio, abs_tol=0.05))
    print('\n')
    for pair in layer_comp_ratio_list:
        print(pair)