def test_compressed_graph_models_hw(desc, hw_config_type):
    """Check the potential-quantizer graph for *desc* under a given HW config.

    Builds the model described by *desc*, runs the quantizer propagation
    solver over the insertion-point graph merged for HW-optimized operation
    patterns, annotates the original graph with the potential activation and
    weight quantizers, and compares it against the reference .dot file.
    """
    hw_config = HWConfig.from_json(HWConfig.get_path_to_hw_config(hw_config_type))

    net = desc.model_builder()
    config = get_basic_quantization_config_with_hw_config_type(
        hw_config_type.value, input_sample_size=desc.input_sample_sizes)
    nncf_net = NNCFNetwork(net, input_infos=create_input_infos(config))

    # pylint:disable=protected-access
    algo_builder = create_compression_algorithm_builders(config)[0]
    weight_modules = algo_builder.get_potential_quantized_modules(nncf_net)

    solver = QuantizerPropagationSolver(hw_config=hw_config)
    ip_graph = nncf_net.get_insertion_point_graph()
    merged_graph = ip_graph.get_ip_graph_with_merged_hw_optimized_operations(hw_config)
    activation_quantizers = solver.run_on_ip_graph(merged_graph)

    annotated_graph = prepare_potential_quantizer_graph(
        nncf_net.get_original_graph(), activation_quantizers, weight_modules)
    check_graph(annotated_graph, desc.dot_filename, _case_dir(hw_config_type.value),
                sort_dot_graph=False)
def test_can_load_quant_algo__with_defaults():
    """Verify default quantization config behavior.

    A default (no range init) quantization config must produce exactly one
    QuantizationBuilder; after compression every original Conv2d module is
    wrapped as an NNCFConv2d whose pre-ops include a symmetric weight
    quantizer (UpdateWeight), with no duplicated quantizing pre-op classes.
    """
    model = BasicConvTestModel()
    config = get_quantization_config_without_range_init()
    compression_algo_builder_list = create_compression_algorithm_builders(config)
    assert len(compression_algo_builder_list) == 1
    assert isinstance(compression_algo_builder_list[0], QuantizationBuilder)

    quant_model, _ = create_compressed_model_and_algo_for_test(deepcopy(model), config)

    model_conv = get_all_modules_by_type(model, 'Conv2d')
    quant_model_conv = get_all_modules_by_type(quant_model.get_nncf_wrapped_model(), 'NNCFConv2d')
    assert len(model_conv) == len(quant_model_conv)

    # Iterate keys directly — the mapped modules themselves are not needed here.
    for module_scope in model_conv:
        # Map the original Conv2d scope to the expected wrapped NNCFConv2d scope.
        quant_scope = deepcopy(module_scope)  # type: Scope
        quant_scope.pop()
        quant_scope.push(ScopeElement('NNCFConv2d', 'conv'))
        assert quant_scope in quant_model_conv

        # Each quantizing pre-op class must appear at most once,
        # and the weight quantizer (UpdateWeight) must be present.
        store = []
        for op in quant_model_conv[quant_scope].pre_ops.values():
            if isinstance(op, (UpdateInputs, UpdateWeight)) and isinstance(op.operand, SymmetricQuantizer):
                assert op.__class__.__name__ not in store
                store.append(op.__class__.__name__)
        assert UpdateWeight.__name__ in store
def create_nncf_model_and_algo_builder(model: NNCFNetwork, config: NNCFConfig,
                                       dummy_forward_fn: Callable[[Module], Any] = None,
                                       wrap_inputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None,
                                       resuming_state_dict: dict = None):
    """Wrap *model* into an NNCFNetwork and create compression algo builders.

    The config is validated first; scope filters are read from the config.
    Builders are asked to initialize only when no resuming state dict is
    supplied. Returns a ``(compressed_model, builder_list)`` pair.
    """
    assert isinstance(config, NNCFConfig)
    NNCFConfig.validate(config)

    input_infos = create_input_infos(config)
    no_shape_match_scopes = config.get('scopes_without_shape_matching', [])
    ignored = config.get('ignored_scopes')
    targeted = config.get('target_scopes')

    compressed_model = NNCFNetwork(model,
                                   input_infos=input_infos,
                                   dummy_forward_fn=dummy_forward_fn,
                                   wrap_inputs_fn=wrap_inputs_fn,
                                   ignored_scopes=ignored,
                                   target_scopes=targeted,
                                   scopes_without_shape_matching=no_shape_match_scopes)

    builder_list = create_compression_algorithm_builders(
        config, should_init=resuming_state_dict is None)
    return compressed_model, builder_list
def test_gnmt_quantization(_case_config):
    """Quantize a small GNMT model using pattern-based quantizer setup and
    compare the compressed model graph against the reference dot file."""
    model = replace_lstm(GNMT(vocab_size=32))
    forward_fn_ = gnmt_forward_fn(seq_len=10, batch_size=3, vocab_size=32)

    config = get_basic_quantization_config(_case_config.quant_type,
                                           input_sample_sizes=[3, 10])
    config["quantizer_setup_type"] = 'pattern_based'
    config["compression"].update({
        "quantizable_subgraph_patterns": [["linear", "__add__"],
                                          ["sigmoid", "__mul__", "__add__"],
                                          ["__add__", "tanh", "__mul__"],
                                          ["sigmoid", "__mul__"]],
        "disable_function_quantization_hooks": True,
        "ignored_scopes": ["GNMT/ResidualRecurrentEncoder[encoder]/Embedding[embedder]",
                           "GNMT/ResidualRecurrentDecoder[decoder]/Embedding[embedder]"]})

    compressed_model = NNCFNetwork(
        model,
        input_infos=create_input_infos(config),
        dummy_forward_fn=forward_fn_,
        wrap_inputs_fn=gnmt_wrap_inputs_fn,
        scopes_without_shape_matching=['GNMT/ResidualRecurrentDecoder[decoder]/RecurrentAttention[att_rnn]/'
                                       'BahdanauAttention[attn]'])

    for builder in create_compression_algorithm_builders(config):
        compressed_model = builder.apply_to(compressed_model)
    _ = compressed_model.commit_compression_changes()

    check_model_graph(compressed_model, 'gnmt_variable.dot', _case_config.graph_dir)