def _schedule_to_training(self):
    """Switch every registered graph from the recording to the training stage.

    For each ``(pred, training)`` entry of ``self.registry``:

    1. the recorder functions named by ``self.config.recorder_activation()``
       and ``self.config.recorder_weight()`` are removed,
    2. function ranks are collected with a ``FunctionsRankRecorder``,
    3. the graph is converted with ``QuantizeRecordingToTrainingModifier``.

    After each conversion ``rewire_on`` is called on the original variable,
    and that same variable is what is written back into the registry.
    """
    # NOTE(review): the layout of this method was reconstructed; the state
    # update and the log line are assumed to run once per registry entry
    # (the log line reads a loop-local variable) -- confirm.
    for idx, (graph, is_training) in enumerate(self.registry):
        # Remove recorder
        recorder_names = [
            self.config.recorder_activation().name(),
            self.config.recorder_weight().name(),
        ]
        remove_recorders = GC.RemoveFunctionModifier(rm_funcs=recorder_names)
        without_recorders = GC.GraphConverter(
            [remove_recorders]).convert(graph)
        graph.rewire_on(without_recorders)

        # Collect functions rank
        ranks = FunctionsRankRecorder()
        graph.visit(ranks)

        # Recording to training
        to_training = GC.QuantizeRecordingToTrainingModifier(
            ranks.functions_ranks, config=self.config)
        training_graph = GC.GraphConverter([to_training]).convert(graph)
        graph.rewire_on(training_graph)

        self._register_params(self.solver)
        self._set_qat_learning_rate()
        self.registry[idx] = (graph, is_training)
        self.state = QNNState.TRAINING
        print(
            'QNNState.RECORDING -> QNNState.TRAINING: graph={}'.format(graph))
def test_channel_first(ctx, func_name, seed, test, graph_ref, graph_act):
    """Convert a graph with ``ChannelFirstModifier`` and check the result.

    The graph built by ``graph_act`` on a ``(batch_size, 32, 32, 3)`` input
    is converted; the converted output is passed to ``structure_tester``
    together with the ``graph_ref`` output and to ``value_tester`` together
    with the unconverted output.  Skipped unless ``func_name`` ends with
    ``'Cudnn'``.
    """
    from .graph_converter_test_utils import structure_tester, value_tester

    if not func_name.endswith('Cudnn'):
        pytest.skip(
            "ChannelFirst/Last conversion is only supported in cuDNN context.")

    with nn.context_scope(ctx):
        # Random number
        np.random.seed(seed)
        rng = np.random.RandomState(seed)

        # Source graph
        src_in = nn.Variable.from_numpy_array(
            rng.randn(batch_size, 32, 32, 3))
        src_out = graph_act(src_in, test=test)

        # Conversion: the modifier is told which variables are graph inputs.
        converter = GC.GraphConverter([GC.ChannelFirstModifier([src_in])])
        converted = converter.convert([src_out])

        # Reference graph, built on its own (batch_size, 3, 32, 32) input.
        ref_in = nn.Variable.from_numpy_array(
            rng.randn(batch_size, 3, 32, 32))
        ref_out = graph_ref(ref_in, test=test)

        # Test
        structure_tester(ref_out, converted[0])
        value_tester(src_out, converted[0], rtol=6e-02, atol=5e-02)
def test_batch_normalization_folding(ctx, func_name, seed, test, w_bias,
                                     channel_last, graph_ref, graph_act,
                                     opposite, dims):
    """Check ``BatchNormalizationFoldingModifier`` for ``dims``-D inputs.

    The graph from ``graph_act`` is converted with
    ``BatchNormalizationFoldingModifier(opposite, channel_last)``.  The
    converted output is passed to ``structure_tester`` with the ``graph_ref``
    output and to ``value_tester`` with the unconverted output.

    Skipped outside a cuDNN context when ``channel_last`` is set or when
    ``dims`` is not 2.
    """
    from .graph_converter_test_utils import structure_tester, value_tester

    needs_cudnn = channel_last or dims != 2
    if needs_cudnn and not func_name.endswith('Cudnn'):
        pytest.skip(
            'ChannelLast conversion and 1D,3D Conv/Deconv conversion are only supported in cuDNN context.')

    with nn.context_scope(ctx):
        # Random number
        np.random.seed(seed)
        rng = np.random.RandomState(seed)

        # 3 channels and `dims` spatial axes of size 32; the channel axis is
        # last or right after the batch axis depending on `channel_last`.
        spatial = (32,) * dims
        if channel_last:
            input_shape = (batch_size,) + spatial + (3,)
        else:
            input_shape = (batch_size, 3) + spatial

        # Graph
        x_data = rng.randn(*input_shape)
        x = nn.Variable.from_numpy_array(x_data)
        y_tgt = graph_act(x, test=test, w_bias=w_bias,
                          channel_last=channel_last, dims=dims)

        # FunctionModifier
        modifiers = [
            GC.BatchNormalizationFoldingModifier(opposite, channel_last),
        ]
        y_act = GC.GraphConverter(modifiers).convert(y_tgt)

        # Ref Graph
        y_ref = graph_ref(x, channel_last=channel_last, dims=dims)

        # Test
        structure_tester(y_ref, y_act)
        value_tester(y_tgt, y_act, rtol=6e-04, atol=5e-04)
def test_test_mode(seed, test, w_bias, graph_ref, graph_act, rm_func):
    """Check ``TestModeModifier`` with and without function removal.

    When ``rm_func`` is set, ``'BatchNormalization'`` and ``'MulScalar'`` are
    handed to the modifier as functions to remove and only the structure is
    checked; otherwise the converted output is also passed to
    ``value_tester`` with the reference output.
    """
    from .graph_converter_test_utils import structure_tester, value_tester

    # Random number
    np.random.seed(seed)
    rng = np.random.RandomState(seed)

    # Graph
    x_data = rng.randn(batch_size, 3, 32, 32)
    x = nn.Variable.from_numpy_array(x_data)
    y_tgt = graph_act(x, test=test, w_bias=w_bias)

    # Remove function
    rm_funcs = ['BatchNormalization', 'MulScalar'] if rm_func else []

    # FunctionModifier
    modifiers = [GC.TestModeModifier(rm_funcs)]
    y_act = GC.GraphConverter(modifiers).convert(y_tgt)

    # Ref Graph
    y_ref = graph_ref(x, w_bias=w_bias)

    # Test
    structure_tester(y_ref, y_act)
    if not rm_func:
        value_tester(y_ref, y_act, rtol=6e-02, atol=5e-02)
def test_unfused_batch_normalization(seed, test, graph_ref, graph_act):
    """Remove ``BatchNormalization``/``MulScalar`` and check the structure.

    The graph from ``graph_act`` is converted with a
    ``RemoveFunctionModifier`` and the result is passed to
    ``structure_tester`` together with the ``graph_ref`` output.
    """
    from .graph_converter_test_utils import structure_tester, value_tester

    # Random number
    np.random.seed(seed)
    rng = np.random.RandomState(seed)

    # Source graph
    src_in = nn.Variable.from_numpy_array(rng.randn(batch_size, 3, 32, 32))
    src_out = graph_act(src_in, test=test)

    # Conversion
    remover = GC.RemoveFunctionModifier(
        rm_funcs=['BatchNormalization', 'MulScalar'])
    converted = GC.GraphConverter([remover]).convert(src_out)

    # Reference graph
    ref_out = graph_ref(src_in, test=test, name='bn-graph-rm-ref')

    # Test
    structure_tester(ref_out, converted)
def test_batch_normalization_folding(seed, test, w_bias, graph_ref, graph_act):
    """Check ``BatchNormalizationFoldingModifier`` with default arguments.

    The converted output is passed to ``structure_tester`` with the
    ``graph_ref`` output and to ``value_tester`` with the unconverted output.
    """
    from .graph_converter_test_utils import structure_tester, value_tester

    # Random number
    np.random.seed(seed)
    rng = np.random.RandomState(seed)

    # Source graph
    src_in = nn.Variable.from_numpy_array(rng.randn(batch_size, 3, 32, 32))
    src_out = graph_act(src_in, test=test, w_bias=w_bias)

    # Conversion
    converter = GC.GraphConverter([GC.BatchNormalizationFoldingModifier()])
    converted = converter.convert(src_out)

    # Reference graph
    ref_out = graph_ref(src_in, test=test, name='bnfolding-graph-ref')

    # Test
    structure_tester(ref_out, converted)
    value_tester(src_out, converted, rtol=6e-02, atol=5e-02)
def test_multi_inputs_outputs(seed, w_bias, test, graph_ref, graph_act):
    """Check BN folding on a graph with two inputs and several outputs.

    ``graph_act`` and ``graph_ref`` are both called with the input list
    ``[x, z]``; the reference, converted and unconverted outputs are walked
    in lockstep and each triple is checked with ``structure_tester`` and
    ``value_tester``.
    """
    from .graph_converter_test_utils import structure_tester, value_tester

    # Random number
    np.random.seed(seed)
    rng = np.random.RandomState(seed)

    # Source graph: two inputs of identical shape, drawn in this order.
    first_in = nn.Variable.from_numpy_array(rng.randn(batch_size, 3, 32, 32))
    second_in = nn.Variable.from_numpy_array(rng.randn(batch_size, 3, 32, 32))
    src_outs = graph_act([first_in, second_in], w_bias=w_bias, test=test)

    # Conversion
    converter = GC.GraphConverter([GC.BatchNormalizationFoldingModifier()])
    converted_outs = converter.convert(src_outs)

    # Reference graph
    ref_outs = graph_ref([first_in, second_in], test=test)

    # Test
    for ref_out, converted, src_out in zip(ref_outs, converted_outs, src_outs):
        structure_tester(ref_out, converted)
        value_tester(src_out, converted, rtol=6e-02, atol=5e-02)
def test_channel_last(ctx, func_name, seed, test, graph_ref, graph_act):
    """Convert a graph with ``ChannelLastModifier`` and check the result.

    The graph built by ``graph_act`` on a ``(batch_size, 3, 32, 32)`` input
    is converted; the converted output is passed to ``structure_tester``
    together with the ``graph_ref`` output and to ``value_tester`` together
    with the unconverted output.  Skipped unless ``func_name`` ends with
    ``'Cudnn'``.
    """
    from .graph_converter_test_utils import structure_tester, value_tester

    if not func_name.endswith('Cudnn'):
        pytest.skip(
            'ChannelFirst/Last conversion is only supported in cuDNN context.')

    with nn.context_scope(ctx):
        # Random number
        np.random.seed(seed)
        rng = np.random.RandomState(seed)

        # Source graph
        src_in = nn.Variable.from_numpy_array(
            rng.randn(batch_size, 3, 32, 32))
        src_out = graph_act(src_in, test=test)

        # Conversion: the modifier is told which variables are graph inputs.
        converter = GC.GraphConverter([GC.ChannelLastModifier([src_in])])
        converted = converter.convert([src_out])

        # Reference graph, built on its own (batch_size, 32, 32, 3) input.
        ref_in = nn.Variable.from_numpy_array(
            rng.randn(batch_size, 32, 32, 3))
        ref_out = graph_ref(ref_in, test=test)

        # Test
        structure_tester(ref_out, converted[0])
        value_tester(src_out, converted[0], rtol=6e-02, atol=5e-02)
def test_pruning(ctx, func_name, seed, threshold, w_bias, channel_last,
                 functions_to_pruning, graph):
    """Compare ``PruningModifier`` against manual pruning of a twin graph.

    Two graphs are built by ``graph`` from the same input data and the same
    ``nn.random`` seed under different name scopes.  The first is converted
    with ``PruningModifier``; the second is pruned with ``pruning_inplace``.
    Both results are passed to ``structure_tester`` and ``value_tester``.

    Skipped for ``channel_last`` outside a cuDNN context, and for any
    context whose ``func_name`` ends with ``'Cuda'``.
    """
    from .graph_converter_test_utils import structure_tester, value_tester

    if channel_last and not func_name.endswith('Cudnn'):
        pytest.skip(
            'ChannelLast conversion is only supported in cuDNN context.')

    # The Deconv in Cuda is strange, it causes the test to fail.
    if func_name.endswith('Cuda'):
        pytest.skip('Skip test with Cuda context.')

    with nn.context_scope(ctx):
        # Random number
        np.random.seed(seed)
        rng = np.random.RandomState(seed)

        # Graph: both networks receive the same data through separate
        # variables.
        x_data = rng.randn(batch_size, 16, 16, 16)
        x1 = nn.Variable.from_numpy_array(x_data)
        x2 = nn.Variable.from_numpy_array(x_data)

        nn.random.seed(seed)
        y_tgt = graph(x1, threshold, with_bias=w_bias,
                      channel_last=channel_last, name_scope='net1')

        # Pruning with FunctionModifier
        modifiers = [
            GC.PruningModifier(threshold, functions_to_pruning, channel_last),
        ]
        y_act = GC.GraphConverter(modifiers).convert(y_tgt)

        # Ref Graph: re-seed before building the second network, exactly as
        # was done before the first.
        nn.random.seed(seed)
        y_ref = graph(x2, threshold, with_bias=w_bias,
                      channel_last=channel_last, name_scope='net2')

        # Pruning manually
        pruning_inplace(y_ref, threshold, functions_to_pruning, channel_last)

        # Test
        structure_tester(y_ref, y_act)
        # Compare the results of manual pruning and pruning with
        # GraphConverter. The results should be very close.
        value_tester(y_ref, y_act, rtol=2e-6, atol=3e-8)
def _fold_bn(self, pred):
    """Apply the batch-normalization folding passes enabled in the config.

    Args:
        pred: Output variable of the graph to convert.

    Returns:
        ``pred`` itself.  When at least one folding pass is enabled the
        graph is converted and ``pred.rewire_on`` is called with the
        converted output; otherwise ``pred`` is returned untouched.
    """
    modifiers = []

    # BN folding, once for each value of `opposite`.
    if self.config.bn_folding:
        modifiers.append(GC.BatchNormalizationFoldingModifier(
            opposite=False, channel_last=self.config.channel_last))
        modifiers.append(GC.BatchNormalizationFoldingModifier(
            opposite=True, channel_last=self.config.channel_last))

    # BN self folding
    if self.config.bn_self_folding:
        modifiers.append(GC.BatchNormalizationSelfFoldingModifier())

    if not modifiers:
        return pred

    # Expand fused_batch_normalization if BN folding or BN self folding is
    # enabled; this modifier must come first in the list.
    modifiers.insert(0, GC.UnfusedBatchNormalizationModifier())
    folded = GC.GraphConverter(modifiers).convert(pred)
    pred.rewire_on(folded)
    return pred
def _schedule_to_recording(self):
    """Switch every registered graph from the non-QNN to the recording stage.

    For each ``(pred, training)`` entry of ``self.registry`` the graph goes
    through ``self._fold_bn``, function ranks are collected with a
    ``FunctionsRankRecorder``, and the graph is converted with
    ``QuantizeNonQNNToRecordingModifier``.  ``rewire_on`` is then called on
    the variable, its ``need_grad`` is set to ``False``, and it is written
    back into the registry.
    """
    # NOTE(review): the layout of this method was reconstructed; the state
    # update and the log line are assumed to run once per registry entry
    # (the log line reads a loop-local variable) -- confirm.
    for idx, (graph, is_training) in enumerate(self.registry):
        graph = self._fold_bn(graph)

        # Collect functions rank
        ranks = FunctionsRankRecorder()
        graph.visit(ranks)

        to_recording = GC.QuantizeNonQNNToRecordingModifier(
            ranks.functions_ranks,
            config=self.config,
            training=is_training)
        recording_graph = GC.GraphConverter([to_recording]).convert(graph)
        graph.rewire_on(recording_graph)
        graph.need_grad = False

        self._register_params(self.solver)
        self.registry[idx] = (graph, is_training)
        self.state = QNNState.RECORDING
        print(
            'QNNState.NON_QNN -> QNNState.RECORDING: graph={}'.format(graph))
def test_batch_normalization_folding(ctx, func_name, seed, test, w_bias,
                                     channel_last, graph_ref, graph_act,
                                     opposite):
    """Check ``BatchNormalizationFoldingModifier`` for both memory layouts.

    The graph from ``graph_act`` is converted with
    ``BatchNormalizationFoldingModifier(opposite, channel_last)``.  The
    converted output is passed to ``structure_tester`` with the ``graph_ref``
    output and to ``value_tester`` with the unconverted output.

    Skipped for ``channel_last`` outside a cuDNN context.
    """
    from .graph_converter_test_utils import structure_tester, value_tester

    if channel_last and not func_name.endswith('Cudnn'):
        pytest.skip(
            'ChannelLast conversion is only supported in cuDNN context.')

    with nn.context_scope(ctx):
        # Random number
        np.random.seed(seed)
        rng = np.random.RandomState(seed)

        # Graph: the 3-channel axis is last or right after the batch axis
        # depending on `channel_last`.
        if channel_last:
            x_data = rng.randn(batch_size, 32, 32, 3)
        else:
            x_data = rng.randn(batch_size, 3, 32, 32)
        x = nn.Variable.from_numpy_array(x_data)
        y_tgt = graph_act(x, test=test, w_bias=w_bias,
                          channel_last=channel_last)

        # FunctionModifier
        modifiers = [
            GC.BatchNormalizationFoldingModifier(opposite, channel_last),
        ]
        y_act = GC.GraphConverter(modifiers).convert(y_tgt)

        # Ref Graph
        y_ref = graph_ref(x, test=test, channel_last=channel_last)

        # Test
        structure_tester(y_ref, y_act)
        value_tester(y_tgt, y_act, rtol=6e-02, atol=5e-02)
def test_identity(seed, test, diff_batchsize, graph_ref, graph_act):
    """Check ``IdentityModifier`` with the same or a replaced input variable.

    The graph is always built on ``x``.  With ``diff_batchsize`` set, the
    modifier maps ``x`` to a second variable whose leading dimension is 128
    and is created with ``copy_value=False``; only the structure is checked.
    Otherwise ``x`` is mapped to itself with ``copy_value=True`` and the
    converted output is also passed to ``value_tester``.
    """
    from .graph_converter_test_utils import structure_tester, value_tester

    # Random number
    np.random.seed(seed)
    rng = np.random.RandomState(seed)

    # Graph
    x_data = rng.randn(batch_size, 3, 32, 32)
    x = nn.Variable.from_numpy_array(x_data)

    x1_data = rng.randn(128, 3, 32, 32)
    x1 = nn.Variable.from_numpy_array(x1_data)

    # Alter value and copy option
    if diff_batchsize:
        inp_x, cp_val = x1, False
    else:
        inp_x, cp_val = x, True

    y_tgt = graph_act(x, test=test)

    # FunctionModifier
    modifiers = [GC.IdentityModifier({x: inp_x}, copy_value=cp_val)]
    y_act = GC.GraphConverter(modifiers).convert(y_tgt)

    # Ref Graph
    y_ref = graph_ref(inp_x, test=test)

    # Test
    structure_tester(y_ref, y_act)
    if not diff_batchsize:
        value_tester(y_tgt, y_act, rtol=6e-02, atol=5e-02)
def test_nonqnn_to_recording(ctx, func_name, seed, test, w_bias, channel_last,
                             graph_ref, graph_act, folding, self_folding,
                             rec_lays, rec_pos, skip_lays):
    """Check ``QuantizeNonQNNToRecordingModifier`` under a given QAT config.

    A ``QATConfig`` is filled from the test parameters (``skip_lays`` is used
    for both the skipped input and output layers).  The graph from
    ``graph_act`` is first run through the BN folding passes the config
    enables, then converted to the recording graph.  The converted output is
    passed to ``structure_tester`` with the ``graph_ref`` output and to
    ``value_tester`` with the pre-conversion output.

    Skipped for ``channel_last`` outside a cuDNN context.
    """
    from .graph_converter_test_utils import structure_tester, value_tester

    if channel_last and not func_name.endswith('Cudnn'):
        pytest.skip(
            'ChannelLast conversion is only supported in cuDNN context.')

    cfg = QATConfig()
    cfg.bn_folding = folding
    cfg.bn_self_folding = self_folding
    cfg.channel_last = channel_last
    cfg.record_layers = rec_lays
    cfg.recorder_position = rec_pos
    cfg.skip_inputs_layers = skip_lays
    cfg.skip_outputs_layers = skip_lays

    with nn.context_scope(ctx):
        # Random number
        np.random.seed(seed)
        rng = np.random.RandomState(seed)

        # Graph
        if channel_last:
            x_data = rng.randn(batch_size, 32, 32, 3)
        else:
            x_data = rng.randn(batch_size, 3, 32, 32)
        x = nn.Variable.from_numpy_array(x_data)

        y_tgt = graph_act(x, test=test, w_bias=w_bias,
                          channel_last=channel_last)

        # BN folding & BN self folding
        modifiers = []
        if cfg.bn_folding:
            modifiers.append(
                GC.BatchNormalizationFoldingModifier(
                    opposite=False, channel_last=cfg.channel_last))
            modifiers.append(
                GC.BatchNormalizationFoldingModifier(
                    opposite=True, channel_last=cfg.channel_last))
        # Go through BN self folding
        if cfg.bn_self_folding:
            modifiers.append(GC.BatchNormalizationSelfFoldingModifier())
        if modifiers:
            y_tgt_without_bn = GC.GraphConverter(modifiers).convert(y_tgt)
            y_tgt.rewire_on(y_tgt_without_bn)

        # FunctionModifier: ranks are collected on the graph as it stands
        # after the optional folding above.
        funcrankrecorder = FunctionsRankRecorder()
        y_tgt.visit(funcrankrecorder)
        modifiers = [
            GC.QuantizeNonQNNToRecordingModifier(
                funcrankrecorder.functions_ranks, config=cfg),
        ]
        y_act = GC.GraphConverter(modifiers).convert(y_tgt)

        # Ref Graph
        y_ref = graph_ref(x, cfg, test=test, channel_last=channel_last,
                          bn_self_folding=self_folding,
                          record_layers=rec_lays)

        # Test
        structure_tester(y_ref, y_act)
        value_tester(y_tgt, y_act, rtol=6e-02, atol=5e-02)
def test_recording_to_training(ctx, func_name, seed, precision_mode,
                               graph_ref, graph_act):
    """Walk a graph through float training, recording and QAT conversion.

    Steps, all inside ``nn.context_scope(ctx)``:

    1. build the graph with ``graph_act`` and run 100 Adam updates on random
       inputs and labels,
    2. apply the BN folding passes enabled in the config,
    3. convert to the recording graph and run another 100 updates,
    4. remove the recorder functions and convert to the training graph,
    5. run one forward pass on the result and pass it to
       ``structure_tester`` together with the ``graph_ref`` output.
    """
    from .graph_converter_test_utils import structure_tester

    cfg = QATConfig()
    cfg.bn_folding = True
    cfg.bn_self_folding = True
    cfg.channel_last = False
    cfg.precision_mode = precision_mode
    cfg.skip_inputs_layers = []
    cfg.skip_outputs_layers = []

    # Random number: the training data below is drawn from the global NumPy
    # generator, so seeding it is what makes the run repeatable.
    np.random.seed(seed)

    # NOTE(review): the layout of this function was reconstructed; everything
    # below is assumed to run inside the context scope -- confirm.
    with nn.context_scope(ctx):
        # Graph
        gt_label = nn.Variable((batch_size, 1))
        x = nn.Variable((batch_size, 3, 32, 32))

        y_tgt = graph_act(x, test=False, w_bias=True)
        loss = F.mean(F.softmax_cross_entropy(y_tgt, gt_label))
        solver = S.Adam(0.001)

        def run_random_updates(n_steps=100):
            """Run `n_steps` solver updates on freshly drawn random batches."""
            for _ in range(n_steps):
                input_data = np.random.random((batch_size, 3, 32, 32))
                input_label = np.random.randint(0, 10, size=(batch_size, 1))
                gt_label.d = input_label
                x.d = input_data
                loss.forward()
                loss.backward()
                solver.update()

        # train the float32 network
        solver.set_parameters(nn.get_parameters(grad_only=True))
        run_random_updates()

        # BN folding & BN self folding
        modifiers = []
        if cfg.bn_folding:
            modifiers.append(
                GC.BatchNormalizationFoldingModifier(
                    opposite=False, channel_last=cfg.channel_last))
            modifiers.append(
                GC.BatchNormalizationFoldingModifier(
                    opposite=True, channel_last=cfg.channel_last))
        # Go through BN self folding
        if cfg.bn_self_folding:
            modifiers.append(GC.BatchNormalizationSelfFoldingModifier())
        if modifiers:
            y_tgt_without_bn = GC.GraphConverter(modifiers).convert(y_tgt)
            y_tgt.rewire_on(y_tgt_without_bn)

        # convert to recording
        funcrankrecorder = FunctionsRankRecorder()
        y_tgt.visit(funcrankrecorder)
        modifiers = [
            GC.QuantizeNonQNNToRecordingModifier(
                funcrankrecorder.functions_ranks, config=cfg),
        ]
        y_act_rec = GC.GraphConverter(modifiers).convert(y_tgt)
        y_tgt.rewire_on(y_act_rec)
        y_tgt.need_grad = False

        # Hand the current parameter set to the solver again before the
        # second round of updates.
        solver.set_parameters(nn.get_parameters(grad_only=True))
        run_random_updates()

        # Remove recorder
        modifiers = [
            GC.RemoveFunctionModifier(rm_funcs=[
                cfg.recorder_activation().name(),
                cfg.recorder_weight().name(),
            ]),
        ]
        y_tgt = GC.GraphConverter(modifiers).convert(y_tgt)

        # Collect functions rank
        funcrankrecorder = FunctionsRankRecorder()
        y_tgt.visit(funcrankrecorder)

        # convert to training
        modifiers = [
            GC.QuantizeRecordingToTrainingModifier(
                funcrankrecorder.functions_ranks, config=cfg),
        ]
        y_act = GC.GraphConverter(modifiers).convert(y_tgt)
        y_act.forward()

        # Ref Graph
        y_ref = graph_ref(x, cfg, test=True)

        # Test
        structure_tester(y_ref, y_act)