    def test_auto_acts_parameter(self):
        # If use_auto_acts is False, then the bounds should be a single scalar that
        # specifies the fixed bound; 'None' by default.
        config = config_schema.get_config(num_blocks=15, use_auto_acts=False)
        self.assertIsNone(config.quant_act.bounds)
        # If use_auto_acts is True, it should have the same structure as the
        # GetBounds.Hyper dataclass.
        config = config_schema.get_config(num_blocks=15, use_auto_acts=True)
        self.assertIn('initial_bound', config.quant_act.bounds)

        # Because the config dict is locked, it shouldn't be possible to change it
        # back to fixed bounds if it was created with use_auto_acts=True.
        with self.assertRaises(TypeError):
            config.quant_act.bounds = 1.0

    def test_precision_propagates(self):
        config = config_schema.get_config(num_blocks=16, use_auto_acts=True)

        # Set the global precision to 4 bits.
        config.prec = 4
        # Set the global half_shift flag to False
        config.half_shift = False
        # Test that this sets the weight and activation to 4 as well.
        self.assertEqual(config.weight_prec, 4)
        self.assertEqual(config.quant_act.prec, 4)
        # Test that this sets the weight_half_shift and act half_shift to False
        self.assertEqual(config.weight_half_shift, False)
        self.assertEqual(config.quant_act.half_shift, False)
        # Test that this propagates all the way down to the weight precision of
        # layer types and individual layers. As an example of an individual
        # layer, we take conv_1 of the third residual block.
        conv1_block3 = config.model_hparams.residual_blocks[2].conv_1
        # Meanwhile, 'conv1' represents the generic configuration of all conv1
        # layers throughout the model.
        conv1 = config.residual.conv_1
        self.assertEqual(conv1.weight_prec, 4)
        self.assertEqual(conv1_block3.weight_prec, 4)
        self.assertEqual(conv1.weight_half_shift, False)
        self.assertEqual(conv1.quant_act.half_shift, False)
        self.assertEqual(conv1_block3.weight_half_shift, False)
        self.assertEqual(conv1_block3.quant_act.half_shift, False)

        # Test that altering the global precision to 8 on the same config
        # instance automatically propagates to individual layers.
        config.prec = 8
        self.assertEqual(conv1.weight_prec, 8)
        self.assertEqual(conv1_block3.weight_prec, 8)
        # Test that altering the global half_shift to True on the same config
        # instance automatically propagates to individual layers.
        config.half_shift = True
        self.assertEqual(conv1.weight_half_shift, True)
        self.assertEqual(conv1.quant_act.half_shift, True)
        self.assertEqual(conv1_block3.weight_half_shift, True)
        self.assertEqual(conv1_block3.quant_act.half_shift, True)

        # Test that the precision can be overridden for a specific layer type. We
        # want to verify that the change doesn't propagate back up to the global
        # precision field but does propagate down to individual layers of that
        # layer type. Changes to fields should only propagate down the parameter
        # hierarchy, never up.
        conv1.weight_prec = 2
        self.assertEqual(conv1.weight_prec, 2)
        self.assertEqual(conv1_block3.weight_prec, 2)
        self.assertEqual(config.prec, 8)

        # Now update the precision for just a specific layer and check that it
        # doesn't propagate upwards.
        conv1_block3.weight_prec = 1
        self.assertEqual(conv1_block3.weight_prec, 1)
        self.assertEqual(conv1.weight_prec, 2)
        self.assertEqual(config.prec, 8)
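
# The downward-only propagation exercised above is typically built with
# ml_collections field references. The sketch below is hypothetical (it is not
# the actual config_schema implementation) and assumes the ml_collections
# package; it only illustrates how a global field can feed per-layer fields.
from ml_collections import config_dict

sketch = config_dict.ConfigDict()
sketch.prec = 4
sketch.layer = config_dict.ConfigDict()
# get_ref() returns a lazy FieldReference, so layer.weight_prec tracks
# sketch.prec until it is explicitly overridden.
sketch.layer.weight_prec = sketch.get_ref('prec')

sketch.prec = 8
assert sketch.layer.weight_prec == 8  # the change propagates downward
# In the real schema, overriding a per-layer field (as the test above does with
# conv1.weight_prec) replaces the value locally without changing config.prec.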
def get_base_config(imagenet_type, quant_target):
    """Returns config that sets hyperparameters common to all quant targets.

  Fields in that config can then be overridden to customize a configuration.

  Note that two hyperparameters, model architecture kind and whether to
  automatically find clipping bounds for activations, have to be specified in
  advance as keyword arguments to this function instead of being overridden in
  the returned configdict. That is because these parameters affect the name and
  number of fields in the configdict instance, which can't be changed after
  creation: there will be one set of overridable parameters per layer in the
  configdict, and the field names in the 'quant_act' fields change depending on
  'quant_target'.

  Args:
    imagenet_type: ResNet model architecture.
    quant_target: The quantization target, which determines whether the config
      uses automatic bounds calculation for activations or fixed bounds.

  Returns:
    A ConfigDict instance suitable for ResNet ImageNet training.
  """
    resnet_layers = imagenet_type.get_residual_layers()
    num_blocks = sum(resnet_layers)

    use_auto_acts = quant_target == QuantTarget.weights_and_auto_acts
    config = config_schema.get_config(num_blocks=num_blocks,
                                      use_auto_acts=use_auto_acts)
    config.update({
        "base_learning_rate": 0.1,
        "momentum": 0.9,
        "weight_decay": 0.0001,
        "activation_bound_update_freq": -1,
        "activation_bound_start_step": -1,
        "prec": None,
        "quant_type": "fake_quant",
        "weight_quant_granularity": "per_channel"
    })

    proj_layers = [sum(resnet_layers[:x]) for x in range(len(resnet_layers))]
    for idx in range(num_blocks):
        if idx not in proj_layers:
            config.model_hparams.residual_blocks[idx].conv_proj = None
            config.model_hparams.residual_blocks[
                idx].conv_1.quant_act.input_distribution = "positive"

    config.model_hparams.filter_multiplier = 1.
    config.half_shift = False

    return config
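
# Hypothetical usage of get_base_config above. 'ImagenetType.RESNET50' is an
# assumed enum member, used purely for illustration; QuantTarget comes from the
# comparison inside the function, and the overridden fields exist in the config.
base_config = get_base_config(
    imagenet_type=ImagenetType.RESNET50,
    quant_target=QuantTarget.weights_and_auto_acts)
base_config.prec = 4                   # quantize weights and activations to 4 bits
base_config.base_learning_rate = 0.05  # any field can be overridden after creation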
def get_base_config(imagenet_type, quant_target):
  """Returns config that sets hyperparameters common to all quant targets.

  Fields in that config can then be overridden to customize a configuration.

  Note that two hyperparameters, model architecture kind and whether to
  automatically find clipping bounds for activations, have to be specified in
  advance as keyword arguments to this function instead of being overridden in
  the returned configdict. That is because these parameters affect the name and
  number of fields in the configdict instance, which can't be changed after
  creation: there will be one set of overridable parameters per layer in the
  configdict, and the field names in the 'quant_act' fields change depending on
  'quant_target'.

  Args:
    imagenet_type: ResNet model architecture.
    quant_target: The quantization target, which determines whether the config
      uses automatic bounds calculation for activations or fixed bounds.

  Returns:
    A ConfigDict instance suitable for ResNet ImageNet training.
  """
  resnet_layers = imagenet_type.get_residual_layers()
  num_blocks = sum(resnet_layers)

  use_auto_acts = quant_target == QuantTarget.weights_and_auto_acts
  config = config_schema.get_config(
      num_blocks=num_blocks, use_auto_acts=use_auto_acts)
  config.update({
      "base_learning_rate": 0.1,
      "momentum": 0.9,
      "weight_decay": 0.0001,
      "activation_bound_update_freq": -1,
      "activation_bound_start_step": -1,
      "prec": None,
      "quant_type": "fake_quant",
      "weight_quant_granularity": "per_channel",
      "act_function": "relu",
      "shortcut_ch_shrink_method": "none",
      "shortcut_ch_expand_method": "none",
      "shortcut_spatial_method": "none",
      "lr_scheduler": {
          "warmup_epochs": 5,
          "cooldown_epochs": 50,
          "scheduler": "cosine",
          "num_epochs": 250,
          "endlr": 0.0,
          "knee_lr": 1e-5,
          "knee_epochs": 125,
      },
      "optimizer": "sgd",
      "adam": {
          "beta1": 0.9,
          "beta2": 0.999
      },
      "early_stop_steps": -1,  # -1 means no early stop
      "weight_quant_start_step": 0,  # 0 means turned on by default
      "teacher_model": "labels",
      "is_teacher": True,  # by default train the vanilla resnet
      "seed": 0,
  })

  proj_layers = [sum(resnet_layers[:x]) for x in range(len(resnet_layers))]
  for idx in range(num_blocks):
    if idx not in proj_layers:
      config.model_hparams.residual_blocks[idx].conv_proj = None
      config.model_hparams.residual_blocks[
          idx].conv_1.quant_act.input_distribution = "positive"

  config.model_hparams.filter_multiplier = 1.
  config.model_hparams.se_ratio = 0.5
  config.model_hparams.init_group = 32
  config.half_shift = False

  return config
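
# Illustrative overrides of the extra training fields this variant adds (the
# values are arbitrary examples, not recommended settings); assumes an
# `imagenet_type` value is already in scope.
config = get_base_config(imagenet_type, QuantTarget.weights_and_auto_acts)
config.optimizer = "adam"
config.adam.beta1 = 0.9
config.adam.beta2 = 0.99
config.lr_scheduler.num_epochs = 100
config.lr_scheduler.warmup_epochs = 2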
    def test_num_blocks(self, num_blocks):
        config = config_schema.get_config(num_blocks=num_blocks,
                                          use_auto_acts=True)
        self.assertLen(config.model_hparams.residual_blocks, num_blocks)

    def test_schema_matches_expected(self, num_blocks):
        # This tests that the schema of the configdict returned by 'config_schema',
        # once all references are resolved, matches an expected schema. 'Schema'
        # here means the names and structure of fields at each level of the
        # configuration hierarchy. A value of 'None' in the expected schemas defined
        # below indicates a real configuration would have a concrete scalar value
        # there.

        quant_act_schema = {
            'bounds': {
                'initial_bound': None,
                'stddev_coeff': None,
                'absdev_coeff': None,
                'mix_coeff': None,
                'reset_stats': None,
                'ema_coeff': None,
                'use_cams': None,
                'exclude_zeros': None,
                'use_mean_of_max': None,
                'granularity': None,
                'fixed_bound': None,
                'cams_coeff': None,
                'cams_stddev_coeff': None,
                'mean_of_max_coeff': None,
                'use_old_code': None,
            },
            'input_distribution': None,
            'prec': None,
            'half_shift': None,
        }

        dense_schema = {
            'weight_prec': None,
            'weight_quant_granularity': None,
            'quant_type': None,
            'quant_act': quant_act_schema,
            'weight_half_shift': None,
        }

        conv_schema = {
            'weight_prec': None,
            'weight_quant_granularity': None,
            'quant_type': None,
            'quant_act': quant_act_schema,
            'weight_half_shift': None,
        }

        residual_block_schema = {
            'conv_se': conv_schema,
            'conv_proj': conv_schema,
            'conv_1': conv_schema,
            'conv_2': conv_schema,
            'conv_3': conv_schema,
            'act_function': None,
            'shortcut_ch_expand_method': None,
            'shortcut_ch_shrink_method': None,
            'shortcut_spatial_method': None,
        }

        expected_top_level_schema = {
            'metadata': {
                'description': None,
                'hyper_str': None
            },
            'base_learning_rate': None,
            'momentum': None,
            'weight_decay': None,
            'activation_bound_update_freq': None,
            'activation_bound_start_step': None,
            'weight_quant_start_step': None,
            'prec': None,
            'half_shift': None,
            'weight_prec': None,
            'weight_half_shift': None,
            'quant_type': None,
            'quant_act': quant_act_schema,
            'weight_quant_granularity': None,
            'early_stop_steps': None,
            'act_function': None,
            'shortcut_ch_shrink_method': None,
            'shortcut_ch_expand_method': None,
            'shortcut_spatial_method': None,
            'teacher_model': None,
            'is_teacher': None,
            'seed': None,
            'lr_scheduler': {
                'warmup_epochs': None,
                'cooldown_epochs': None,
                'scheduler': None,
                'num_epochs': None,
                'endlr': None,
                'knee_lr': None,
                'knee_epochs': None,
            },
            'optimizer': None,
            'adam': {
                'beta1': None,
                'beta2': None,
            },
            'dense_layer': dense_schema,
            'conv': conv_schema,
            'residual': residual_block_schema,
            'model_hparams': {
                'dense_layer': dense_schema,
                'conv_init': conv_schema,
                'residual_blocks': [residual_block_schema] * num_blocks,
                'filter_multiplier': None,
                'act_function': None,
                'se_ratio': None,
                'init_group': None,
            },
        }

        config = config_schema.get_config(num_blocks=num_blocks,
                                          use_auto_acts=True)
        # This round-trip conversion from JSON forces all references to resolve to
        # concrete values.
        config_reified = json.loads(config.to_json())

        # This test is not interested in checking the specific values of fields in
        # the configuration, but only that the schema of the hierarchies
        # are the same. Thus we all set the value of leaf nodes in the config to
        # 'None' before checking that the actual and expected configuration
        # structures are the same.
        def set_leaves_to_none(config):
            # We are at an intermediate node in the tree-structured input, which could
            # either be in the form of a dictionary or a list of other nodes in the
            # tree.
            if isinstance(config, dict):
                return {
                    key: set_leaves_to_none(value)
                    for key, value in config.items()
                }
            elif isinstance(config, list):
                return [set_leaves_to_none(value) for value in config]

            # We are at a leaf node in the tree-structured input.
            else:
                return None
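
        # Quick illustration (not part of the original test): the helper keeps
        # only the shape of a nested structure, mapping every leaf to None.
        self.assertEqual(
            set_leaves_to_none({'prec': 4, 'adam': {'beta1': 0.9}, 'blocks': [1, 2]}),
            {'prec': None, 'adam': {'beta1': None}, 'blocks': [None, None]})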

        self.assertSameStructure(set_leaves_to_none(config_reified),
                                 expected_top_level_schema)