Example #1
def __init__(self,
             in_features: int,
             out_features: int,
             bias: bool,
             weight_quant: Union[
                 WeightQuantProxyProtocol,
                 Type[Injector]] = Int8WeightPerTensorFloat,
             bias_quant: Union[BiasQuantProxyProtocol,
                               Type[Injector]] = FloatBias,
             input_quant: Union[ActQuantProxyProtocol,
                                Type[Injector]] = None,
             output_quant: Union[ActQuantProxyProtocol,
                                 Type[Injector]] = None,
             return_quant_tensor: bool = False,
             **kwargs) -> None:
    Linear.__init__(self, in_features, out_features, bias)
    QuantWBIOL.__init__(self,
                        weight=self.weight,
                        bias=self.bias,
                        weight_quant=weight_quant,
                        bias_quant=bias_quant,
                        input_quant=input_quant,
                        output_quant=output_quant,
                        return_quant_tensor=return_quant_tensor,
                        **kwargs)
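This pattern first initializes the ordinary floating-point Linear and then the QuantWBIOL mixin, which attaches the weight, bias, input and output quantizers. A minimal usage sketch, assuming the excerpt is Brevitas' QuantLinear (the class name itself is not shown above):

# Minimal usage sketch; assumes the excerpt above is brevitas.nn.QuantLinear.
import torch
from brevitas.nn import QuantLinear

layer = QuantLinear(in_features=64, out_features=32, bias=True)  # 8-bit per-tensor weight quantization by default
x = torch.randn(8, 64)
y = layer(x)  # plain torch.Tensor, since return_quant_tensor defaults to False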
Example #2
    def __init__(
            self,
            in_features: int,
            out_features: int,
            bias: bool = True,
            rpu_config: Optional[RPUConfigAlias] = None,
            realistic_read_write: bool = False,
            weight_scaling_omega: Optional[float] = None,
    ):

        # Initialize the digital ``Linear`` first; its ``reset_parameters``
        # creates the initial weights that are later copied into the analog tiles.
        Linear.__init__(self, in_features, out_features, bias=bias)

        # Create tiles
        if rpu_config is None:
            rpu_config = SingleRPUConfig()

        AnalogModuleBase.__init__(
            self,
            in_features,
            out_features,
            bias,
            realistic_read_write,
            rpu_config.mapping
        )
        if self.analog_bias:
            raise ModuleError("AnalogLinearMapped only supports digital bias.")

        # More than one tile may need to be created. If so, the weight matrix
        # is divided into roughly equal pieces along the input and output
        # dimensions, using as many tiles as needed.
        max_input_size = rpu_config.mapping.max_input_size
        max_output_size = rpu_config.mapping.max_output_size

        self.in_sizes = self.get_split_sizes(in_features, max_input_size)
        self.out_sizes = self.get_split_sizes(out_features, max_output_size)

        self.analog_tile_array = []
        for i, in_tile_size in enumerate(self.in_sizes):
            in_tiles = []
            for j, out_tile_size in enumerate(self.out_sizes):
                tile = rpu_config.tile_class(out_tile_size,
                                             in_tile_size,
                                             rpu_config,
                                             bias=self.analog_bias)
                self.register_analog_tile(tile, name=f"{i}_{j}")
                in_tiles.append(tile)
            self.analog_tile_array.append(in_tiles)

        # Copy the weights initialized by ``reset_parameters`` into the analog tiles
        self.set_weights(self.weight, self.bias, remap_weights=True,
                         weight_scaling_omega=weight_scaling_omega)

        # Unregister weight/bias as a parameter but keep for sync
        self.unregister_parameter('weight')

        if self.analog_bias:
            self.unregister_parameter('bias')
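The nested loop above builds one tile per (input chunk, output chunk) pair, where the chunk sizes come from get_split_sizes. A standalone sketch of that splitting idea, assuming an "as few, roughly equal chunks as possible" policy (the library's actual get_split_sizes may differ in detail):

from math import ceil
from typing import List

def get_split_sizes(size: int, max_size: int) -> List[int]:
    """Split `size` into roughly equal chunks, each no larger than `max_size`."""
    n_parts = ceil(size / max_size)
    base, extra = divmod(size, n_parts)
    # The first `extra` chunks receive one extra element.
    return [base + 1 if i < extra else base for i in range(n_parts)]

print(get_split_sizes(784, 256))  # -> [196, 196, 196, 196]
print(get_split_sizes(500, 256))  # -> [250, 250]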
Example #3
def __init__(
        self,
        in_features: int,
        out_features: int,
        bias: bool,
        weight_quant: Optional[WeightQuantType] = Int8WeightPerTensorFloat,
        bias_quant: Optional[BiasQuantType] = None,
        input_quant: Optional[ActQuantType] = None,
        output_quant: Optional[ActQuantType] = None,
        return_quant_tensor: bool = False,
        **kwargs) -> None:
    Linear.__init__(self, in_features, out_features, bias)
    QuantWBIOL.__init__(self,
                        weight_quant=weight_quant,
                        bias_quant=bias_quant,
                        input_quant=input_quant,
                        output_quant=output_quant,
                        return_quant_tensor=return_quant_tensor,
                        **kwargs)
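Compared to Example #1, this variant no longer passes weight and bias explicitly to QuantWBIOL.__init__; the mixin is presumably expected to pick them up from the module after Linear.__init__ has created them.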
Example #4
    def __init__(
            self,
            in_features: int,
            out_features: int,
            bias: bool = True,
            rpu_config: Optional[RPUConfigAlias] = None,
            realistic_read_write: bool = False,
            weight_scaling_omega: Optional[float] = None,
    ):
        # Initialize the digital ``Linear`` first; its ``reset_parameters``
        # creates the initial weights that are later copied into the analog tile.
        Linear.__init__(self, in_features, out_features, bias=bias)

        # Create tile
        if rpu_config is None:
            rpu_config = SingleRPUConfig()

        AnalogModuleBase.__init__(
            self,
            in_features,
            out_features,
            bias,
            realistic_read_write,
            weight_scaling_omega,
            rpu_config.mapping
        )
        self.analog_tile = self._setup_tile(rpu_config)

        # Register tile
        self.register_analog_tile(self.analog_tile)

        # Set weights from the reset_parameters call
        self.set_weights(self.weight, self.bias)

        # Unregister weight/bias as parameters but keep them as fields
        # (still needed for syncing)
        self.unregister_parameter('weight')
        if self.analog_bias:
            self.unregister_parameter('bias')
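A minimal usage sketch, assuming the excerpt is aihwkit's AnalogLinear (the class name itself is not shown above):

# Minimal usage sketch; assumes the excerpt above is aihwkit.nn.AnalogLinear.
import torch
from aihwkit.nn import AnalogLinear
from aihwkit.simulator.configs import SingleRPUConfig

layer = AnalogLinear(64, 32, bias=True, rpu_config=SingleRPUConfig())
x = torch.randn(8, 64)
y = layer(x)  # forward pass runs through the simulated analog tile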
Example #5
def __init__(self,
             in_features,
             out_features,
             bias=True,
             cast_func=void_cast_func,
             n_train_sample=1):
    BitCenterLayer.__init__(self,
                            fp_functional=F.linear,
                            lp_functional=bit_center_linear,
                            bias=bias,
                            cast_func=cast_func,
                            n_train_sample=n_train_sample)
    Linear.__init__(self,
                    in_features=in_features,
                    out_features=out_features,
                    bias=bias)
    # weight_delta is the delta tensor in the bit-centering algorithm, while
    # weight_lp is the cached low-precision copy of the weight offset.
    self.setup_bit_center_vars()
    # Make sure the variables are on the GPU, as fp16 is only supported on GPU.
    self.cuda()
    self.reset_parameters_bit_center()
    # Register a backward hook to update the gradient cache for the output gradient.
    self.register_backward_hook(self.update_grad_output_cache)
Example #6
    def __init__(self,
                 in_features: int,
                 out_features: int,
                 bias: bool,
                 bias_quant_type: QuantType = QuantType.FP,
                 bias_narrow_range: bool = False,
                 bias_bit_width: int = None,
                 weight_quant_override: WeightQuantProxy = None,
                 weight_quant_type: QuantType = QuantType.FP,
                 weight_narrow_range: bool = False,
                 weight_bit_width_impl_override: Union[BitWidthParameter, BitWidthConst] = None,
                 weight_bit_width_impl_type: BitWidthImplType = BitWidthImplType.CONST,
                 weight_restrict_bit_width_type: RestrictValueType = RestrictValueType.INT,
                 weight_bit_width: int = 32,
                 weight_min_overall_bit_width: Optional[int] = 2,
                 weight_max_overall_bit_width: Optional[int] = None,
                 weight_scaling_override: Optional[Module] = None,
                 weight_scaling_impl_type: ScalingImplType = ScalingImplType.STATS,
                 weight_scaling_const: Optional[float] = None,
                 weight_scaling_stats_op: StatsOp = StatsOp.MAX,
                 weight_scaling_per_output_channel: bool = False,
                 weight_scaling_min_val: float = SCALING_MIN_VAL,
                 weight_ternary_threshold: float = 0.5,
                 weight_restrict_scaling_type: RestrictValueType = RestrictValueType.LOG_FP,
                 weight_scaling_stats_sigma: float = 3.0,
                 weight_override_pretrained_bit_width: bool = False,
                 compute_output_scale: bool = False,
                 compute_output_bit_width: bool = False,
                 return_quant_tensor: bool = False) -> None:
        QuantLayer.__init__(self,
                            compute_output_scale=compute_output_scale,
                            compute_output_bit_width=compute_output_bit_width,
                            return_quant_tensor=return_quant_tensor)
        Linear.__init__(self,
                        in_features=in_features,
                        out_features=out_features,
                        bias=bias)
        if weight_quant_type == QuantType.FP and compute_output_bit_width:
            raise Exception("Computing output bit width requires enabling quantization")
        if bias_quant_type != QuantType.FP and not (compute_output_scale and compute_output_bit_width):
            raise Exception("Quantizing the bias requires computing the output scale and output bit width")

        self.per_elem_ops = 2 * in_features
        self.weight_reg = WeightReg()

        if weight_quant_override is not None:
            self.weight_quant = weight_quant_override
            self.weight_quant.add_tracked_tensor(self.weight)
        else:
            weight_scaling_stats_input_concat_dim = 1
            if weight_scaling_per_output_channel:
                weight_stats_input_view_shape_impl = StatsInputViewShapeImpl.OVER_OUTPUT_CHANNELS
                weight_scaling_shape = (self.out_features, 1)
                weight_scaling_stats_reduce_dim = 1
            else:
                weight_stats_input_view_shape_impl = StatsInputViewShapeImpl.OVER_TENSOR
                weight_scaling_shape = SCALING_SCALAR_SHAPE
                weight_scaling_stats_reduce_dim = None

            self.weight_quant = WeightQuantProxy(bit_width=weight_bit_width,
                                                 quant_type=weight_quant_type,
                                                 narrow_range=weight_narrow_range,
                                                 scaling_override=weight_scaling_override,
                                                 restrict_scaling_type=weight_restrict_scaling_type,
                                                 scaling_const=weight_scaling_const,
                                                 scaling_stats_op=weight_scaling_stats_op,
                                                 scaling_impl_type=weight_scaling_impl_type,
                                                 scaling_stats_reduce_dim=weight_scaling_stats_reduce_dim,
                                                 scaling_shape=weight_scaling_shape,
                                                 bit_width_impl_type=weight_bit_width_impl_type,
                                                 bit_width_impl_override=weight_bit_width_impl_override,
                                                 restrict_bit_width_type=weight_restrict_bit_width_type,
                                                 min_overall_bit_width=weight_min_overall_bit_width,
                                                 max_overall_bit_width=weight_max_overall_bit_width,
                                                 tracked_parameter_list_init=self.weight,
                                                 ternary_threshold=weight_ternary_threshold,
                                                 scaling_stats_input_view_shape_impl=weight_stats_input_view_shape_impl,
                                                 scaling_stats_input_concat_dim=weight_scaling_stats_input_concat_dim,
                                                 scaling_stats_sigma=weight_scaling_stats_sigma,
                                                 scaling_min_val=weight_scaling_min_val,
                                                 override_pretrained_bit_width=weight_override_pretrained_bit_width)
        self.bias_quant = BiasQuantProxy(quant_type=bias_quant_type,
                                         narrow_range=bias_narrow_range,
                                         bit_width=bias_bit_width)
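The weight_scaling_per_output_channel branch above selects between one scale per output channel (shape (out_features, 1)) and a single per-tensor scale. A standalone sketch of the two granularities with the default StatsOp.MAX statistic, independent of the class in the excerpt:

import torch

weight = torch.randn(32, 64)                                 # (out_features, in_features)
per_channel_scale = weight.abs().amax(dim=1, keepdim=True)   # shape (32, 1), one scale per output row
per_tensor_scale = weight.abs().amax()                       # scalar, one scale for the whole tensor
print(per_channel_scale.shape, per_tensor_scale.shape)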