return self.quantize(input_, self.scale, self.num_bits)


if __name__ == '__main__':
    per_tensor_scale_shape = (1, )
    for input_name, input_size, gpu_runs in TEST_PARAMS_STRUCT:
        weight_per_channel_scale_shape = get_per_channel_scale_shape(
            input_size, is_weights=True)
        act_per_channel_scale_shape = get_per_channel_scale_shape(
            input_size, is_weights=False)

        print("CUDA " + input_name)
        print("------------------------------------------------")
        print("Pytorch Symmetric (cuda 0) impl:")
        print("input size: {0}".format(input_size))
        run_profile(
            ReferenceQuantize(NBITS).cuda(), input_size, 'cuda', gpu_runs)

        print()
        print("Custom Symmetric (cuda 0 ) impl:")
        print("input size: {0}".format(input_size))
        run_profile(
            SymmetricQuantizer(
                DefaultedPTQuantizerSpec(scale_shape=per_tensor_scale_shape,
                                         num_bits=NBITS)).cuda(), input_size,
            'cuda', gpu_runs)

        print()
        print("Pytorch Symmetric Per Weight Channel (cuda 0) impl:")
        print("input size: {0}".format(input_size))
        run_profile(
            ReferenceQuantize(NBITS,
        self.quantize = ReferenceQuantizeSymmetric.apply

    def get_scale(self):
        """Return the quantizer's current scale parameter."""
        current_scale = self.scale
        return current_scale

    def forward(self, input_):
        """Apply the stored quantize function to *input_*.

        Delegates to ``self.quantize`` with the instance's scale and
        bit width; returns whatever the quantize function produces.
        """
        quantize_fn = self.quantize
        return quantize_fn(input_, self.scale, self.num_bits)


if __name__ == '__main__':
    for input_name, input_size, gpu_runs in TEST_PARAMS_STRUCT:
        print("CUDA " + input_name)
        print("------------------------------------------------")
        print("Pytorch Symmetric (cuda 0) impl:")
        print("input size: {0}".format(input_size))
        run_profile(
            ReferenceQuantize(NBITS).cuda(), input_size, 'cuda', gpu_runs)

        print()
        print("Custom Symmetric (cuda 0 ) impl:")
        print("input size: {0}".format(input_size))
        run_profile(
            SymmetricQuantizer(QuantizerConfig(
                QuantizationParams(bits=NBITS))).cuda(), input_size, 'cuda',
            gpu_runs)

        print()
        print("Pytorch Symmetric Per Weight Channel (cuda 0) impl:")
        print("input size: {0}".format(input_size))
        run_profile(
            ReferenceQuantize(NBITS,
                              input_shape=input_size,
    def forward(self, input_):
        """Binarize *input_* via the reference autograd function.

        Forwards the instance's scale and threshold alongside the input
        to ``ReferenceActivationBinarize.apply``.
        """
        binarize_args = (input_, self.scale, self.threshold)
        return ReferenceActivationBinarize.apply(*binarize_args)


if __name__ == '__main__':
    for input_name, input_size, gpu_runs in TEST_PARAMS_STRUCT:
        print()
        print("CUDA " + input_name)
        print("------------------------------------------------")
        print("Pytorch XNOR weight binarization (cuda 0) impl:")
        print("input size: {0}".format(input_size))
        run_profile(ReferenceWeightBinarizationModule('xnor').cuda(),
                    input_size,
                    'cuda',
                    gpu_runs,
                    forward_only=True)

        print()
        print("Custom XNOR weight binarization (cuda 0) impl:")
        print("input size: {0}".format(input_size))
        run_profile(XNORBinarize(enabled=True).cuda(),
                    input_size,
                    'cuda',
                    gpu_runs,
                    forward_only=True)

        print()
        print("Pytorch DoReFa weight binarization (cuda 0) impl:")
        print("input size: {0}".format(input_size))