Code example #1
        # Quantize the tensor
        self.q_input = torch.quantize_per_tensor(f_input,
                                                 scale=self.scale,
                                                 zero_point=self.zero_point,
                                                 dtype=dtype)
        if not contig:
            # Make non-contiguous
            new_shape = list(range(self.q_input.ndim))[::-1]
            self.q_input = self.q_input.permute(new_shape)

    def init(self, dims, contig, inplace, dtype, op_func):
        self._setup(dims, contig, dtype)
        self.qop = op_func

    def forward(self):
        if self.qop in (nnq.functional.hardswish, nnq.functional.elu,
                        nnq.functional.celu):
            return self.qop(self.q_input,
                            scale=self.scale,
                            zero_point=self.zero_point)
        return self.qop(self.q_input)


op_bench.generate_pt_tests_from_op_list(
    qactivation_ops, qactivation_short_configs + qactivation_long_configs,
    QActivationBenchmarkBase)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
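The op list and the short/long configs that generate_pt_tests_from_op_list consumes here are defined earlier in the file and are not part of the excerpt. A minimal hedged sketch of how such objects could be declared (names and values below are illustrative assumptions, and op_bench is assumed to come from the operator_benchmark module):

import operator_benchmark as op_bench
import torch
import torch.nn.quantized as nnq

# Illustrative subset only; the real file registers many more activations.
qactivation_ops = op_bench.op_list(
    attr_names=('op_name', 'op_func'),
    attrs=(
        ('relu', torch.nn.functional.relu),
        ('functional.hardswish', nnq.functional.hardswish),
    ),
)

qactivation_short_configs = op_bench.cross_product_configs(
    dims=((3, 4, 5),),       # shape of the float input tensor
    contig=(False, True),    # whether to permute into a non-contiguous layout
    inplace=(False,),        # accepted by init() above but unused in the excerpt
    dtype=(torch.quint8,),
    tags=('short',),
)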
Code example #2
File: qactivation_test.py, Project: mfkasim1/pytorch
            q_input = q_input.permute(new_shape)

        self.inputs = {"q_input": q_input}

    def init(self, dims, contig, inplace, dtype, op_func):
        self._setup(dims, contig, dtype)
        self.qop = op_func


class QActivationBenchmark(QActivationBenchmarkBase):
    def forward(self, q_input):
        return self.qop(q_input)


op_bench.generate_pt_tests_from_op_list(
    qactivation_ops, qactivation_short_configs + qactivation_long_configs,
    QActivationBenchmark)

qactivation_scale_zero_point_ops = op_bench.op_list(
    attrs=(
        ('functional.hardswish', nnq.functional.hardswish),
        ('functional.elu', nnq.functional.elu),
        ('functional.celu', nnq.functional.celu),
    ),
    attr_names=('op_name', 'op_func'),
)


class QActivationScaleZeroPointBenchmark(QActivationBenchmarkBase):
    def forward(self, q_input):
        return self.qop(q_input, scale=self.scale, zero_point=self.zero_point)
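The excerpt stops here; following the registration pattern used for QActivationBenchmark above, the file presumably goes on to register the scale/zero-point ops as well, roughly:

op_bench.generate_pt_tests_from_op_list(
    qactivation_scale_zero_point_ops,
    qactivation_short_configs + qactivation_long_configs,
    QActivationScaleZeroPointBenchmark)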
Code example #3
hardswish_configs_long = op_bench.cross_product_configs(N=[8, 16],
                                                        C=[3],
                                                        H=[256, 512],
                                                        W=[256, 512],
                                                        device=['cpu'],
                                                        tags=['long'])

hardswish_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['Hardswish', nn.Hardswish],
    ],
)


class HardswishBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, N, C, H, W, device, op_func):
        self.input_one = torch.rand(N, C, H, W, device=device)
        self.op_func = op_func()

    def forward(self):
        return self.op_func(self.input_one)


op_bench.generate_pt_tests_from_op_list(
    hardswish_ops_list, hardswish_configs_short + hardswish_configs_long,
    HardswishBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Code example #4
)


class Pool1dBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, kernel, stride, N, C, L, device, op_func):
        self.input = torch.rand(N, C, L, device=device)
        self.kernel = kernel
        self.stride = stride
        self.op_func = op_func(self.kernel, stride=self.stride)

    def forward(self):
        return self.op_func(self.input)


op_bench.generate_pt_tests_from_op_list(
    pool_1d_ops_list, pool_1d_configs_short + pool_1d_configs_long,
    Pool1dBenchmark)
"""
Microbenchmarks for MaxPool2d and AvgPool2d operators.
"""

# Configs for pool-2d ops
pool_2d_configs_short = op_bench.config_list(
    attr_names=['kernel', 'stride', 'N', 'C', 'H', 'W'],
    attrs=[
        [[3, 1], [2, 1], 1, 16, 32, 32],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=['short'])
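The excerpt ends with the short 2-d configs. Judging from the Pool1dBenchmark above and the MaxPool2d/AvgPool2d docstring, the 2-d half of the file presumably mirrors the same structure; a hedged sketch (the op list, class, and registration below are assumptions, not the file's actual code):

import torch
import torch.nn as nn

pool_2d_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['MaxPool2d', nn.MaxPool2d],
        ['AvgPool2d', nn.AvgPool2d],
    ],
)


class Pool2dBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, kernel, stride, N, C, H, W, device, op_func):
        self.input = torch.rand(N, C, H, W, device=device)
        self.op_func = op_func(kernel, stride=stride)

    def forward(self):
        return self.op_func(self.input)


op_bench.generate_pt_tests_from_op_list(
    pool_2d_ops_list, pool_2d_configs_short,
    Pool2dBenchmark)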
Code example #5
File: qarithmetic_test.py, Project: mikekgfb/pytorch
            "q_input_a": self.q_input_a,
            "q_input_b": self.q_input_a,
            "scale": self.scale,
            "zero_point": self.zero_point
        }
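        # Note: q_input_b aliases q_input_a above, so both operands of the
        # binary op are the same quantized tensor.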
        self.op_func = op_func

    def forward(self, q_input_a, q_input_b, scale: float, zero_point: int):
        return self.op_func(q_input_a,
                            q_input_b,
                            scale=scale,
                            zero_point=zero_point)


op_bench.generate_pt_tests_from_op_list(qarithmetic_binary_ops,
                                        qarithmetic_binary_configs,
                                        QFunctionalBenchmark)


class QFunctionalScalarBenchmark(_QFunctionalBinaryArithmeticBenchmarkBase):
    def init(self, N, dtype, contig, op_func):
        super(QFunctionalScalarBenchmark, self).setup(N, dtype, contig)
        self.inputs = {"q_input": self.q_input_a, "scalar_input": 42}
        self.op_func = op_func

    def forward(self, q_input, scalar_input: int):
        return self.op_func(q_input, scalar_input)


op_bench.generate_pt_tests_from_op_list(qarithmetic_binary_scalar_ops,
                                        qarithmetic_binary_configs,
Code example #6
        self.inputs = {
            "input": self.input,
            "scale": self.scale,
            "zero_point": self.zero_point,
            "quant_min": self.quant_min,
            "quant_max": self.quant_max,
        }
        self.op_func = op_func

    def forward(self, input, scale, zero_point, quant_min: int,
                quant_max: int):
        return self.op_func(input, scale, zero_point, quant_min, quant_max)


op_bench.generate_pt_tests_from_op_list(
    fake_quantize_per_tensor_ops,
    fake_quantize_operator_configs_short + fake_quantize_operator_configs_long,
    FakeQuantizePerTensorBaseOpBenchmark)
op_bench.generate_pt_gradient_tests_from_op_list(
    fake_quantize_per_tensor_ops,
    fake_quantize_operator_configs_short + fake_quantize_operator_configs_long,
    FakeQuantizePerTensorBaseOpBenchmark)


def fakeQuantizePerChannelLearnableKernel(input, scale, zero_point, axis: int,
                                          quant_min: int, quant_max: int):
    return torch._fake_quantize_learnable_per_channel_affine(
        input, scale, zero_point, axis, quant_min, quant_max)


def fakeQuantizePerChannelOriginalKernel(input, scale, zero_point, axis: int,
                                         quant_min: int, quant_max: int):
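    # (The excerpt is truncated at this point. By analogy with the learnable
    # wrapper above, the body presumably just forwards to the public
    # per-channel fake-quantize op, e.g.:)
    return torch.fake_quantize_per_channel_affine(
        input, scale, zero_point, axis, quant_min, quant_max)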
Code example #7
class BatchElementWiseBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, B, M, N, device, op_func):
        self.inputs = {
            "input_one": torch.rand(B, M, N, device=device),
            "input_two": torch.rand(B, M, N, device=device)
        }
        self.op_func = op_func

    def forward(self, input_one, input_two):
        if self.op_func.__name__ == "einsum":
            return torch.einsum('bij,bij->bij', input_one, input_two)
        else:
            return torch.mul(input_one, input_two)


op_bench.generate_pt_tests_from_op_list(
    batch_mm_op_list,
    batch_mm_configs_short + batch_mm_configs_long,
    BatchMatrixMultBenchmark,
)

op_bench.generate_pt_tests_from_op_list(
    batch_elementwise_op_list,
    batch_elementwise_configs_short + batch_elementwise_configs_long,
    BatchElementWiseBenchmark,
)


if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Code example #8
    def init(self, num_embeddings, embedding_dim, op_func):
        self.weight = torch.from_numpy((np.random.random_sample(
            (num_embeddings, embedding_dim)) + 1).astype(np.float32))
        self.op_func = op_func

    def forward(self):
        return self.op_func(self.weight)


class EmbeddingBagFusedToFloatBase(op_bench.TorchBenchmarkBase):
    def init(self, num_embeddings, embedding_dim, op_func):
        weight = torch.randn(num_embeddings,
                             embedding_dim + 8,
                             dtype=torch.float)
        self.packed_weight = weight.to(torch.uint8)
        self.op_func = op_func

    def forward(self):
        return self.op_func(self.packed_weight)


op_bench.generate_pt_tests_from_op_list(
    conversion_ops, embeddingbag_conversion_short_configs +
    embeddingbag_conversion_long_configs, EmbeddingBagFloatToFusedBase)
op_bench.generate_pt_tests_from_op_list(
    unpack_ops, embeddingbag_conversion_short_configs +
    embeddingbag_conversion_long_configs, EmbeddingBagFusedToFloatBase)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Code example #9
    def forward(
        self,
        prepacked_weights,
        indices,
        offsets,
        mode: int,
        per_sample_weights: Optional[torch.Tensor],
        include_last_offset: bool,
        is_pruned_weights: bool,
        compressed_indices: Optional[torch.Tensor]
    ):
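        # Note: the call below hardcodes mode=0 and reads the
        # include_last_offset / pruning settings from attributes set in init,
        # rather than from the forward() arguments above.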
        return self.op_func(prepacked_weights, indices, offsets,
                            mode=0,
                            per_sample_weights=per_sample_weights,
                            include_last_offset=self.include_last_offset,
                            pruned_weights=self.is_pruned_weights,
                            compressed_indices_mapping=self.compressed_indices)


op_bench.generate_pt_tests_from_op_list(four_bit_rowwise_ops,
                                        full_configs,
                                        EmbedddingBag4BitRowwiseOffsetsTest)
op_bench.generate_pt_tests_from_op_list(byte_rowwise_ops,
                                        full_configs,
                                        EmbedddingBagByteRowwiseOffsetsTest)


if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Code example #10
    W=[256, 512],
    device=['cpu'],
    tags=['long']
)


hardsigmoid_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['Hardsigmoid', nn.Hardsigmoid],
    ],
)


class HardsigmoidBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, N, C, H, W, device, op_func):
        self.input_one = torch.rand(N, C, H, W, device=device)
        self.op_func = op_func()

    def forward(self):
        return self.op_func(self.input_one)


op_bench.generate_pt_tests_from_op_list(hardsigmoid_ops_list,
                                        hardsigmoid_configs_short + hardsigmoid_configs_long,
                                        HardsigmoidBenchmark)


if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Code example #11
    },
    tags=["short"])


class BinaryOpBcastBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, in_one, in_two, dtype, device, op_func):
        self.in_one = torch.randn(in_one, device=device).to(dtype=dtype)
        self.in_two = torch.randn(in_two, device=device).to(dtype=dtype)
        self.op_func = op_func

    def forward(self):
        return self.op_func(self.in_one, self.in_two)


op_bench.generate_pt_tests_from_op_list(binary_ops_bcast_list,
                                        binary_configs_broadcast,
                                        BinaryOpBcastBenchmark)
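The broadcast op list and configs are cut off at the top of this excerpt; presumably they pair each op with two shapes where the second input broadcasts against the first. A hedged sketch (ops and shapes below are illustrative assumptions):

binary_ops_bcast_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['add', torch.add],
    ],
)

binary_configs_broadcast = op_bench.config_list(
    attr_names=['in_one', 'in_two'],
    attrs=[
        [[64, 1, 64], [1, 64, 1]],
    ],
    cross_product_configs={
        'device': ['cpu'],
        'dtype': [torch.float],
    },
    tags=['short'])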

# Benchmark ops performance without broadcast
binary_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['add', torch.add],
        ['copy_', lambda in1, in2: in1.copy_(in2)],
    ],
)

binary_short_configs = op_bench.config_list(
    attr_names=['M', 'N', 'K'],
    attrs=[
        [1, 1, 1],
Code example #12
    attrs=[
        ['PerChannelMinMaxObserver', obs.PerChannelMinMaxObserver],
        [
            'MovingAveragePerChannelMinMaxObserver',
            obs.MovingAveragePerChannelMinMaxObserver
        ],
    ])


class QObserverBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, C, M, N, dtype, qscheme, op_func, device):
        self.f_input = torch.rand(C, M, N, device=device)
        self.op_func = op_func(dtype=dtype, qscheme=qscheme).to(device)

    def forward(self):
        return self.op_func(self.f_input)


op_bench.generate_pt_tests_from_op_list(
    qobserver_per_tensor_list,
    qobserver_per_tensor_configs_short + qobserver_per_tensor_configs_long,
    QObserverBenchmark)

op_bench.generate_pt_tests_from_op_list(
    qobserver_per_channel_list,
    qobserver_per_channel_configs_short + qobserver_per_channel_configs_long,
    QObserverBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Code example #13
        self.q_input_a = torch.quantize_per_tensor(f_input,
                                                   scale=scale,
                                                   zero_point=zero_point,
                                                   dtype=dtype)

        if not contig:
            permute_dims = list(range(f_input.ndim))[::-1]
            self.q_input_a = self.q_input_a.permute(permute_dims)

    def forward(self):
        return getattr(self.qfunctional, self.qop)(self.q_input_a,
                                                   self.q_input_b)


class QFunctionalAddBenchmarkBase(_QFunctionalBinaryArithmeticBenchmarkBase):
    def init(self, N, dtype, contig, op_func):
        super(QFunctionalAddBenchmarkBase, self).setup(N, dtype, contig)
        self.qop = op_func
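        # op_func here is a method *name* (a string), not a callable:
        # forward() above looks it up with getattr(self.qfunctional, self.qop)
        # (self.qfunctional is presumably an nn.quantized.QFunctional created
        # in the base class), and the '_scalar' suffix check below swaps in a
        # plain Python scalar as the second operand.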
        if self.qop.endswith('_scalar'):
            self.q_input_b = 42
        else:
            self.q_input_b = self.q_input_a


op_bench.generate_pt_tests_from_op_list(qarithmetic_binary_ops,
                                        qarithmetic_binary_configs,
                                        QFunctionalAddBenchmarkBase)

if __name__ == '__main__':
    op_bench.benchmark_runner.main()
Code example #14
        q_input_a = torch.quantize_per_tensor(f_input, scale=scale,
                                              zero_point=zero_point,
                                              dtype=dtype)
        if other_scalar:
            q_input_b = 42
        else:
            q_input_b = q_input_a.clone()

        if not contig:
            permute_dims = list(range(f_input.ndim))[::-1]
            q_input_a = q_input_a.permute(permute_dims)

        self.qop = op_func
        self.args = (q_input_a, q_input_b)
        self.kwargs = {}
        if out_variant:
            self.kwargs['out'] = torch.tensor([], dtype=torch.bool)

    def forward(self):
        return self.qop(*self.args, **self.kwargs)


op_bench.generate_pt_tests_from_op_list(qcomparators_ops,
                                        qcomparators_configs,
                                        QComparatorBenchmark)


if __name__ == '__main__':
    op_bench.benchmark_runner.main()
Code example #15
    [16, 256, 28, 28],
],
                                            attr_names=['N', 'C', 'H', 'W'],
                                            tags=['long'])

softmax_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['Softmax', nn.Softmax],
        ['Softmax2d', nn.Softmax2d],
        ['LogSoftmax', nn.LogSoftmax],
    ],
)


class SoftmaxBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, N, C, H, W, op_func):
        self.input_one = torch.rand(N, C, H, W)
        self.op_func = op_func()
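        # Note: nn.Softmax() and nn.LogSoftmax() are constructed here without
        # an explicit dim, which triggers the implicit-dimension deprecation
        # warning on recent PyTorch versions.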

    def forward(self):
        return self.op_func(self.input_one)


op_bench.generate_pt_tests_from_op_list(
    softmax_ops_list, softmax_configs_short + softmax_configs_long,
    SoftmaxBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Code example #16
    def init(self, M, N, dtype, contig, op_func):
        f_input = torch.rand(M, N)
        scale = 1.0
        zero_point = 0
        self.q_input = torch.quantize_per_tensor(f_input,
                                                 scale=scale,
                                                 zero_point=zero_point,
                                                 dtype=dtype)
        if not contig:
            permute_dims = list(range(self.q_input.ndim))[::-1]
            self.q_input = self.q_input.permute(permute_dims)
        self.op_func = op_func
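        # op_func is the name of a quantized-tensor method (a string); the
        # subclasses below call it via getattr(self.q_input, self.op_func).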


class QMethodTensorInputBenchmark(_QMethodBenchmarkBase):
    def forward(self):
        getattr(self.q_input, self.op_func)(self.q_input)


class QMethodNoInputBenchmark(_QMethodBenchmarkBase):
    def forward(self):
        getattr(self.q_input, self.op_func)()


op_bench.generate_pt_tests_from_op_list(
    qmethods_tensor_input_list, qmethods_configs_short + qmethods_configs_long,
    QMethodTensorInputBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Code example #17
    N=[32, 64],
    K=[256, 512],
    device=['cpu', 'cuda'],
    dtype=[torch.int32, torch.float, torch.double],
    tags=['long'])


class RemainderOpBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K, device, dtype, op_func):
        self.dividend = torch.rand(M, N, K, device=device)
        self.dividend = (self.dividend * 1000 - 500).to(dtype=dtype)

        self.divisor = torch.rand(M, N, K, device=device)
        # +1 so we don't divide by zero
        self.divisor = (self.divisor * 40 + 1).to(dtype=dtype)

        self.inputs = {"dividend": self.dividend, "divisor": self.divisor}

        self.op_func = op_func

    def forward(self, dividend, divisor):
        return self.op_func(dividend, divisor)


op_bench.generate_pt_tests_from_op_list(
    remainder_ops_list, remainder_short_configs + remainder_long_configs,
    RemainderOpBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
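The op list fed into the generate call is not part of the excerpt; presumably it pairs names with the remainder-style functions, along the lines of this sketch (contents are an assumption):

remainder_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['fmod', torch.fmod],
        ['remainder', torch.remainder],
    ],
)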
Code example #18
File: unary_test.py, Project: yuk12/pytorch
        ['sinh', torch.sinh],
        ['sqrt', torch.sqrt],
        ['sqrt_', torch.sqrt_],
        ['tan', torch.tan],
        ['tan_', torch.tan_],
        ['tanh', torch.tanh],
        ['tanh_', torch.tanh_],
        ['trunc', torch.trunc],
        ['trunc_', torch.trunc_],
        ['unique', torch.unique],
        ['zero_', torch.zero_],
        ['bernoulli_', lambda t: t.bernoulli_()],
        ['cauchy_', lambda t: t.cauchy_()],
        ['digamma_', lambda t: t.digamma_()],
        ['exponential_', lambda t: t.exponential_()],
        ['normal_', lambda t: t.normal_()],
        ['random_', lambda t: t.random_()],
        ['sign_', lambda t: t.sign_()],
        ['uniform_', lambda t: t.uniform_()],
        ['half', lambda t: t.half()],
        ['long', lambda t: t.long()],
    ],
)

op_bench.generate_pt_tests_from_op_list(
    unary_ops_list, unary_ops_configs_short + unary_ops_configs_long,
    UnaryOpBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
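The UnaryOpBenchmark class and its configs are cut off at the top of this excerpt; they presumably follow the standard single-input pattern seen in the other files, e.g. (sketch, not the actual definition):

class UnaryOpBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, device, op_func):
        self.inputs = {"input": torch.rand(M, N, device=device)}
        self.op_func = op_func

    def forward(self, input):
        return self.op_func(input)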
Code example #19

class ReplaceNaNBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, dtype, replace_inf, op_func):
        input = torch.randn(M, N, dtype=dtype)
        input[0][0] = float("nan")
        self.inputs = {
            "input": input,
            "replace_inf": replace_inf
        }
        self.op_func = op_func
        self.set_module_name("nan_to_num")

    def forward(self, input, replace_inf: bool):
        # With replace_inf=True, rely on nan_to_num defaults so +/-inf are also
        # replaced (with the dtype's finite extremes); otherwise pass +/-inf
        # through unchanged.
        if replace_inf:
            return self.op_func(input, nan=1.0)
        else:
            return self.op_func(input, nan=1.0, posinf=math.inf, neginf=-math.inf)


op_bench.generate_pt_tests_from_op_list(
    nan_to_num_ops_list,
    nan_to_num_long_configs + nan_to_num_short_configs,
    ReplaceNaNBenchmark,
)


if __name__ == "__main__":
    op_bench.benchmark_runner.main()
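For completeness, a hedged sketch of the nan_to_num op list consumed above; the real file presumably benchmarks both the out-of-place and in-place variants:

nan_to_num_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['nan_to_num', torch.nan_to_num],
        ['nan_to_num_', torch.nan_to_num_],
    ],
)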