예제 #1
0
class CPUUnaryBench(Benchmark):
    """Benchmark case sweeping ``TORCH_ONLY_FUNCTIONS`` on the Torch framework.

    Both hooks delegate to the module-level ``_setupRun`` / ``_benchmark``
    helpers, so this class only contributes the argument grid.
    """

    # One grid axis per key: every entry of TORCH_ONLY_FUNCTIONS, with
    # "Torch" as the only framework value.  ``_common_arg`` is defined
    # elsewhere -- presumably it merges in the shared sweep parameters
    # (confirm against its definition).
    args = utils.grid(
        _common_arg(
            dict(function=TORCH_ONLY_FUNCTIONS, framework=("Torch",))
        )
    )

    def setupRun(self, state, arg):
        """Prepare ``state`` for grid point ``arg`` via the shared ``_setupRun``."""
        _setupRun(self, state, arg)

    def benchmark(self, state, arg):
        """Execute one benchmark step via the shared ``_benchmark``."""
        _benchmark(self, state, arg)
예제 #2
0
class NumpyComparison(Benchmark):
    """Benchmark case sweeping ``ALL_UNARY_FUNCTIONS`` over Torch and NumPy.

    Both hooks delegate to the module-level ``_setupRun`` / ``_benchmark``
    helpers, so this class only contributes the argument grid.
    """

    # Every entry of ALL_UNARY_FUNCTIONS is paired with both framework
    # values.  ``_common_arg`` is defined elsewhere -- presumably it merges
    # in the shared sweep parameters (confirm against its definition).
    args = utils.grid(
        _common_arg(
            dict(function=ALL_UNARY_FUNCTIONS, framework=("Torch", "NumPy"))
        )
    )

    def setupRun(self, state, arg):
        """Prepare ``state`` for grid point ``arg`` via the shared ``_setupRun``."""
        _setupRun(self, state, arg)

    def benchmark(self, state, arg):
        """Execute one benchmark step via the shared ``_benchmark``."""
        _benchmark(self, state, arg)
class CPUUnaryBench(Benchmark):
    """Benchmark case sweeping ``TORCH_ONLY_FUNCTIONS`` on the Torch framework.

    Declares ``sizes`` and ``strides`` user counters and delegates both hooks
    to the module-level ``_setupRun`` / ``_benchmark`` helpers.
    """

    # Every entry of TORCH_ONLY_FUNCTIONS, with "Torch" as the only
    # framework value.  ``_common_arg`` is defined elsewhere -- presumably it
    # merges in the shared sweep parameters (confirm against its definition).
    args = utils.grid(
        _common_arg(
            dict(function=TORCH_ONLY_FUNCTIONS, framework=("Torch",))
        )
    )

    # Both counters start out as a 30-character blank string -- presumably a
    # placeholder that reserves report column width (confirm with Benchmark).
    user_counters = dict.fromkeys(("sizes", "strides"), " " * 30)

    def setupRun(self, state, arg):
        """Prepare ``state`` for grid point ``arg`` via the shared ``_setupRun``."""
        _setupRun(self, state, arg)

    def benchmark(self, state, arg):
        """Execute one benchmark step via the shared ``_benchmark``."""
        _benchmark(self, state, arg)
class NumpyUnaryComparison(Benchmark):
    """Benchmark case sweeping ``ALL_UNARY_FUNCTIONS`` over Torch and NumPy.

    Declares ``sizes`` and ``strides`` user counters and delegates both hooks
    to the module-level ``_setupRun`` / ``_benchmark`` helpers.
    """

    # Every entry of ALL_UNARY_FUNCTIONS is paired with both framework
    # values.  ``_common_arg`` is defined elsewhere -- presumably it merges
    # in the shared sweep parameters (confirm against its definition).
    args = utils.grid(
        _common_arg(
            dict(function=ALL_UNARY_FUNCTIONS, framework=("Torch", "NumPy"))
        )
    )

    # Both counters start out as a 30-character blank string -- presumably a
    # placeholder that reserves report column width (confirm with Benchmark).
    user_counters = dict.fromkeys(("sizes", "strides"), " " * 30)

    def setupRun(self, state, arg):
        """Prepare ``state`` for grid point ``arg`` via the shared ``_setupRun``."""
        _setupRun(self, state, arg)

    def benchmark(self, state, arg):
        """Execute one benchmark step via the shared ``_benchmark``."""
        _benchmark(self, state, arg)
예제 #5
0
class NumpyReduceComparison(Benchmark):
    """Benchmark reduction functions on Torch (and optionally NumPy) tensors.

    Grid parameters, as used by the methods below:

    * ``dims`` -- ``(d0, reduce_dim)``.  ``d0`` is forwarded to
      ``make_tensor`` (presumably the tensor rank -- confirm against
      ``make_tensor``); ``reduce_dim`` is the dimension to reduce over, with
      ``None`` meaning "reduce over all elements".
    * ``mag`` -- the tensor is built from ``10 ** mag``.
    * ``cont`` / ``trans`` / ``dtype`` -- forwarded to ``make_tensor``.
    * ``function`` -- pairs whose element 0 is looked up on ``torch`` and
      whose element 1 is looked up on ``np``.
    * ``framework`` -- ``"Torch"`` selects the Torch path; any other value
      selects the NumPy path.
    """

    # NB: NumPy doesn't parallelize its reductions
    args = utils.grid({
        "dims": ((3, None), (3, 2), (3, 1), (3, 0)),
        "mag": (6, ),
        "cont": (False, True),
        "trans": (False, True),
        "dtype": (torch.float, ),
        "function": ALL_REDUCE_FUNCTIONS,
        "framework": ("Torch", ),  # "NumPy"),
    })

    user_counters = {"shape": 10 * " "}

    def _benchmark(self, state, arg):
        """Run the selected reduction once on the tensor stored in ``state``."""
        reduce_dim = arg.dims[1]
        # Compare against None explicitly: dimension 0 is a valid reduction
        # axis but is falsy, so a plain truthiness test would silently turn
        # the (3, 0) configuration into a full reduction.
        if arg.framework == "Torch":
            reduce_fn = getattr(torch, arg.function[0])
            if reduce_dim is not None:
                reduce_fn(state.torch_tensor, reduce_dim, out=state.output)
            else:
                reduce_fn(state.torch_tensor)
        else:
            reduce_fn = getattr(np, arg.function[1])
            if reduce_dim is not None:
                reduce_fn(state.numpy_tensor,
                          axis=reduce_dim,
                          out=state.output)
            else:
                reduce_fn(state.numpy_tensor)

    def setupRun(self, state, arg):
        """Build the input tensor(s) for ``arg`` and run the reduction once."""
        size_ = int(math.pow(10, arg.mag))
        tv = make_tensor(size_, arg.dtype, arg.cont, arg.dims[0], arg.trans)
        state.shape = str(tv.size())
        state.torch_tensor = tv
        # No preallocated output buffer; ``out=None`` is passed through.
        state.output = None
        if arg.framework == "NumPy":
            # Only float and double are converted; for any other dtype
            # ``state.numpy_tensor`` is left unset.
            if arg.dtype == torch.float:
                state.numpy_tensor = state.torch_tensor.numpy()
                assert state.numpy_tensor.dtype == np.float32
            if arg.dtype == torch.double:
                state.numpy_tensor = state.torch_tensor.numpy()
                assert state.numpy_tensor.dtype == np.float64
        # One untimed call during setup -- presumably a warm-up.
        self._benchmark(state, arg)

    def benchmark(self, state, arg):
        """Execute one benchmark step."""
        self._benchmark(state, arg)
예제 #6
0
class CPULSTMBench(Benchmark):
    """Benchmark a single-layer ``nn.LSTM`` forward (and optional backward) pass.

    Each ``size`` entry is ``[N, T, D, H]``: batch size, sentence length,
    embedding size and hidden size.  When ``arg.train`` is true an L1 loss
    against random targets is computed and back-propagated on every step.
    """

    sizes = [
        [64, 15, 500, 500],
        [64, 20, 500, 500],
        [64, 25, 500, 500],
        [64, 30, 500, 500],
        [64, 35, 500, 500],
        [64, 40, 500, 500],
        [64, 45, 500, 500],
        [64, 50, 500, 500],
        [16, 25, 512, 512],
        [32, 25, 512, 512],
        [64, 25, 512, 512],
        [128, 25, 512, 512],
        [16, 25, 1024, 1024],
        [32, 25, 1024, 1024],
        [64, 25, 1024, 1024],
        [128, 25, 1024, 1024],
        [16, 25, 2048, 2048],
        [32, 25, 2048, 2048],
        [64, 25, 2048, 2048],
        [128, 25, 2048, 2048],
        [16, 25, 4096, 4096],
        [32, 25, 4096, 4096],
        [64, 25, 4096, 4096],
        [128, 25, 4096, 4096],
    ]
    args = utils.grid({"size": sizes, "train": (True, False)})
    user_counters = {
        "duration": 0,
        "gflops": 10 * " ",
        "GFLOPS": 10 * " ",
        "SPS": 10 * " ",
    }

    def setupRun(self, state, arg):
        """Create the LSTM, its inputs and targets, and reset the timers."""
        size = arg.size

        N = size[0]  # batch size
        T = size[1]  # sentence length
        D = size[2]  # embedding size
        H = size[3]  # hidden size

        state.N, state.T, state.D, state.H = N, T, D, H

        state.rnn = nn.LSTM(D, H, 1)
        state.input = Variable(torch.randn(T, N, D))
        state.h0 = Variable(torch.randn(1, N, H))
        state.c0 = Variable(torch.randn(1, N, H))

        # One untimed forward pass during setup -- presumably a warm-up.
        state.output, state.hn = state.rnn(state.input, (state.h0, state.c0))
        if arg.train:
            state.loss_fn = torch.nn.L1Loss()

        # The loss compares targets with the LSTM output, whose last
        # dimension is the hidden size H, so the targets must be (T, N, H).
        # Every configured size has D == H, so this is identical to the old
        # (T, N, D) shape for the existing grid.
        state.targets = Variable(torch.randn(T, N, H))
        state.num_iter = 0
        state.elapsed = 0

    def benchmark(self, state, arg):
        """Run one forward (plus backward when training) pass and time it."""
        start = time.time()
        state.output, state.hn = state.rnn(state.input, (state.h0, state.c0))
        if arg.train:
            loss = state.loss_fn(state.output, state.targets)
            loss.backward()
        state.elapsed += time.time() - start
        state.num_iter += 1

    def teardownRun(self, state, arg):
        """Turn the accumulated timings into the formatted user counters."""
        dura = (state.elapsed) / state.num_iter  # time of ONE iteration
        N, T, D, H = state.N, state.T, state.D, state.H
        # Presumably 4 gates x (input and recurrent matmuls at 2 FLOPs per
        # multiply-add) per time step -- confirm.  The same estimate is used
        # whether or not ``arg.train`` adds a backward pass.
        gflops = T * 4 * (N * H * D * 2 + N * H * H * 2) / 1e9
        GFLOPS = gflops / dura  # giga floating-point operations per second
        SPS = N / dura  # number of processed sentences per second
        state.duration = "{:.4f}".format(dura)
        state.gflops = "{:.4f}".format(gflops)
        state.GFLOPS = "{:.4f}".format(GFLOPS)
        state.SPS = "{:.4f}".format(SPS)
class CPUConvnets(Benchmark):
    """Benchmark forward / backward / optimizer-step times of convnets.

    Each ``("arch", "size")`` entry names a constructor looked up in
    ``models`` together with an input shape ``(batch, channels, height,
    width)``.  ``single_batch_size`` overrides the batch size with 1 and
    ``inference`` skips the loss, backward pass and optimizer step.
    """

    args = utils.grid({
        ("arch", "size"): (
            ("alexnet", (128, 3, 224, 224)),
            ("vgg11", (64, 3, 224, 224)),
            ("inception_v3", (128, 3, 299, 299)),
            ("resnet50", (128, 3, 224, 224)),
            ("squeezenet1_0", (128, 3, 224, 224)),
            ("densenet121", (32, 3, 224, 224)),
            # ("mobilenet_v2", (128, 3, 224, 224)),
        ),
        "single_batch_size": (True, False),
        "inference": (True, False),
    })
    user_counters = {
        "time_fwd_avg": 0,
        "time_bwd_avg": 0,
        "time_upt_avg": 0,
        "time_total": 0,
    }

    def setupRun(self, state, arg):
        """Build the network, dummy batch, optimizer and zeroed timers."""
        arch, shape = arg[("arch", "size")]
        batch_size, channels, height, width = shape
        if arg.single_batch_size:
            batch_size = 1

        # Random inputs; labels are simply 1..batch_size.
        inputs = torch.randn(batch_size, channels, height, width)
        labels = torch.arange(1, batch_size + 1).long()

        # Dummy data only, so the model is built without pre-trained weights.
        build_model = models.__dict__[arch]
        state.net = build_model()

        state.optimizer = optim.SGD(state.net.parameters(), lr=0.01)
        state.criterion = nn.CrossEntropyLoss()

        # The network is put in eval mode for every configuration,
        # including the non-inference ones.
        state.net.eval()

        state.data = Variable(inputs)
        state.target = Variable(labels)

        state.steps = 0
        state.time_fwd = state.time_bwd = state.time_upt = 0

    def benchmark(self, state, arg):
        """Run one step, accumulating forward/backward/update wall times."""
        state.optimizer.zero_grad()  # clear the gradient buffers
        fwd_begin = time.time()
        output = state.net(state.data)
        fwd_end = time.time()

        if arg.inference:
            # Forward-only step: nothing to back-propagate or update.
            state.time_fwd += fwd_end - fwd_begin
            state.steps += 1
            return

        loss = state.criterion(output, state.target)
        loss.backward()
        bwd_end = time.time()
        state.optimizer.step()  # apply the parameter update
        upt_end = time.time()

        state.time_fwd += fwd_end - fwd_begin
        state.time_bwd += bwd_end - fwd_end
        state.time_upt += upt_end - bwd_end
        state.steps += 1

    def teardownRun(self, state, arg):
        """Publish per-step averages (scaled by 1000) as formatted counters."""

        def per_step(total):
            # Average over the recorded steps, scaled by 1000.
            return total / state.steps * 1000

        fwd_avg = per_step(state.time_fwd)
        bwd_avg = per_step(state.time_bwd)
        upt_avg = per_step(state.time_upt)

        # The reported total deliberately leaves out the update time.
        total = fwd_avg + bwd_avg

        fmt = "{:2.3f}".format
        state.time_fwd_avg = fmt(fwd_avg)
        state.time_bwd_avg = fmt(bwd_avg)
        state.time_upt_avg = fmt(upt_avg)
        state.time_total = fmt(total)