Example #1
def run_benchmark(bsz, mean_i, mean_j, var, autograd, writer):
    RAND_INTS = [(int(random.gauss(mean_i, var)),
                  int(random.gauss(mean_j, var))) for _ in range(bsz)]
    src_ = nestedtensor.nested_tensor(
        [torch.randn(NDIM, i, j) for (i, j) in RAND_INTS],
        device=DEVICE, dtype=torch.float)
    src = []
    for i, s in enumerate(src_):
        # Offset each constituent by its index so the entries are distinguishable.
        src.append(i * len(s) + s)

    detr_nt_src = DETRNestedTensor.from_tensor_list(src)
    # DETR's mask is True at padded positions, so this is the padding fraction
    # of the dense layout, rounded to one decimal place.
    sparsity = int(detr_nt_src.decompose()[1].float().mean().item() * 10) / 10

    def gen_t_loop_mha(src):
        # Dense baseline: pad to a rectangle and carry an explicit mask.
        detr_nt_src = DETRNestedTensor.from_tensor_list(src)
        src, mask = detr_nt_src.decompose()
        # (B, C, H, W) -> (H*W, B, C), the sequence-first layout MHA expects.
        src = src.flatten(2).permute(2, 0, 1).contiguous()
        mask = mask.flatten(1).contiguous()
        if autograd:
            src.requires_grad_()

        def te():
            if autograd:
                MODEL(src, src, src, key_padding_mask=mask,
                      need_weights=False)[0].sum().backward()
            else:
                MODEL(src, src, src, key_padding_mask=mask,
                      need_weights=False)

        return te

    def gen_nt_mha(src):
        # Nested-tensor variant: no padding, hence no key_padding_mask.
        src = nestedtensor.nested_tensor(
            [t.flatten(1).permute(1, 0) for t in src],
            device=DEVICE, dtype=torch.float, requires_grad=True)

        def nt():
            if autograd:
                MODEL(src, src, src, need_weights=False)[0].sum().backward()
            else:
                MODEL(src, src, src, need_weights=False)

        return nt

    result_t = {**utils.benchmark_fn(gen_t_loop_mha(src), 5.0, cuda=True), "bsz": bsz,
                "sparsity": sparsity, "autograd": autograd, "var": var, "mean_i": mean_i, "mean_j": mean_j}
    result_t["numel"] = sum([x.numel() for x in src_])
    result_t["numel_div_avg_us"] = result_t["numel"]  /  result_t["avg_us"]
    result_t["avg_ns_div_numel"] = result_t["avg_us"] / result_t["numel"] * 1000
    writer.writerow(result_t)
    result_nt = {**utils.benchmark_fn(gen_nt_mha(src), 5.0, cuda=True),
                 "bsz": bsz, "sparsity": 0.0, "autograd": autograd, "var": var, "mean_i": mean_i, "mean_j": mean_j}
    result_nt["numel"] = sum([x.numel() for x in src_])
    result_nt["numel_div_avg_us"] = result_nt["numel"]  /  result_nt["avg_us"]
    result_nt["avg_ns_div_numel"] = result_nt["avg_us"] / result_nt["numel"] * 1000
    writer.writerow(result_nt)
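
This excerpt leans on module-level names that are not shown (NDIM, DEVICE, MODEL, DETRNestedTensor, utils, and the csv writer passed in). A minimal sketch of plausible definitions follows, assuming MODEL is a torch.nn.MultiheadAttention, which matches the (query, key, value, key_padding_mask=..., need_weights=...) call above; every concrete value is illustrative.

# Sketch of the assumed setup; all values here are illustrative.
import csv
import random
import sys

import torch

import nestedtensor
import utils  # this suite's benchmark helper
# DETRNestedTensor is assumed to be DETR's padded tensor+mask wrapper,
# exposing from_tensor_list() and decompose().

NDIM = 256
DEVICE = torch.device('cuda')
MODEL = torch.nn.MultiheadAttention(NDIM, 8).to(DEVICE)

# run_benchmark writes dict rows, so the writer would be built along the
# lines of:
#   writer = csv.DictWriter(sys.stdout, fieldnames=result_fieldnames)
#   writer.writeheader()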
Example #2
    def run(self):
        params = itertools.product(
            self.args.cuda,
            self.args.N,
            self.args.C,
            self.args.H,
            self.args.W,
            self.args.seed,
        )
        if self.args.V:
            var_params = [(v, v) for v in self.args.V]
        else:
            # Materialize the product: a generator would be exhausted after
            # the first base configuration below.
            var_params = list(itertools.product(self.args.HV, self.args.WV))
        # Cross every base configuration with every (h_var, w_var) pair.
        params = [p + v for p in params for v in var_params]

        writer = None
        i = 0
        for cuda, n, c, h, w, seed, h_var, w_var in params:
            # Generate the inputs before iterating over the layers so that
            # every layer sees the same input.
            self.inputs, self.targets = self.get_input(cuda, n, c, h, w, h_var,
                                                       w_var, seed)

            benchmarks = [(layer, self.get_benchmark(c, layer, cuda))
                          for layer in self.args.layers]
            for layer, benchmark in benchmarks:
                result = utils.benchmark_fn(benchmark,
                                            run_time=self.args.run_time,
                                            warmup=self.args.warmup)
                result["#"] = str(i) + "/" + str(len(benchmarks) * len(params))
                result["N"] = n
                result["C"] = c
                result["H"] = h
                result["W"] = w
                result["h_var"] = h_var
                result["w_var"] = w_var
                result["seed"] = seed
                result["avg_us"] = int(result["avg_us"])
                result["std_us"] = int(result["std_us"])
                result["name"] = layer
                result["cuda"] = cuda
                result["numel"] = sum(x.numel() for x in self.inputs)
                if writer is None and self.args.csv_log:
                    writer = csv.DictWriter(open(self.args.csv_log, 'w'),
                                            fieldnames=result.keys())
                    writer.writeheader()
                if writer is not None:
                    writer.writerow(result)
                print(",".join(
                    str((str(key), result[key]))
                    for key in sorted(result.keys())))
                i += 1
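
The method reads a number of fields off self.args. For reference, here is a hypothetical argparse setup that would satisfy it; the flag names are taken from the attribute accesses above, while types, defaults, and help text are illustrative.

# Hypothetical argparse wiring inferred from the attribute accesses above.
import argparse

def parse_args():
    p = argparse.ArgumentParser()
    p.add_argument('--cuda', type=int, nargs='+', default=[1],
                   help='1 for CUDA, 0 for CPU')
    p.add_argument('--N', type=int, nargs='+', default=[8])
    p.add_argument('--C', type=int, nargs='+', default=[3])
    p.add_argument('--H', type=int, nargs='+', default=[256])
    p.add_argument('--W', type=int, nargs='+', default=[256])
    p.add_argument('--seed', type=int, nargs='+', default=[0])
    p.add_argument('--V', type=int, nargs='*', default=[],
                   help='shared h/w variance; overrides --HV/--WV')
    p.add_argument('--HV', type=int, nargs='+', default=[0])
    p.add_argument('--WV', type=int, nargs='+', default=[0])
    p.add_argument('--layers', type=str, nargs='+', default=['conv2d'])
    p.add_argument('--run-time', type=float, default=5.0)
    p.add_argument('--warmup', type=float, default=1.0)
    p.add_argument('--csv-log', type=str, default=None)
    return p.parse_args()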
Example #3
# The excerpt starts mid-file. The imports, RAND_INTS, and the heads of
# gen_nt_unbind/gen_ant_unbind below are reconstructed from the calls in
# __main__ and the parallel *_2 variants; the RAND_INTS values are assumed.
import random

import torch

import nestedtensor
import utils

RAND_INTS = [random.randint(10, 30) for _ in range(2000)]


def gen_nt_unbind():
    nested_tensor = nestedtensor.nested_tensor(
        [torch.rand(i, 25) for i in RAND_INTS])

    def nt():
        nested_tensor.unbind()

    return nt


def gen_ant_unbind():
    nested_tensor = nestedtensor.as_nested_tensor(
        [torch.rand(i, 25) for i in RAND_INTS])

    def ant():
        nested_tensor.unbind()

    return ant


def gen_nt_unbind_2():
    nested_tensor = nestedtensor.nested_tensor(
        [[torch.rand(i, 25) for i in RAND_INTS] for j in range(100)])

    def nt_2():
        [t.unbind() for t in nested_tensor.unbind()]

    return nt_2


def gen_ant_unbind_2():
    nested_tensor = nestedtensor.as_nested_tensor(
        [[torch.rand(i, 25) for i in RAND_INTS] for j in range(100)])

    def ant_2():
        [t.unbind() for t in nested_tensor.unbind()]

    return ant_2


if __name__ == "__main__":
    print(utils.benchmark_fn(gen_nt_unbind()))
    print(utils.benchmark_fn(gen_ant_unbind()))
    print(utils.benchmark_fn(gen_nt_unbind_2()))
    print(utils.benchmark_fn(gen_ant_unbind_2()))
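
For contrast, the two constructors being compared: nestedtensor.nested_tensor copies its inputs, while as_nested_tensor tries to alias them (semantics assumed by analogy with torch.tensor vs. torch.as_tensor). A toy usage sketch:

# Toy sketch of what these benchmarks measure (constructor semantics assumed
# by analogy with torch.tensor vs. torch.as_tensor):
import torch

import nestedtensor

a, b = torch.rand(2, 25), torch.rand(3, 25)
nt = nestedtensor.nested_tensor([a, b])      # copying constructor
ant = nestedtensor.as_nested_tensor([a, b])  # aims to avoid the copy
for t in nt.unbind():  # unbind() yields the constituent tensors
    print(t.shape)     # torch.Size([2, 25]), then torch.Size([3, 25])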
Example #4
# The excerpt starts mid-file. The imports, RAND_INTS, and the head of
# gen_t_cos below are reconstructed from __main__ and the sibling benchmarks;
# the RAND_INTS values are assumed.
import random

import torch

import nestedtensor
import utils

RAND_INTS = [random.randint(10, 30) for _ in range(2000)]


def gen_t_cos():
    # One contiguous buffer for all the data, so a single kernel covers it.
    tensor = torch.cat([torch.rand(i, 2560).reshape(-1) for i in RAND_INTS])
    tensor = tensor.cuda()

    def t():
        tensor.cos_()
    return t


def gen_t_loop_cos():
    tensors = [torch.rand(i, 2560).cuda() for i in RAND_INTS]

    def t_loop():
        for t in tensors:
            t.cos_()
    return t_loop


def gen_nt_cos():
    nested_tensor = nestedtensor.nested_tensor(
        [torch.rand(i, 2560).cuda() for i in RAND_INTS])

    def nt():
        nested_tensor.cos_()
    return nt


if __name__ == "__main__":
    print(utils.benchmark_fn(gen_t_cos()))
    print(utils.benchmark_fn(gen_t_loop_cos()))
    print(utils.benchmark_fn(gen_nt_cos()))
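
A note on methodology: cos_ is an in-place CUDA op, and CUDA kernels launch asynchronously, so whatever utils.benchmark_fn does internally must synchronize the device around the timed region or the numbers only measure kernel launches. A hand-rolled sketch of that pattern (benchmark_fn presumably does the equivalent):

# Hand-rolled CUDA timing sketch; utils.benchmark_fn is assumed to do the
# equivalent synchronization internally.
import time

import torch

def time_cuda(fn, iters=100):
    fn()  # warmup
    torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in range(iters):
        fn()
    torch.cuda.synchronize()
    return (time.perf_counter() - start) / iters * 1e6  # avg microseconds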
Example #5
# The excerpt starts mid-file; these imports are assumed.
import random

import torch

import nestedtensor
import utils

# Performance tanks hard for lots of small Tensors, as expected.
RAND_INTS = [random.randint(10, 30) for _ in range(2000)]

OUTDIM = 256

TENSORS0 = [torch.rand(i, OUTDIM).cuda() for i in RAND_INTS]

def gen_t_matmul():
    nt0 = nestedtensor.nested_tensor(
        TENSORS0, device=torch.device('cuda'), dtype=torch.float)
    # Dense baseline: pad to a rectangular tensor, so the matmul also spends
    # flops on the padding.
    data, _ = nt0.to_tensor_mask()
    t1 = torch.randn(OUTDIM, 512).cuda()

    def t():
        torch.matmul(data, t1)
    return t
    return t


@torch.inference_mode()  # note: only this variant is built under inference_mode
def gen_nt_matmul():
    nt0 = nestedtensor.nested_tensor(
        TENSORS0, device=torch.device('cuda'), dtype=torch.float)
    t1 = torch.randn(OUTDIM, 512).cuda()

    def nt():
        torch.matmul(nt0, t1)
    return nt


if __name__ == "__main__":
    print(utils.benchmark_fn(gen_t_matmul()))
    print(utils.benchmark_fn(gen_nt_matmul()))
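
What to_tensor_mask gives the dense baseline: the constituents padded into one rectangular tensor plus a boolean mask, which is why the dense matmul also burns flops on the padding. A toy illustration (exact mask shape and convention are the prototype's; sizes here are small for clarity):

# Toy illustration of the padded layout the dense baseline runs on.
import torch

import nestedtensor

nt = nestedtensor.nested_tensor([torch.rand(2, 4), torch.rand(3, 4)])
data, mask = nt.to_tensor_mask()
print(data.shape)  # padded to the largest constituent, e.g. (2, 3, 4)
print(mask.shape)  # boolean mask marking which positions hold real data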
Example #6
        # The excerpt starts mid-file: `gen_jit` and its inner `gen_my_fun`
        # (taking a scalar and a tensor) open above this point, where
        # `get_scalar` (used by `my_fun` below) and the nested tensor `n`
        # are defined.
        @torch.jit.ignore
        def get_tensor() -> torch.Tensor:
            return tensor

        @torch.jit.script
        def my_fun(x, y):
            x = x + get_scalar()
            x = x + get_tensor()
            y = y + x.abs()
            return y

        return my_fun

    my_fun = gen_my_fun(3.0, torch.randn(1).to(device='cuda'))

    def _algorithm_jit():
        nestedtensor._C.jit_apply_function((n, n), my_fun)

    return _algorithm_jit


if __name__ == "__main__":
    # print(utils.benchmark_fn(alg, use_cprofile=True))
    # alg = gen_list_nested_tensor_construction()
    # print(utils.benchmark_fn(alg))
    alg1 = gen_current()
    print(utils.benchmark_fn(alg1))
    alg2 = gen_jit()
    print(utils.benchmark_fn(alg2))
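
The pattern in this file is worth isolating: @torch.jit.ignore lets a torch.jit.script function call back into plain Python, which is how my_fun closes over Python state that TorchScript could not capture directly. A self-contained sketch of the same pattern:

# Self-contained sketch of the @torch.jit.ignore pattern used above: the
# scripted function falls back to the Python interpreter for the ignored
# helpers, so it can read values from the enclosing closure.
import torch

def gen_fn(scalar, tensor):
    @torch.jit.ignore
    def get_scalar() -> float:
        return scalar

    @torch.jit.ignore
    def get_tensor() -> torch.Tensor:
        return tensor

    @torch.jit.script
    def fn(x, y):
        x = x + get_scalar()
        x = x + get_tensor()
        return y + x.abs()

    return fn

f = gen_fn(3.0, torch.randn(1))
print(f(torch.randn(2), torch.randn(2)))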
Example #7
import random

import torch

import utils


def gen_list_nested_tensor_construction():
    tensors = [torch.rand(random.randint(500, 1500), 25600) for _ in range(20)]

    def _algorithm():
        torch._ListNestedTensor(tensors)

    return _algorithm


def gen_list_nested_tensor_unbind():
    nested_tensor = torch._ListNestedTensor(
        [torch.rand(random.randint(500, 1500), 25600) for _ in range(20)])

    def _algorithm():
        nested_tensor.unbind()

    return _algorithm


if __name__ == "__main__":
    # print(utils.benchmark_fn(alg, use_cprofile=True))
    # alg = gen_list_nested_tensor_construction()
    # print(utils.benchmark_fn(alg))
    alg = gen_list_nested_tensor_unbind()
    print(utils.benchmark_fn(alg))
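
Every file in this suite follows the same protocol: a gen_* factory builds state up front and returns a zero-argument closure, and utils.benchmark_fn times that closure. A hypothetical minimal stand-in for benchmark_fn (the real helper also takes run_time, warmup, cuda, and use_cprofile arguments seen elsewhere in the suite):

# Hypothetical minimal stand-in for utils.benchmark_fn: run the closure
# repeatedly and report average/stddev microseconds per call.
import statistics
import time

def benchmark_fn(fn, run_time=5.0):
    fn()  # warmup
    samples = []
    deadline = time.perf_counter() + run_time
    while time.perf_counter() < deadline:
        start = time.perf_counter()
        fn()
        samples.append((time.perf_counter() - start) * 1e6)
    return {"name": fn.__name__,
            "avg_us": statistics.fmean(samples),
            "std_us": statistics.pstdev(samples)}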
Example #8
# The excerpt starts mid-file; the imports, RAND_INTS, and backbone below are
# assumed (a torchvision ResNet is a typical backbone for this benchmark).
import random

import torch
import torchvision

import nestedtensor
import utils

RAND_INTS = [random.randint(100, 300) for _ in range(20)]

backbone = torchvision.models.resnet50()
return_layers = {'layer4': 'out'}
MODEL = torchvision.models._utils.IntermediateLayerGetter(
    backbone, return_layers=return_layers).cuda()


def gen_t_loop_segmentation():
    tensors = [torch.rand(1, 3, i, 256).cuda() for i in RAND_INTS]

    def t_loop():
        for t in tensors:
            MODEL(t)['out'].sum().backward()

    return t_loop


def gen_nt_segmentation():
    nested_tensor = nestedtensor.nested_tensor(
        [torch.rand(3, i, 256) for i in RAND_INTS],
        device=torch.device('cuda'),
        dtype=torch.float)

    def nt():
        MODEL(nested_tensor)['out'].sum().backward()

    return nt


if __name__ == "__main__":
    # print(utils.benchmark_fn(gen_t_loop_segmentation(), 10.0))
    print(utils.benchmark_fn(gen_nt_segmentation(), 2.0))
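
IntermediateLayerGetter is standard torchvision machinery: it re-wraps a backbone so the forward pass returns an OrderedDict of the requested intermediate activations, which is why both benchmarks index the output with ['out']. A toy usage on a plain tensor:

# Toy usage of IntermediateLayerGetter on a plain tensor.
import torch
import torchvision

backbone = torchvision.models.resnet18()
getter = torchvision.models._utils.IntermediateLayerGetter(
    backbone, return_layers={'layer4': 'out'})
feats = getter(torch.rand(1, 3, 224, 224))
print(feats['out'].shape)  # torch.Size([1, 512, 7, 7])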
Example #9
# The excerpt starts mid-file. The imports, RAND_INTS, and the head of
# gen_t_mul below are reconstructed by analogy with the sibling cos benchmark;
# the values are assumed.
import random

import torch

import nestedtensor
import utils

RAND_INTS = [random.randint(10, 30) for _ in range(2000)]


def gen_t_mul():
    # Single flat buffers so one kernel covers all the data.
    tensor1 = torch.cat([torch.rand(i, 2560).reshape(-1)
                         for i in RAND_INTS]).cuda()
    tensor2 = torch.cat([torch.rand(i, 2560).reshape(-1)
                         for i in RAND_INTS]).cuda()

    def t():
        tensor1.mul(tensor2)
    return t


def gen_t_loop_mul():
    tensors1 = [torch.rand(i, 2560).cuda() for i in RAND_INTS]
    tensors2 = [torch.rand(i, 2560).cuda() for i in RAND_INTS]

    def t_loop():
        for t1, t2 in zip(tensors1, tensors2):
            t1.mul(t2)

    return t_loop


def gen_nt_mul():
    nested_tensor1 = nestedtensor.nested_tensor(
        [torch.rand(i, 2560).cuda() for i in RAND_INTS])
    nested_tensor2 = nestedtensor.nested_tensor(
        [torch.rand(i, 2560).cuda() for i in RAND_INTS])

    def nt():
        nested_tensor1.mul(nested_tensor2)

    return nt


if __name__ == "__main__":
    print(utils.benchmark_fn(gen_t_mul()))
    print(utils.benchmark_fn(gen_t_loop_mul()))
    print(utils.benchmark_fn(gen_nt_mul()))
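
A cheap sanity check worth keeping next to benchmarks like these: the flat-buffer and per-tensor variants should compute identical values, since element-wise mul is order-preserving over the concatenation. A sketch:

# Correctness cross-check for the mul variants (not part of the timing):
# element-wise mul over a flat concatenation must match the per-tensor loop.
import torch

a = [torch.rand(3, 4), torch.rand(5, 4)]
b = [torch.rand(3, 4), torch.rand(5, 4)]
flat = torch.cat([t.reshape(-1) for t in a]).mul(
    torch.cat([t.reshape(-1) for t in b]))
looped = torch.cat([x.mul(y).reshape(-1) for x, y in zip(a, b)])
print(torch.equal(flat, looped))  # True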