# Bind the C++ extension's flatten/unflatten ops under short names for the
# benchmark bodies below.
flatten = util_ops.flatten
unflatten = util_ops.unflatten

torch.manual_seed(0)
# emulate a small typical model weights
x = [
    torch.rand((512, 512)).cuda(),
    torch.rand((512, 1024)).cuda(),
    torch.rand((512, 30000)).cuda(),
]
# 90 tensors total: the 3-tensor group repeated 30 times.
t = x * 30

# warm up and check that the same output is produced by all three
# implementations before timing them
flat_py = _flatten_dense_tensors(t)
flat_cpp = flatten(t)
flat_apex = flatten_apex(t)
#numel = flat_cpp.numel()
assert torch.eq(flat_py, flat_cpp).all(), "both produce the same tensor"
assert torch.eq(flat_py, flat_apex).all(), "both produce the same tensor"

# iterations per benchmarked program
TIMES = 1000


# the programs being tested
def py():
    """Benchmark: flatten t with the PyTorch reference implementation."""
    for i in range(TIMES):
        flat = _flatten_dense_tensors(t)


def cpp():
    """Benchmark: flatten t with the C++ extension op."""
    for i in range(TIMES):
        # BUG FIX: this loop body was missing entirely (SyntaxError) —
        # restored by symmetry with py() and apex().
        flat = flatten(t)
def apex():
    """Benchmark: flatten t with the apex implementation, TIMES iterations."""
    for _ in range(TIMES):
        flat = flatten_apex(t)
torch.manual_seed(0)
# emulate a small typical model weights
x = [
    torch.rand((512, 512)).cuda(),
    torch.rand((512, 1024)).cuda(),
    torch.rand((512, 30000)).cuda(),
]
# 90 tensors total: the 3-tensor group repeated 30 times.
unflat_t = x * 30

# warm up and check that the same output is produced by all three
# flatten implementations
flat_py = _flatten_dense_tensors(unflat_t)
flat_cpp = flatten(unflat_t)
flat_apex = flatten_apex(unflat_t)
#numel = flat_cpp.numel()
assert torch.eq(flat_py, flat_cpp).all(), "both produce the same tensor"
assert torch.eq(flat_py, flat_apex).all(), "both produce the same tensor"

flat_t = flat_py

# Round-trip check: unflattening the flat buffer must reproduce the
# original tensor list element-for-element.
unflat_py = _unflatten_dense_tensors(flat_py, unflat_t)
for expected, got in zip(unflat_t, unflat_py):
    assert torch.eq(expected, got).all()

# BUG FIX: this previously called _unflatten_dense_tensors (the Python
# reference) again, so the C++ unflatten op was never actually validated
# and the assertion was vacuous. Use the extension's op, which is bound
# (and was otherwise unused) as `unflatten` above.
unflat_cpp = unflatten(flat_cpp, unflat_t)
for expected, got in zip(unflat_t, unflat_cpp):
    assert torch.eq(expected, got).all()

# NOTE(review): this path still uses the PyTorch reference unflatten; if an
# apex unflatten counterpart exists (e.g. unflatten_apex), it should be
# called here instead — no such name is visible in this chunk, so the call
# is left unchanged.
unflat_apex = _unflatten_dense_tensors(flat_apex, unflat_t)
for expected, got in zip(unflat_t, unflat_apex):
    assert torch.eq(expected, got).all()