Example #1
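The snippets on this page omit their imports. A minimal header that makes them runnable is sketched below; get_hfta_op_for comes from hfta.ops, while assert_allclose (note its non-NumPy population_threshold keyword) and dump_error_msg appear to be HFTA test utilities, so their import path here is an assumption.

import random

import torch
import torch.nn as nn

from hfta.ops import get_hfta_op_for
# Assumed import path: assert_allclose / dump_error_msg are project test
# helpers, not numpy.testing.assert_allclose (which has no
# population_threshold keyword).
from utils import assert_allclose, dump_error_msg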
def testcase_AdaptiveAvgPool2d(
        B=3,
        N=32,
        C=16,
        HWin=28,
        output_size=(16, 16),
        device=torch.device('cpu'),
        dtype=torch.float,
):
    with torch.no_grad():
        x_array = [
            torch.rand(N, C, HWin, HWin, device=device, dtype=dtype)
            for _ in range(B)
        ]
        x_fused = torch.cat([x.unsqueeze(1) for x in x_array], dim=1)
        args = (output_size, )
        pool_array = [nn.AdaptiveAvgPool2d(*args) for _ in range(B)]
        pool_fused = get_hfta_op_for(nn.AdaptiveAvgPool2d, B=B)(*args)
        y_array = [pool_array[b](x_array[b]) for b in range(B)]
        y_fused_actual = pool_fused(x_fused)
        y_fused_expect = torch.cat([y.unsqueeze(1) for y in y_array], dim=1)
        try:
            assert_allclose(
                y_fused_actual.cpu().numpy(),
                y_fused_expect.cpu().numpy(),
                rtol=1e-4,
            )
        except AssertionError as e:
            dump_error_msg(e)
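All of the conv-style tests on this page fuse the B per-model inputs along a new dimension 1, giving x_fused the shape [N, B, C, H, W]. The cat-over-unsqueeze idiom above is just torch.stack along dim 1; a quick self-contained check of that equivalence:

# The cat([x.unsqueeze(1), ...], dim=1) idiom equals torch.stack(..., dim=1).
xs = [torch.rand(4, 3, 8, 8) for _ in range(3)]
fused = torch.cat([x.unsqueeze(1) for x in xs], dim=1)
assert fused.shape == (4, 3, 3, 8, 8)  # [N, B, C, H, W]
assert torch.equal(fused, torch.stack(xs, dim=1))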
Example #2
def testcase_MaxPool2d(
        B=3,
        N=32,
        C=16,
        kernel_size=2,
        HWin=28,
        stride=None,
        padding=0,
        dilation=1,
        return_indices=False,
        ceil_mode=False,
        device=torch.device('cpu'),
        dtype=torch.float,
):
    with torch.no_grad():
        x_array = [
            torch.rand(N, C, HWin, HWin, device=device, dtype=dtype)
            for _ in range(B)
        ]
        x_fused = torch.cat([x.unsqueeze(1) for x in x_array], dim=1)
        args = (kernel_size, )
        kwargs = {
            'stride': stride,
            'padding': padding,
            'dilation': dilation,
            'return_indices': return_indices,
            'ceil_mode': ceil_mode,
        }
        pool_array = [nn.MaxPool2d(*args, **kwargs) for _ in range(B)]
        pool_fused = get_hfta_op_for(nn.MaxPool2d, B=B)(*args, **kwargs)
        res_array = [pool_array[b](x_array[b]) for b in range(B)]
        res_fused_actual = pool_fused(x_fused)
        if return_indices:
            y_array, indices_array = tuple(zip(*res_array))
            y_fused_actual, indices_fused_actual = res_fused_actual
        else:
            y_array = res_array
            y_fused_actual = res_fused_actual
        y_fused_expect = torch.cat([y.unsqueeze(1) for y in y_array], dim=1)
        try:
            assert_allclose(
                y_fused_actual.cpu().numpy(),
                y_fused_expect.cpu().numpy(),
                rtol=1e-4,
            )
        except AssertionError as e:
            dump_error_msg(e)
        if return_indices:
            indices_fused_expect = torch.cat(
                [indices.unsqueeze(1) for indices in indices_array],
                dim=1,
            )
            try:
                assert_allclose(
                    indices_fused_actual.cpu().numpy(),
                    indices_fused_expect.cpu().numpy(),
                    rtol=1e-4,
                )
            except AssertionError as e:
                dump_error_msg(e)
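Both output paths of the test can be exercised from the defaults; a minimal driver with illustrative argument values:

testcase_MaxPool2d()                     # values only
testcase_MaxPool2d(return_indices=True)  # also compares argmax indices
testcase_MaxPool2d(stride=2, padding=1)  # non-default pooling geometry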
Example #3
def testcase(
    B=3,
    N=32,
    L=8,
    in_features=20,
    out_features=50,
    bias=True,
    device=torch.device('cpu'),
    dtype=torch.float,
):
  with torch.no_grad():
    x_array = [
        torch.rand(N, L, in_features, device=device, dtype=dtype)
        for _ in range(B)
    ]
    x_fused = torch.cat([x.unsqueeze(0) for x in x_array], dim=0)
    args = (in_features, out_features)
    kwargs = {'bias': bias, 'device': device, 'dtype': dtype}
    linear_array = [nn.Linear(*args, **kwargs) for _ in range(B)]
    linear_fused = get_hfta_op_for(nn.Linear, B=B)(*args, **kwargs)
    # Copy each unfused Linear's parameters into the fused op's slot b.
    for b in range(B):
      linear_fused.snatch_parameters(linear_array[b], b)
    y_array = [linear_array[b](x_array[b]) for b in range(B)]
    y_fused_actual = linear_fused(x_fused)
    y_fused_expect = torch.cat([y.unsqueeze(0) for y in y_array], dim=0)
    try:
      assert_allclose(
          y_fused_actual.cpu().numpy(),
          y_fused_expect.cpu().numpy(),
          rtol=1e-4,
          population_threshold=1e-3,
      )
    except AssertionError as e:
      dump_error_msg(e)
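Unlike the pooling tests, Linear-style ops here fuse along dim 0, so x_fused has shape [B, N, L, in_features], and snatch_parameters transfers each independently initialized layer's parameters into the fused op before the outputs are compared. A small hand-rolled sweep over configurations (HFTA's own test driver may differ):

for bias in (True, False):
  testcase(bias=bias)
testcase(L=1)  # degenerate sequence length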
Example #4
def testcase(
        B=3,
        N=32,
        input_dim=(20, ),
        num_embeddings=200,
        embedding_dim=50,
        padding_idx=None,
        max_norm=None,
        norm_type=2.,
        scale_grad_by_freq=False,
        sparse=False,
        _weight=None,
        device=torch.device('cpu'),
        x_dtype=torch.int,
        param_dtype=torch.float,
):
    with torch.no_grad():
        x_array = [
            torch.randint(
                num_embeddings,
                [N] + list(input_dim),
                device=device,
                dtype=x_dtype,
            ) for _ in range(B)
        ]
        x_fused = torch.cat([x.unsqueeze(0) for x in x_array], dim=0)
        args = (num_embeddings, embedding_dim)
        kwargs = {
            'padding_idx': padding_idx,
            'max_norm': max_norm,
            'norm_type': norm_type,
            'scale_grad_by_freq': scale_grad_by_freq,
            'sparse': sparse,
            '_weight': _weight,
            'device': device,
            'dtype': param_dtype,
        }
        embedding_array = [nn.Embedding(*args, **kwargs) for _ in range(B)]
        embedding_fused = get_hfta_op_for(nn.Embedding, B=B)(*args, **kwargs)
        # Copy each unfused Embedding's weight into the fused op's slot b.
        for b in range(B):
            embedding_fused.snatch_parameters(embedding_array[b], b)
        y_array = [embedding_array[b](x_array[b]) for b in range(B)]
        y_fused_actual = embedding_fused(x_fused)
        y_fused_expect = torch.cat([y.unsqueeze(0) for y in y_array], dim=0)
        try:
            assert_allclose(
                y_fused_actual.cpu().numpy(),
                y_fused_expect.cpu().numpy(),
                rtol=1e-4,
            )
        except AssertionError as e:
            dump_error_msg(e)
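nn.Embedding consumes integer indices, so the inputs come from torch.randint and the test keeps separate x_dtype (integer indices) and param_dtype (floating-point weights) arguments. A few illustrative calls:

testcase()               # plain lookup table
testcase(padding_idx=0)  # index 0 maps to an all-zero vector
testcase(max_norm=1.0)   # rows exceeding the norm are renormalized on lookup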
Example #5
def _assert_params_unfused(op_list, op, b):
  assert_allclose(
      op_list[b].weight.data.cpu().numpy(),
      op.weight.data.cpu().numpy(),
      rtol=1e-4,
      population_threshold=1e-2,
  )
  if op_list[b].bias is not None:
    assert_allclose(
        op_list[b].bias.data.cpu().numpy(),
        op.bias.data.cpu().numpy(),
        rtol=1e-4,
        population_threshold=1e-2,
    )
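This helper compares entry b of a plain list of unfused ops against a reference op, parameter by parameter; Example #7 calls it as the fused=False fallback. A minimal passing invocation:

ref = nn.Linear(4, 4)
copy = nn.Linear(4, 4)
copy.load_state_dict(ref.state_dict())
_assert_params_unfused([copy], ref, 0)  # identical parameters: no error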
Example #6
def testcase_Conv2d(
    B=3,
    N=32,
    Cin=4,
    Cout=16,
    kernel_size=3,
    HWin=28,
    stride=1,
    padding=0,
    dilation=1,
    groups=1,
    bias=True,
    padding_mode='zeros',
    device=torch.device('cpu'),
    dtype=torch.float,
):
  with torch.no_grad():
    x_array = [
        torch.rand(N, Cin, HWin, HWin, device=device, dtype=dtype)
        for _ in range(B)
    ]
    x_fused = torch.cat([x.unsqueeze(1) for x in x_array], dim=1)
    args = (Cin, Cout, kernel_size)
    kwargs = {
        'stride': stride,
        'padding': padding,
        'dilation': dilation,
        'groups': groups,
        'bias': bias,
        'padding_mode': padding_mode,
        'device': device,
        'dtype': dtype,
    }
    conv_array = [nn.Conv2d(*args, **kwargs) for _ in range(B)]
    conv_fused = get_hfta_op_for(nn.Conv2d, B=B)(*args, **kwargs)
    # Copy each unfused Conv2d's parameters into the fused op's slot b.
    for b in range(B):
      conv_fused.snatch_parameters(conv_array[b], b)
    y_array = [conv_array[b](x_array[b]) for b in range(B)]
    y_fused_actual = conv_fused(x_fused)
    y_fused_expect = torch.cat([y.unsqueeze(1) for y in y_array], dim=1)
    try:
      assert_allclose(
          y_fused_actual.cpu().numpy(),
          y_fused_expect.cpu().numpy(),
          rtol=1e-4,
          population_threshold=1e-2,
      )
    except AssertionError as e:
      dump_error_msg(e)
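A few representative configurations for the Conv2d test (values are illustrative; Cin and Cout must stay divisible by groups):

testcase_Conv2d()                                    # defaults
testcase_Conv2d(groups=4)                            # Cin=4, Cout=16: both divisible by 4
testcase_Conv2d(padding=1, padding_mode='circular')  # non-zero padding mode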
Example #7
def _assert_params_conv2d(fused_op, op, b, fused=True):
  try:
    if fused:
      assert_allclose(
          fused_op.weight.data[b].cpu().numpy(),
          op.weight.data.cpu().numpy(),
          rtol=1e-4,
          population_threshold=1e-2,
      )
      if fused_op.bias is not None:
        assert_allclose(
            fused_op.bias.data[b].cpu().numpy(),
            op.bias.data.cpu().numpy(),
            rtol=1e-4,
            population_threshold=1e-2,
        )
    else:
      # In the unfused case, fused_op is actually a plain list of ops.
      _assert_params_unfused(fused_op, op, b)
  except AssertionError as e:
    dump_error_msg(e)
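The fused=True branch makes the fused parameter layout explicit: weight.data[b] (and bias.data[b]) of the fused op must match the b-th unfused op's tensors, i.e. per-model parameters are stacked along a leading dimension of size B. Under that layout assumption, a manual stand-in for snatch_parameters might look like this sketch (not HFTA's actual implementation):

def snatch_conv2d_parameters(fused_op, op, b):
  # Sketch only: copy one unfused Conv2d's parameters into slice b of the
  # fused op, assuming the leading-B layout checked above.
  with torch.no_grad():
    fused_op.weight.data[b].copy_(op.weight.data)
    if fused_op.bias is not None:
      fused_op.bias.data[b].copy_(op.bias.data)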
Example #8
def testcase(
        B=3,
        N=32,
        C=1024,
        HWin=16,
        p=0.4,
        device=torch.device('cpu'),
        dtype=torch.float,
):
    with torch.no_grad():
        x_array = [
            torch.ones(N, C, HWin, HWin, device=device, dtype=dtype)
            for _ in range(B)
        ]
        x_fused = torch.cat([x.unsqueeze(1) for x in x_array], dim=1)
        dropout2d_fused = get_hfta_op_for(torch.nn.Dropout2d, B=B)(p)
        y_fused = dropout2d_fused(x_fused)
        for b in range(B):
            y = y_fused[:, b, :, :, :]
            assert y.size(0) == N
            assert y.size(1) == C
            for n in range(N):
                zero_channels = 0
                for c in range(C):
                    s = y[n, c].sum()
                    # Dropout2d zeroes whole channels, so on an all-ones
                    # input each channel sum is either 0 or the
                    # inverted-dropout value HWin**2 / (1 - p).
                    try:
                        assert_allclose(s.cpu(), HWin**2 / (1 - p), rtol=1e-4)
                    except AssertionError:
                        # Not the scaled sum, so the channel must be all zeros.
                        assert_allclose(s.cpu(), 0, atol=1e-4)
                        zero_channels += 1
                # The empirical drop rate should be within 20% of p.
                assert_allclose(zero_channels / C, p, rtol=2e-1)
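Worked through the defaults (HWin=16, p=0.4): a surviving all-ones channel sums to 16**2 / (1 - 0.4) = 256 / 0.6 ≈ 426.67 after inverted-dropout scaling, and roughly 40% of the C=1024 channels per sample should land in the zero branch, which the final rtol=2e-1 check verifies statistically.

HWin, p = 16, 0.4
print(HWin ** 2 / (1 - p))  # ≈ 426.67: the sum a surviving channel must match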
Example #9
def testcase(
        B=5,
        N=64,
        normalized_shape=(200, ),
        elementwise_affine=True,
        device=torch.device('cpu'),
        dtype=torch.float,
):
    with torch.no_grad():
        x_array = [
            torch.rand([N] + list(normalized_shape),
                       device=device,
                       dtype=dtype) for _ in range(B)
        ]
        x_fused = torch.cat([x.unsqueeze(0) for x in x_array], dim=0)
        args = (normalized_shape, )
        kwargs = {
            'elementwise_affine': elementwise_affine,
            'device': device,
            'dtype': dtype,
        }
        layernorm_array = [nn.LayerNorm(*args, **kwargs) for _ in range(B)]
        layernorm_fused = get_hfta_op_for(nn.LayerNorm, B=B)(*args, **kwargs)
        # Copy each unfused LayerNorm's parameters into the fused op's slot b.
        for b in range(B):
            layernorm_fused.snatch_parameters(layernorm_array[b], b)
        y_array = [layernorm_array[b](x_array[b]) for b in range(B)]
        y_fused_actual = layernorm_fused(x_fused)
        y_fused_expect = torch.cat([y.unsqueeze(0) for y in y_array], dim=0)
        try:
            assert_allclose(
                y_fused_actual.cpu().numpy(),
                y_fused_expect.cpu().numpy(),
                rtol=1e-4,
            )
        except AssertionError as e:
            dump_error_msg(e)
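normalized_shape may span several trailing dimensions; each x is built as [N] + list(normalized_shape) and fused along dim 0. Illustrative calls:

testcase()                           # normalize over one trailing dim
testcase(normalized_shape=(20, 10))  # normalize over the last two dims
testcase(elementwise_affine=False)   # no learnable gain or bias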
Example #10
def testcase_2d(
        num_features=128,
        eps=1e-5,
        momentum=0.1,
        affine=True,
        track_running_stats=True,
        B=3,
        N=8,
        HWin=28,
        train_test_steps=10,
        training=True,
        device=torch.device('cpu'),
        dtype=torch.float,
):
    C = num_features
    with torch.no_grad():

        args = (num_features, )
        kwargs = {
            'eps': eps,
            'momentum': momentum,
            'affine': affine,
            'track_running_stats': track_running_stats,
            'device': device,
            'dtype': dtype,
        }
        batchNormal2d_array = [
            nn.BatchNorm2d(*args, **kwargs) for _ in range(B)
        ]
        batchNormal2d_fused = get_hfta_op_for(nn.BatchNorm2d, B=B)(*args,
                                                                   **kwargs)
        if track_running_stats:
            # Start each op from non-default running statistics (with a
            # shared num_batches_tracked count).
            rand_int = random.randint(0, 1024)
            for bn in batchNormal2d_array:
                nn.init.normal_(bn.running_mean)
                nn.init.normal_(bn.running_var)
                bn.num_batches_tracked.fill_(rand_int)
        # Copy each unfused op's parameters (and running statistics) into
        # the fused op's slot b.
        for b in range(B):
            batchNormal2d_fused.snatch_parameters(batchNormal2d_array[b], b)

        if training:
            for bn in batchNormal2d_array:
                bn.train()
            batchNormal2d_fused.train()
        else:
            for bn in batchNormal2d_array:
                bn.eval()
            batchNormal2d_fused.eval()

        # Check that fused and unfused outputs still match over several steps.
        for i in range(train_test_steps):
            x_array = [
                torch.rand(N, C, HWin, HWin, device=device, dtype=dtype)
                for _ in range(B)
            ]
            x_fused = torch.cat([x.unsqueeze(1) for x in x_array], dim=1)

            y_array = [batchNormal2d_array[b](x_array[b]) for b in range(B)]
            y_fused_actual = batchNormal2d_fused(x_fused)
            y_fused_expect = torch.cat([y.unsqueeze(1) for y in y_array],
                                       dim=1)
            try:
                assert_allclose(
                    y_fused_actual.cpu().numpy(),
                    y_fused_expect.cpu().numpy(),
                    rtol=1e-4,
                )
            except AssertionError as e:
                dump_error_msg(e)
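Because BatchNorm2d updates its running statistics on every training-mode forward pass, the loop feeds several fresh batches so that any drift between fused and unfused statistics would surface in later iterations. Both modes are worth driving:

testcase_2d()                           # training mode: batch statistics
testcase_2d(training=False)             # eval mode: snatched running stats
testcase_2d(track_running_stats=False)  # always normalize with batch stats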
Example #11
def testcase_ConvTranspose2d(
    B=3,
    N=32,
    Cin=4,
    Cout=16,
    kernel_size=3,
    HWin=28,
    stride=1,
    padding=0,
    output_padding=0,
    groups=1,
    bias=True,
    dilation=1,
    padding_mode='zeros',
    output_size=None,
    device=torch.device('cpu'),
    dtype=torch.float,
):
  with torch.no_grad():
    x_array = [
        torch.rand(N, Cin, HWin, HWin, device=device, dtype=dtype)
        for _ in range(B)
    ]
    x_fused = torch.cat([x.unsqueeze(1) for x in x_array], dim=1)
    args = (Cin, Cout, kernel_size)

    # Handle output_padding: ConvTranspose2d requires output_padding to be
    # smaller than either stride or dilation, so bump both past it.
    if output_padding != 0:
      stride = output_padding + 1
      dilation = output_padding + 1

    # Handle the output_size argument for the forward call. The hardcoded
    # sizes 57 and 58 used by callers are the valid output sizes for these
    # shapes when stride == 2, so force that stride here.
    if output_size:
      stride = 2
      # If output_size carries the fused array (B) dimension at index 1,
      # drop it so each unfused op receives its own per-model size.
      output_size_arg = output_size if len(output_size) == 2 else (
          output_size[0:1] + output_size[2:])
    else:
      output_size_arg = None

    kwargs = {
        'stride': stride,
        'padding': padding,
        'output_padding': output_padding,
        'groups': groups,
        'bias': bias,
        'dilation': dilation,
        'padding_mode': padding_mode,
        'device': device,
        'dtype': dtype,
    }
    conv_array = [nn.ConvTranspose2d(*args, **kwargs) for _ in range(B)]
    conv_fused = get_hfta_op_for(nn.ConvTranspose2d, B=B)(*args, **kwargs)
    # Copy each unfused ConvTranspose2d's parameters into the fused op's slot b.
    for b in range(B):
      conv_fused.snatch_parameters(conv_array[b], b)

    y_array = [
        conv_array[b](x_array[b], output_size=output_size_arg) for b in range(B)
    ]
    y_fused_actual = conv_fused(x_fused, output_size=output_size)
    y_fused_expect = torch.cat([y.unsqueeze(1) for y in y_array], dim=1)
    try:
      assert_allclose(
          y_fused_actual.cpu().numpy(),
          y_fused_expect.cpu().numpy(),
          rtol=1e-4,
          population_threshold=1e-2,
      )
      if output_size:
        assert (
            y_fused_actual.shape == y_fused_expect.shape
        ), "The actual output size ({}) is different from the expected output size ({}).".format(
            y_fused_actual.shape, y_fused_expect.shape)
    except AssertionError as e:
      dump_error_msg(e)
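The two special code paths can be driven explicitly; per the comments in the test, output_padding bumps stride and dilation past it, and sizes like 57 or 58 are the valid output sizes when stride == 2:

testcase_ConvTranspose2d()                      # plain transposed conv
testcase_ConvTranspose2d(output_padding=1)      # forces stride = dilation = 2
testcase_ConvTranspose2d(output_size=(57, 57))  # valid when stride == 2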