예제 #1
0
 def test_meshgrid_indexing_runtime_error(test_case):
     """Check that ``flow.meshgrid`` rejects an unsupported ``indexing`` value.

     The input tensors are created outside the ``assertRaises`` block so that
     only the ``flow.meshgrid`` call itself is expected to raise; a failure
     while building the inputs surfaces as a test error instead of being
     mistaken for the exception under test.
     """
     x1 = flow.ones(2, dtype=flow.float32, requires_grad=True)
     x2 = flow.ones(2, dtype=flow.float32, requires_grad=True)
     with test_case.assertRaises(Exception) as context:
         flow.meshgrid(x1, x2, indexing="ab")
     # assertIn prints both strings on failure, unlike assertTrue(a in b).
     test_case.assertIn("meshgrid: indexing must be one of",
                        str(context.exception))
예제 #2
0
 def test_meshgrid_tensors_scalar_runtime_error(test_case):
     """Check that ``flow.meshgrid`` rejects an input that is not scalar or 1D.

     ``x1`` is built from an empty list and ``x2`` has shape ``(1, 2, 3)``.
     Both are created outside the ``assertRaises`` block so that only the
     ``flow.meshgrid`` call is expected to raise.
     """
     x1 = flow.tensor([], dtype=flow.float32, requires_grad=True)
     x2 = flow.ones((1, 2, 3), dtype=flow.float32, requires_grad=True)
     with test_case.assertRaises(Exception) as context:
         flow.meshgrid(x1, x2)
     # assertIn prints both strings on failure, unlike assertTrue(a in b).
     test_case.assertIn("Expected scalar or 1D tensor in the tensor list",
                        str(context.exception))
예제 #3
0
 def test_meshgrid_tensors_dtype_runtime_error(test_case):
     """Check that ``flow.meshgrid`` rejects inputs with different dtypes.

     One input is ``float32`` and the other ``float16``. Both are created
     outside the ``assertRaises`` block so that only the ``flow.meshgrid``
     call is expected to raise.
     """
     x1 = flow.ones(2, dtype=flow.float32, requires_grad=True)
     x2 = flow.ones(2, dtype=flow.float16, requires_grad=True)
     with test_case.assertRaises(Exception) as context:
         flow.meshgrid(x1, x2)
     # assertIn prints both strings on failure, unlike assertTrue(a in b).
     test_case.assertIn("meshgrid expects all tensors to have the same dtype",
                        str(context.exception))
예제 #4
0
def _test_meshgrid_forawd_scalar(test_case, device, indexing):
    """Compare ``flow.meshgrid`` with ``np.meshgrid`` for two 0-d inputs.

    Args:
        test_case: object providing ``assertTrue`` (the running test case).
        device: device name passed to ``flow.device`` for both inputs.
        indexing: indexing mode forwarded to both meshgrid implementations.
    """
    input1 = flow.tensor(np.array(1.0),
                         dtype=flow.float32,
                         device=flow.device(device))
    input2 = flow.tensor(np.array(2.0),
                         dtype=flow.float32,
                         device=flow.device(device))
    (np_x, np_y) = np.meshgrid(input1.numpy(),
                               input2.numpy(),
                               indexing=indexing)
    (of_x, of_y) = flow.meshgrid(input1, input2, indexing=indexing)
    # Verify every returned grid, not just the first one.
    for of_grid, np_grid in ((of_x, np_x), (of_y, np_y)):
        test_case.assertTrue(
            np.allclose(of_grid.numpy(), np_grid, rtol=0.0001, atol=0.0001))
예제 #5
0
 def test_meshgrid_tensors_placement_runtime_error(test_case):
     """Check that ``flow.meshgrid`` rejects a mix of global and local tensors.

     ``x1`` is created with a placement and sbp, while ``x2`` is created the
     same way and then converted with ``to_local()``. Both are built outside
     the ``assertRaises`` block so that only the ``flow.meshgrid`` call is
     expected to raise.
     """
     x1 = flow.tensor(
         [0.0, 1.0],
         dtype=flow.float32,
         placement=flow.placement("cpu", ranks=[0]),
         sbp=[flow.sbp.broadcast],
     )
     x2 = flow.tensor(
         [0.0, 1.0],
         dtype=flow.float32,
         placement=flow.placement("cpu", ranks=[0]),
         sbp=[flow.sbp.broadcast],
     ).to_local()
     with test_case.assertRaises(Exception) as context:
         flow.meshgrid(x1, x2)
     # assertIn prints both strings on failure, unlike assertTrue(a in b).
     test_case.assertIn("meshgrid expects all tensors are global tensor",
                        str(context.exception))
예제 #6
0
    def __init__(
        self,
        dim,
        window_size,
        num_heads,
        qkv_bias=True,
        qk_scale=None,
        attn_drop=0.0,
        proj_drop=0.0,
    ):
        """Create the attention layers and the relative position lookup.

        Args:
            dim: feature width used by the ``qkv`` and ``proj`` linear layers.
            window_size: indexable pair ``(Wh, Ww)`` of window height/width.
            num_heads: number of heads; sets the per-head width
                ``dim // num_heads`` and the column count of the bias table.
            qkv_bias: whether the ``qkv`` linear layer carries a bias.
            qk_scale: value stored as ``self.scale``; when falsy,
                ``head_dim ** -0.5`` is used instead.
            attn_drop: probability given to the ``attn_drop`` dropout layer.
            proj_drop: probability given to the ``proj_drop`` dropout layer.
        """
        super().__init__()

        win_h = window_size[0]
        win_w = window_size[1]
        per_head_dim = dim // num_heads

        self.dim = dim
        self.window_size = window_size  # Wh, Ww
        self.num_heads = num_heads
        self.scale = qk_scale if qk_scale else per_head_dim**-0.5

        # Relative position bias table: one row per (row offset, column
        # offset) pair inside a window, one column per head.
        span_h = 2 * win_h - 1
        span_w = 2 * win_w - 1
        bias_table = flow.zeros(span_h * span_w, num_heads)  # 2*Wh-1 * 2*Ww-1, nH
        self.relative_position_bias_table = nn.Parameter(bias_table)
        self.relative_position_bias_table.trunc_normal_(std=0.02)

        # Pair-wise relative position index for every token in the window.
        grid = flow.meshgrid(flow.arange(win_h), flow.arange(win_w))
        coords = flow.stack(grid)  # 2, Wh, Ww
        flat_coords = flow.flatten(coords, 1)  # 2, Wh*Ww
        offsets = flat_coords[:, :, None] - flat_coords[:, None, :]  # 2, Wh*Ww, Wh*Ww
        offsets = offsets.permute(1, 2, 0)  # Wh*Ww, Wh*Ww, 2
        # Shift both offsets so they start from 0, then scale the row offset
        # by the column span so that the sum of the pair is a single flat
        # index into the bias table.
        offsets[:, :, 0] += win_h - 1
        offsets[:, :, 1] += win_w - 1
        offsets[:, :, 0] *= span_w
        self.register_buffer("relative_position_index",
                             offsets.sum(-1))  # Wh*Ww, Wh*Ww

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)
        self.softmax = nn.Softmax(dim=-1)
예제 #7
0
def _test_meshgrid_forawd_3tensor(test_case, device, indexing):
    """Compare ``flow.meshgrid`` with ``np.meshgrid`` for three 1-D inputs.

    Args:
        test_case: object providing ``assertTrue`` (the running test case).
        device: device name passed to ``flow.device`` for all inputs.
        indexing: indexing mode forwarded to both meshgrid implementations.
    """
    input1 = flow.tensor(np.array([1, 2, 3]),
                         dtype=flow.float32,
                         device=flow.device(device))
    input2 = flow.tensor(np.array([4, 5, 6]),
                         dtype=flow.float32,
                         device=flow.device(device))
    input3 = flow.tensor(np.array([7, 8, 9]),
                         dtype=flow.float32,
                         device=flow.device(device))
    (np_x, np_y, np_z) = np.meshgrid(input1.numpy(),
                                     input2.numpy(),
                                     input3.numpy(),
                                     indexing=indexing)
    (of_x, of_y, of_z) = flow.meshgrid(input1,
                                       input2,
                                       input3,
                                       indexing=indexing)
    # Verify every returned grid, not just the first one.
    for of_grid, np_grid in ((of_x, np_x), (of_y, np_y), (of_z, np_z)):
        test_case.assertTrue(
            np.allclose(of_grid.numpy(), np_grid, rtol=0.0001, atol=0.0001))
예제 #8
0
 def test_meshgrid_tensors_size_runtime_error(test_case):
     """Check that ``flow.meshgrid`` rejects an empty tensor list."""
     with test_case.assertRaises(Exception) as context:
         flow.meshgrid([])
     # assertIn prints both strings on failure, unlike assertTrue(a in b).
     test_case.assertIn("meshgrid expects a non-empty TensorList",
                        str(context.exception))