Beispiel #1
0
def positional_encoding(position, d_model, name="positional_encoding"):
    """
    Build the positional encoding: sin on even feature indexes, cos on odd ones
    :param position: The position (number of positions to encode)
    :param d_model: The hidden dimension in model
    :param name: Namespace under which the ops are created
    :return: shape like (1, position, d_model)
    """
    with flow.scope.namespace(name):
        # Column of positions, shape = (position, 1)
        pos_column = flow.expand_dims(
            flow.range(position, dtype=flow.float32, name="pos"), axis=1
        )

        # Row of feature indexes, shape = (1, d_model)
        dim_row = flow.expand_dims(
            flow.range(d_model, dtype=flow.float32, name="d_model"), axis=0
        )

        angles = get_angles(pos_column, dim_row, d_model)

        # Even feature indexes (0, 2, 4, ...) take the sine of the angle
        even_idx = flow.range(0, d_model, 2, dtype=flow.int32, name="even_range")
        sin_part = flow.math.sin(flow.gather(angles, even_idx, axis=1))

        # Odd feature indexes (1, 3, 5, ...) take the cosine of the angle
        odd_idx = flow.range(1, d_model, 2, dtype=flow.int32, name="odd_range")
        cos_part = flow.math.cos(flow.gather(angles, odd_idx, axis=1))

        # Zero-filled canvas, laid out as (d_model, position) because the
        # scatter below can only address rows
        canvas = flow.constant(
            0, dtype=flow.float32, shape=(d_model, position), name="pos_ende"
        )

        # Write the transposed sine values into the even rows of the canvas
        even_rows = flow.expand_dims(even_idx, axis=1)
        sin_rows = flow.transpose(sin_part, perm=[1, 0])
        sin_canvas = flow.tensor_scatter_nd_update(canvas, even_rows, sin_rows)

        # Write the transposed cosine values into the odd rows of the canvas
        odd_rows = flow.expand_dims(odd_idx, axis=1)
        cos_rows = flow.transpose(cos_part, perm=[1, 0])
        cos_canvas = flow.tensor_scatter_nd_update(canvas, odd_rows, cos_rows)

        # Each canvas is zero where the other one was written, so the sum
        # interleaves the two halves
        merged = sin_canvas + cos_canvas

        # Undo the transposed layout, then prepend a dim of size 1 to get
        # shape like (1, position, d_model)
        encoding = flow.expand_dims(flow.transpose(merged, perm=[1, 0]), axis=0)

    return encoding
Beispiel #2
0
    def scatter_nd_update_grad_fn(
            x_def: oft.Numpy.Placeholder(params.shape, dtype=flow.float),
            indices_def: oft.Numpy.Placeholder(indices.shape,
                                               dtype=flow.int32),
            y_def: oft.Numpy.Placeholder(updates.shape, dtype=flow.float),
    ):
        """Scatter ``y_def`` into ``x_def`` at ``indices_def`` and watch the gradients.

        Both fed inputs are added to zero-initialized variables before the
        scatter; the scatter result is registered as the loss and returned.
        The gradients of the two sums are handed to ``compare_dz_dx`` and
        ``compare_dz_dy``.
        """
        with flow.scope.placement(device_type, "0:0"):
            params_var = flow.get_variable(
                "params",
                shape=params.shape,
                dtype=flow.float32,
                initializer=flow.constant_initializer(0),
            )
            updates_var = flow.get_variable(
                "updates",
                shape=updates.shape,
                dtype=flow.float32,
                initializer=flow.constant_initializer(0),
            )
            # The variables are zero, so the sums carry the fed values.
            # NOTE(review): presumably the variables exist so that a backward
            # pass is built for the inputs -- confirm.
            scatter_target = params_var + x_def
            scatter_updates = updates_var + y_def
            scattered = flow.tensor_scatter_nd_update(
                scatter_target, indices_def, scatter_updates
            )
            flow.losses.add_loss(scattered)

        # Watch the gradients of the sums (not of the raw variables).
        flow.watch_diff(scatter_target, compare_dz_dx)
        flow.watch_diff(scatter_updates, compare_dz_dy)
        return scattered
 def tensor_scatter_nd_update_fn(
     params_def: oft.ListNumpy.Placeholder(params.shape, dtype=flow.float),
     indices_def: oft.ListNumpy.Placeholder(indices_static_shape, dtype=flow.int32),
     updates_def: oft.ListNumpy.Placeholder(updates_static_shape, dtype=flow.float),
 ):
     """Run ``flow.tensor_scatter_nd_update`` on the fed inputs under a "gpu", "0:0" placement."""
     with flow.scope.placement("gpu", "0:0"):
         scattered = flow.tensor_scatter_nd_update(
             params_def, indices_def, updates_def
         )
     return scattered
    def scatter_nd_update_grad_fn(
        x_def: oft.Numpy.Placeholder(params.shape, dtype=flow.float),
        indices_def: oft.Numpy.Placeholder(indices.shape, dtype=flow.int32),
        y_def: oft.Numpy.Placeholder(updates.shape, dtype=flow.float),
    ):
        """Scatter ``y_def`` into ``x_def`` at ``indices_def`` and watch the gradients.

        Both fed inputs are added to zero-initialized variables before the
        scatter; the scatter result is minimized with an SGD optimizer
        (momentum 0, constant rate 1e-3) and returned. The gradients of the
        two sums are handed to ``compare_dz_dx`` and ``compare_dz_dy``.
        """
        with flow.scope.placement(device_type, "0:0"):
            params_var = flow.get_variable(
                "params",
                shape=params.shape,
                dtype=flow.float32,
                initializer=flow.constant_initializer(0),
            )
            updates_var = flow.get_variable(
                "updates",
                shape=updates.shape,
                dtype=flow.float32,
                initializer=flow.constant_initializer(0),
            )
            # The variables are zero, so the sums carry the fed values.
            scatter_target = params_var + x_def
            scatter_updates = updates_var + y_def
            scattered = flow.tensor_scatter_nd_update(
                scatter_target, indices_def, scatter_updates
            )

            lr_schedule = flow.optimizer.PiecewiseConstantScheduler([], [1e-3])
            sgd = flow.optimizer.SGD(lr_schedule, momentum=0)
            sgd.minimize(scattered)

        # Watch the gradients of the sums (not of the raw variables).
        flow.watch_diff(scatter_target, compare_dz_dx)
        flow.watch_diff(scatter_updates, compare_dz_dy)
        return scattered
def _test_eager_global_tensor_scatter_nd_update_backward(test_case, placement, sbp):
    """Check output and gradients of ``flow.tensor_scatter_nd_update`` on global tensors.

    A 16-element origin (distributed with ``sbp``) has 8 distinct positions
    overwritten by an 8-element update. After ``output.sum().backward()`` the
    update gradient must be all ones and the origin gradient must be one
    everywhere except at the overwritten positions, where it must be zero.
    """
    # One broadcast entry per dimension of the placement's rank grid.
    broadcast_sbp = [flow.sbp.broadcast] * len(placement.ranks.shape)

    origin = random_tensor(1, 16,).to_global(placement, sbp)
    origin.retain_grad()
    indices = choice_tensor(16, (8, 1), replace=False).to_global(
        placement, broadcast_sbp
    )
    update = random_tensor(1, 8).to_global(placement, broadcast_sbp)
    update.retain_grad()

    # NumPy snapshots of the inputs, taken before the op runs.
    expected_out = origin.oneflow.numpy()
    flat_indices = indices.oneflow.numpy().reshape(8)
    update_values = update.oneflow.numpy()

    expected_update_grad = np.ones(8)
    expected_origin_grad = np.ones(16)
    expected_origin_grad[flat_indices] = 0.0

    output = flow.tensor_scatter_nd_update(
        origin.oneflow, indices.oneflow, update.oneflow
    )
    output.sum().backward()

    # Reference result: the same overwrite done with NumPy indexing.
    expected_out[flat_indices] = update_values

    test_case.assertTrue(
        np.allclose(output.numpy(), expected_out, rtol=0.0001, atol=0.0001)
    )
    test_case.assertTrue(
        np.allclose(update.oneflow.grad.numpy(), expected_update_grad)
    )
    test_case.assertTrue(
        np.allclose(origin.oneflow.grad.numpy(), expected_origin_grad)
    )
def _test_global_tensor_scatter_nd_update_t(
    test_case, placement, sbp, check_graph=False
):
    """Check row-wise ``tensor_scatter_nd_update`` on a (16, 4) global tensor.

    Eight distinct rows of the origin are replaced by the rows of an (8, 4)
    update and the result is compared with the same overwrite done in NumPy.
    When ``check_graph`` is true the op is invoked through a
    ``TensorScatterNdUpdate`` instance instead of being called directly.
    """
    # One broadcast entry per dimension of the placement's rank grid.
    broadcast_sbp = [flow.sbp.broadcast] * len(placement.ranks.shape)

    origin = random_tensor(2, 16, 4, requires_grad=False).to_global(placement, sbp)
    indices = choice_tensor(16, (8, 1), replace=False).to_global(
        placement, broadcast_sbp
    )
    update = random_tensor(2, 8, 4, requires_grad=False).to_global(
        placement, broadcast_sbp
    )

    # NumPy snapshots of the inputs, taken before the op runs.
    expected = origin.oneflow.numpy()
    row_ids = indices.oneflow.numpy().reshape(8)
    new_rows = update.oneflow.numpy()

    # Pick the code path under test, then call it the same way either way.
    scatter = (
        TensorScatterNdUpdate() if check_graph else flow.tensor_scatter_nd_update
    )
    output = scatter(origin.oneflow, indices.oneflow, update.oneflow)

    # Reference result: replace the selected rows with NumPy indexing.
    expected[row_ids] = new_rows

    test_case.assertTrue(
        np.allclose(output.numpy(), expected, rtol=0.0001, atol=0.0001)
    )
def _test_tensor_scatter_nd_update_backward(test_case, device):
    """Check output and gradients of ``flow.tensor_scatter_nd_update`` on ``device``.

    Positions 1, 6 and 4 of ``arange(8)`` are overwritten with 10.2, 5.1 and
    12.7. After ``output.sum().backward()`` the update gradient must be all
    ones and the origin gradient must be zero at the overwritten positions
    and one elsewhere.
    """
    target_device = flow.device(device)

    origin = flow.tensor(
        np.arange(8), dtype=flow.float, device=target_device, requires_grad=True
    )
    indices = flow.tensor(
        np.array([[1], [6], [4]]), dtype=flow.int, device=target_device
    )
    of_update = flow.tensor(
        np.array([10.2, 5.1, 12.7]),
        requires_grad=True,
        dtype=flow.float,
        device=target_device,
    )

    expected_out = np.array([0.0, 10.2, 2.0, 3.0, 12.7, 5.0, 5.1, 7.0])
    expected_update_grad = np.array([1.0, 1.0, 1.0])
    expected_origin_grad = np.array([1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0])

    output = flow.tensor_scatter_nd_update(origin, indices, of_update)
    output.sum().backward()

    test_case.assertTrue(
        np.allclose(output.numpy(), expected_out, rtol=0.0001, atol=0.0001)
    )
    test_case.assertTrue(np.allclose(of_update.grad.numpy(), expected_update_grad))
    test_case.assertTrue(np.allclose(origin.grad.numpy(), expected_origin_grad))
 def test_tensor_scatter_nd_update_runtime_error(test_case):
     """Mismatched dtypes between tensor and updates must raise a clear error.

     ``x`` is float32 while ``updates`` is float64, so the call to
     ``flow.tensor_scatter_nd_update`` is expected to raise an exception whose
     message contains "The dtype of tensor and updates must be same.".
     """
     # Build the inputs outside assertRaises: if setup itself fails, the test
     # reports that error directly instead of a misleading message mismatch.
     x = flow.arange(8, dtype=flow.float32, requires_grad=True)
     indices = flow.tensor([[1], [3], [5]])
     updates = flow.tensor([-1, -2, -3], dtype=flow.float64, requires_grad=True)

     # Only the op under test is allowed to raise here.
     with test_case.assertRaises(Exception) as context:
         flow.tensor_scatter_nd_update(x, indices, updates)

     # assertIn shows both strings on failure, unlike assertTrue(a in b).
     test_case.assertIn(
         "The dtype of tensor and updates must be same.", str(context.exception)
     )
def _test_tensor_scatter_nd_update(test_case, device):
    """Check ``flow.tensor_scatter_nd_update`` on a 1-D tensor placed on ``device``.

    Positions 1, 6 and 4 of ``arange(8)`` are overwritten with 10.2, 5.1 and
    12.7 and the result is compared with a hand-written expectation.
    """
    target_device = flow.device(device)

    origin = flow.tensor(np.arange(8), dtype=flow.float, device=target_device)
    indices = flow.tensor(
        np.array([[1], [6], [4]]), dtype=flow.int, device=target_device
    )
    update = flow.tensor(
        np.array([10.2, 5.1, 12.7]), dtype=flow.float, device=target_device
    )

    expected = np.array([0.0, 10.2, 2.0, 3.0, 12.7, 5.0, 5.1, 7.0])
    output = flow.tensor_scatter_nd_update(origin, indices, update)

    test_case.assertTrue(
        np.allclose(output.numpy(), expected, rtol=0.0001, atol=0.0001)
    )
def _test_tensor_scatter_nd_update_t(test_case, device):
    """Check row-wise ``flow.tensor_scatter_nd_update`` on a (5, 3) tensor.

    Rows 0, 4 and 2 of ``arange(15).reshape(5, 3)`` are replaced by rows of
    ones, twos and threes respectively; rows 1 and 3 must stay untouched.
    """
    target_device = flow.device(device)

    origin = flow.tensor(
        np.arange(15).reshape(5, 3), dtype=flow.float, device=target_device
    )
    indices = flow.tensor(
        np.array([[0], [4], [2]]), dtype=flow.int, device=target_device
    )
    update = flow.tensor(
        np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]]),
        dtype=flow.float,
        device=target_device,
    )

    expected = np.array(
        [
            [1.0, 1.0, 1.0],  # row 0 <- first update row
            [3.0, 4.0, 5.0],  # row 1 unchanged
            [3.0, 3.0, 3.0],  # row 2 <- third update row
            [9.0, 10.0, 11.0],  # row 3 unchanged
            [2.0, 2.0, 2.0],  # row 4 <- second update row
        ]
    )
    output = flow.tensor_scatter_nd_update(origin, indices, update)

    test_case.assertTrue(
        np.allclose(output.numpy(), expected, rtol=0.0001, atol=0.0001)
    )
 def build(self, origin, indices, update):
     """Return ``flow.tensor_scatter_nd_update`` applied to the three inputs."""
     scattered = flow.tensor_scatter_nd_update(origin, indices, update)
     return scattered