Example #1
def compare_with_tensorflow(device_type, x_shape, data_type, axis):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    if data_type == "float16":
        dtype = flow.float
    else:
        dtype = type_name_to_flow_type[data_type]

    @flow.global_function(type="train", function_config=func_config)
    def SoftmaxJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=-1.0,
                                                            maxval=1.0),
                trainable=True,
            )
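            # x1 keeps a handle on the raw variable for total_loss below;
            # watching the identity copy means x_diff only records the
            # gradient that flows back through the softmax branch.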
            x1 = x
            x = flow.identity(x)
            if data_type == "float16":
                loss = flow.cast(
                    flow.nn.softmax(flow.cast(x, dtype=flow.float16),
                                    axis=axis),
                    dtype=flow.float,
                )
            else:
                loss = flow.nn.softmax(x, axis=axis)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            total_loss = loss * x1
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [0.0001]),
                               momentum=0).minimize(total_loss)
            return loss

    of_out = SoftmaxJob().get()
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.softmax(x, axis=axis)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    if data_type == "float16":
        tolerance = 0.001
    else:
        tolerance = 1e-05
    assert np.allclose(of_out.numpy(),
                       tf_out.numpy(),
                       rtol=tolerance,
                       atol=tolerance)
    assert np.allclose(
        test_global_storage.Get("x_diff"),
        tf_x_diff.numpy(),
        rtol=tolerance,
        atol=tolerance,
    )
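These snippets omit their imports. A preamble along the following lines would make them self-contained; the exact helper module paths (test_global_storage, test_util) are assumptions inferred from the identifiers used in the examples, not something stated in the listing:

# Assumed preamble for the examples in this listing; helper module names
# are inferred from the identifiers used above and may differ locally.
import numpy as np
import tensorflow as tf
import oneflow as flow
import oneflow.typing as oft  # some examples alias this as `tp`
import test_global_storage
from test_util import type_name_to_flow_type, type_name_to_np_type
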
Example #2
def compare_with_numpy(device_type, input_shape, dtype, size, data_format,
                       interpolation, align_corners):
    assert device_type in ["gpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="predict", function_config=func_config)
    def UpsampleJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "input",
                shape=input_shape,
                dtype=type_name_to_flow_type[dtype],
                initializer=flow.random_uniform_initializer(minval=2,
                                                            maxval=5),
                trainable=False,
            )
            loss = flow.layers.upsample_2d(
                x,
                size=size,
                data_format=data_format,
                interpolation=interpolation,
                align_corners=align_corners,
            )
            flow.watch(x, test_global_storage.Setter("x1"))
            flow.watch(loss, test_global_storage.Setter("loss1"))
            return loss

    of_out = UpsampleJob().get()
    channel_pos = "channels_first" if data_format.startswith(
        "NC") else "channels_last"
    if align_corners:
        assert interpolation == "bilinear"
        x = test_global_storage.Get("x1")
        if data_format == "NHWC":
            x = np.transpose(x, axes=[0, 3, 1, 2])
        coeffs_dict = {"bilinear": linear_coeffs}
        coeffs = coeffs_dict[interpolation]
        scaler = "align_corners"
        np_out = interpolate_nd(x, coeffs, scale_factors=size,
                                scaler=scaler).astype(np.float32)
        of_out_np = of_out.numpy()
        if data_format == "NHWC":
            of_out_np = np.transpose(of_out_np, axes=[0, 3, 1, 2])
        assert np.allclose(of_out_np, np_out, rtol=1e-05, atol=1e-05)
    else:
        x = test_global_storage.Get("x1")
        if data_format == "NHWC":
            x = np.transpose(x, axes=[0, 3, 1, 2])
        coeffs_dict = {"bilinear": linear_coeffs, "nearest": nearest_coeffs}
        coeffs = coeffs_dict[interpolation]
        scaler = "pytorch_half_pixel"
        np_out = interpolate_nd(x, coeffs, scale_factors=size,
                                scaler=scaler).astype(np.float32)
        of_out_np = of_out.numpy()
        if data_format == "NHWC":
            of_out_np = np.transpose(of_out_np, axes=[0, 3, 1, 2])
        assert np.allclose(of_out_np, np_out, rtol=1e-05, atol=1e-05)
Example #3
def compare_with_tensorflow(device_type, data_type, label_type, num_classes,
                            batch_size):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    if device_type == "cpu":
        flow.config.gpu_device_num(0)
        flow.config.cpu_device_num(4)
    else:
        flow.config.gpu_device_num(4)
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def SparseSoftmaxCrossEntropyWithLogitsJob(labels: oft.Numpy.Placeholder(
        (batch_size, ), dtype=type_name_to_flow_type[label_type])):
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=(batch_size, num_classes),
                dtype=type_name_to_flow_type[data_type],
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            prediction = flow.nn.softmax(logits=x)
        with flow.scope.placement(device_type, "0:0-3"):
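            # The loss runs on four devices: labels are broadcast to every
            # device while the prediction is split along its last (class)
            # dimension, exercising model-parallel sparse_cross_entropy.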
            labels_distribute = flow.distribute.broadcast()
            prediction_distribute = flow.distribute.split(
                len(prediction.shape) - 1)
            loss = flow.nn.sparse_cross_entropy(
                labels=labels.with_distribute(labels_distribute),
                prediction=prediction.with_distribute(prediction_distribute),
            )
        with flow.scope.placement(device_type, "0:0"):
            loss = flow.math.square(loss)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [0.0001]),
                               momentum=0).minimize(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss

    labels = np.random.randint(0, num_classes, size=(batch_size, )).astype(
        type_name_to_np_type[label_type])
    of_out = SparseSoftmaxCrossEntropyWithLogitsJob(labels).get()
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.sparse_softmax_cross_entropy_with_logits(labels, x)
        tf_out = tf.math.square(tf_out)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-05, atol=1e-05)
    assert np.allclose(test_global_storage.Get("x_diff"),
                       tf_x_diff.numpy(),
                       rtol=1e-05,
                       atol=1e-05)
    flow.clear_default_session()
Example #4
def compare_with_tensorflow(device_type, activation_type, shape, data_type):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    flow.config.enable_debug_mode(True)
    func_config = flow.FunctionConfig()
    if data_type == flow.float16:
        func_config.enable_auto_mixed_precision(True)
        data_type = flow.float

    func_config.default_data_type(data_type)

    of_activation_map = {
        "relu": flow.nn.relu,
        "sigmoid": flow.math.sigmoid,
        "tanh": flow.math.tanh,
    }
    tf_activation_map = {
        "relu": tf.nn.relu,
        "sigmoid": tf.math.sigmoid,
        "tanh": tf.math.tanh,
    }

    @flow.global_function(type="train", function_config=func_config)
    def ActivationJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=shape,
                dtype=data_type,
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            loss = of_activation_map[activation_type](x)
            lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([],
                                                                     [1e-4])
            flow.optimizer.SGD(lr_scheduler, momentum=0).minimize(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss

    # OneFlow
    of_out = ActivationJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf_activation_map[activation_type](x)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)

    rtol = 1e-5
    atol = 1e-5
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol, atol)
    assert np.allclose(test_global_storage.Get("x_diff"), tf_x_diff.numpy(),
                       rtol, atol)
Example #5
def compare_with_numpy(test_case, device_type, input_shape, start_end_dim):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()

    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    start_dim = start_end_dim[0]
    end_dim = start_end_dim[1]

    @flow.global_function(type="train", function_config=func_config)
    def FlattenJob() -> flow.typing.Numpy:
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "in",
                shape=input_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=2, maxval=5),
                trainable=True,
            )

            loss = flow.flatten(x, start_dim=start_dim, end_dim=end_dim)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
            ).minimize(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))

            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = FlattenJob()

    # Numpy
    of_x = test_global_storage.Get("x")
    of_x_shape = of_x.shape
    of_x_diff = test_global_storage.Get("x_diff")

    true_end_dim = end_dim + len(of_x_shape) if end_dim < 0 else end_dim
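    # Build the expected shape: dims before start_dim are kept, the
    # [start_dim, true_end_dim] span collapses into a single dim, and the
    # trailing dims are kept.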
    new_shape = []
    for i in range(0, start_dim):
        new_shape.append(of_x_shape[i])
    flatten_dim = 1
    for i in range(start_dim, true_end_dim + 1):
        flatten_dim *= of_x_shape[i]
    new_shape.append(flatten_dim)
    for i in range(true_end_dim + 1, len(of_x_shape)):
        new_shape.append(of_x_shape[i])

    np_out = np.reshape(of_x, tuple(new_shape))

    test_case.assertTrue(of_out.shape == np_out.shape)
    test_case.assertTrue(np.allclose(of_out, np_out, rtol=1e-5, atol=1e-5))
    test_case.assertTrue(
        np.allclose(of_x_diff, np.ones(of_x_diff.shape), rtol=1e-5, atol=1e-5)
    )
Example #6
def compare_with_tensorflow(device_type, x_shape, y_shape, dtype, axis):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_logical_view(flow.scope.mirrored_view())
    func_config.default_data_type(flow.float)
    func_config.train.primary_lr(1e-4)
    func_config.train.model_update_conf(dict(naive_conf={}))

    @flow.global_function(func_config)
    def ConcatJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=type_name_to_flow_type[dtype],
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            y = flow.get_variable(
                "y",
                shape=y_shape,
                dtype=type_name_to_flow_type[dtype],
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            x = flow.cast_to_current_logical_view(x)
            y = flow.cast_to_current_logical_view(y)
            loss = flow.concat([x, y], axis)
            flow.losses.add_loss(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(y, test_global_storage.Setter("y"))
            flow.watch_diff(y, test_global_storage.Setter("y_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = ConcatJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        y = tf.Variable(test_global_storage.Get("y"))
        tf_out = tf.concat([x, y], axis)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    tf_y_diff = tape.gradient(tf_out, y, loss_diff)

    assert np.array_equal(of_out.numpy(), tf_out.numpy())
    assert np.array_equal(test_global_storage.Get("x_diff"), tf_x_diff.numpy())
    assert np.array_equal(test_global_storage.Get("y_diff"), tf_y_diff.numpy())
Example #7
def compare_with_tensorflow(device_type, input_shape, in_dtype, out_dtype,
                            test_fuse_cast_scale_pass):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()

    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.enable_fuse_cast_scale(True)

    @flow.global_function(type="predict", function_config=func_config)
    def FusedCastScaleJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "in",
                shape=input_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(),
                trainable=True,
            )
            scale = flow.get_variable(
                "scale",
                shape=(1, ),
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(),
                trainable=False,
            )
            loss = flow.cast(x, dtype=type_name_to_flow_type[in_dtype])
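            # Either emit separate cast and scale ops and rely on the
            # enable_fuse_cast_scale pass to fuse them, or call the fused
            # op directly; both paths are checked against TF below.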
            if test_fuse_cast_scale_pass:
                loss = flow.cast(
                    loss, dtype=type_name_to_flow_type[out_dtype]) * flow.cast(
                        scale, dtype=type_name_to_flow_type[out_dtype])
            else:
                loss = fused_cast_scale(
                    loss,
                    flow.cast(scale, dtype=type_name_to_flow_type[out_dtype]),
                    name="fused_cast_scale",
                )
            loss = flow.cast(loss, dtype=flow.float)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch(scale, test_global_storage.Setter("scale"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = FusedCastScaleJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        scale = tf.Variable(test_global_storage.Get("scale"))
        tf_out = tf.cast(x, dtype=type_name_to_np_type[in_dtype])
        tf_out = tf.cast(tf_out,
                         dtype=type_name_to_np_type[out_dtype]) * tf.cast(
                             scale, dtype=type_name_to_np_type[out_dtype])
        tf_out = tf.cast(tf_out, dtype=tf.float32)

    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-5, atol=1e-5)
Example #8
def _test_batchnorm_relu(test_case, input_shape, axis, data_type):
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_logical_view(flow.scope.consistent_view())
    func_config.default_data_type(flow.float32)

    @flow.global_function(type="train", function_config=func_config)
    def test_job(x: oft.Numpy.Placeholder(input_shape, dtype=flow.float32),):
        v = flow.get_variable(
            name="v",
            shape=(1,),
            dtype=flow.float32,
            initializer=flow.zeros_initializer(),
        )

        x = x + v

        x1 = flow.identity(x)
        x2 = flow.identity(x)

        flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
        flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))

        x1 = flow.cast(x1, data_type)
        x2 = flow.cast(x2, data_type)

        y1 = flow.layers.batch_normalization_relu(x1, axis=axis, name="BN1")
        y2 = flow.math.relu(flow.layers.batch_normalization(x2, axis=axis, name="BN2"))
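        # y1 takes the fused batch_normalization_relu path, y2 the unfused
        # batch_normalization followed by relu; their outputs and input
        # gradients are compared after the job runs.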

        y1 = flow.cast(y1, flow.float32)
        y2 = flow.cast(y2, flow.float32)

        flow.watch(y1, test_global_storage.Setter("y1"))
        flow.watch(y2, test_global_storage.Setter("y2"))

        y1 = flow.where(flow.math.greater(y2, v), y1, v)
        y2 = flow.where(flow.math.greater(y1, v), y2, v)

        loss = y1 + y2
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
        ).minimize(flow.math.reduce_sum(loss))

        return loss

    x = np.random.rand(*input_shape).astype(np.float32)

    test_job(x).get()

    tol = 1e-3 if data_type == flow.float16 else 1e-5

    y1 = test_global_storage.Get("y1")
    y2 = test_global_storage.Get("y2")

    test_case.assertTrue(np.allclose(y1, y2, rtol=tol, atol=tol))
    x1_diff = test_global_storage.Get("x1_diff")
    x2_diff = test_global_storage.Get("x2_diff")
    test_case.assertTrue(np.allclose(x1_diff, x2_diff, rtol=tol, atol=tol))
Example #9
def _compare_with_numpy(test_case, np_func, x, y, axis, keepdims=True):
    x = test_global_storage.Get("x")
    dx = test_global_storage.Get("x_diff")
    np_y = np_func(x, axis=axis, keepdims=True)
    test_case.assertTrue(np.allclose(y, np_y, rtol=1e-5, atol=1e-5))
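    # Reference gradient for a reduce-max/min: entries equal to the reduced
    # value share the gradient equally (1 / count of ties), all others get 0.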
    mask = np.where(x == y, 1, 0)
    count = np.add.reduce(mask, axis=axis, keepdims=True)
    np_dx = np.where(x == y, 1 / count, 0)
    test_case.assertTrue(np.allclose(dx, np_dx, rtol=1e-5, atol=1e-5))
Example #10
def compare_with_np(device_type, input_tensor, dim, dtype):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_logical_view(flow.scope.mirrored_view())
    func_config.default_placement_scope(
        flow.scope.placement(device_type, "0:0"))

    output_np = np.diag(input_tensor, dim)
    output_shape = output_np.shape
    input_shape = input_tensor.shape
    output_dtype = output_np.dtype
    grad = np.random.random(output_shape).astype(output_dtype)

    @flow.global_function(type="train", function_config=func_config)
    def diag_job(
        input_tensor: tp.Numpy.Placeholder(shape=(input_shape),
                                           dtype=flow.float),
    ) -> tp.Numpy:
        input_var = flow.get_variable(
            "input_tensor",
            shape=(input_shape),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
            trainable=True,
        )

        input_tensor = input_tensor + input_var
        input_tensor = flow.cast_to_current_logical_view(input_tensor)
        input_tensor = flow.cast(input_tensor, type_name_to_flow_type[dtype])
        output = flow.diag(input_tensor, dim)
        if (output.dtype == flow.int64 or output.dtype == flow.int8
                or output.dtype == flow.int32):
            output = flow.cast(output, flow.float)
        flow.optimizer.Adam(
            flow.optimizer.PiecewiseConstantScheduler([],
                                                      [1e-4])).minimize(output)

        flow.watch(input_tensor, test_global_storage.Setter("x"))
        flow.watch_diff(input_tensor, test_global_storage.Setter("x_diff"))
        flow.watch(output, test_global_storage.Setter("output"))
        flow.watch_diff(output, test_global_storage.Setter("output_diff"))

        return output

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    output_of = diag_job(input_tensor)
    output_diff = test_global_storage.Get("output_diff").astype(dtype)
    x_diff_of = test_global_storage.Get("x_diff").astype(dtype)

    # np
    x_diff_np = diag_grad_np(input_tensor, dim, output_np, output_diff)

    assert np.allclose(output_of, output_np)
    assert np.allclose(x_diff_of, x_diff_np)
Example #11
def compare_with_tensorflow(device_type, x_shape, y_shape, dtype, axis):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_logical_view(flow.scope.mirrored_view())
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def ConcatJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=type_name_to_flow_type[dtype],
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            y = flow.get_variable(
                "y",
                shape=y_shape,
                dtype=type_name_to_flow_type[dtype],
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            x = flow.cast_to_current_logical_view(x)
            y = flow.cast_to_current_logical_view(y)
            loss = flow.concat([x, y], axis)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(y, test_global_storage.Setter("y"))
            flow.watch_diff(y, test_global_storage.Setter("y_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss

    # OneFlow
    of_out = ConcatJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        y = tf.Variable(test_global_storage.Get("y"))
        tf_out = tf.concat([x, y], axis)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    tf_y_diff = tape.gradient(tf_out, y, loss_diff)

    assert np.array_equal(of_out.numpy(), tf_out.numpy())
    assert np.array_equal(test_global_storage.Get("x_diff"), tf_x_diff.numpy())
    assert np.array_equal(test_global_storage.Get("y_diff"), tf_y_diff.numpy())
Example #12
def compare_with_tensorflow(device_type, data_type, label_type, num_classes,
                            batch_size):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.train.primary_lr(1e-4)
    func_config.train.model_update_conf(dict(naive_conf={}))

    @flow.global_function(func_config)
    def SparseSoftmaxCrossEntropyWithLogitsJob(labels: oft.Numpy.Placeholder(
        (batch_size, ), dtype=type_name_to_flow_type[label_type])):
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=(batch_size, num_classes),
                dtype=type_name_to_flow_type[data_type],
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            prediction = flow.nn.softmax(logits=x)
            loss = flow.nn.sparse_cross_entropy(labels=labels,
                                                prediction=prediction)
            loss = flow.math.square(loss)
            flow.losses.add_loss(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss

    # fake labels
    labels = np.random.randint(0, num_classes, size=(batch_size, )).astype(
        type_name_to_np_type[label_type])

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = SparseSoftmaxCrossEntropyWithLogitsJob(labels).get()

    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.sparse_softmax_cross_entropy_with_logits(labels, x)
        tf_out = tf.math.square(tf_out)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)

    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-5, atol=1e-5)
    assert np.allclose(test_global_storage.Get("x_diff"),
                       tf_x_diff.numpy(),
                       rtol=1e-5,
                       atol=1e-5)
    flow.clear_default_session()
Example #13
def compare_with_tensorflow(device_type, input_shape, dtype, size, data_format,
                            interpolation):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()

    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def UpsampleJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "input",
                shape=input_shape,
                dtype=type_name_to_flow_type[dtype],
                initializer=flow.random_uniform_initializer(minval=2,
                                                            maxval=5),
                trainable=True,
            )

            loss = flow.layers.upsample_2d(x,
                                           size=size,
                                           data_format=data_format,
                                           interpolation=interpolation)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = UpsampleJob().get()
    channel_pos = "channels_first" if data_format.startswith(
        "NC") else "channels_last"
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x").astype(np.float32))
        tf_out = tf.keras.layers.UpSampling2D(size=size,
                                              data_format=channel_pos,
                                              interpolation=interpolation)(x)

    loss_diff = test_global_storage.Get("loss_diff").astype(np.float32)
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-5, atol=1e-5)
    assert np.allclose(test_global_storage.Get("x_diff"),
                       tf_x_diff.numpy(),
                       rtol=1e-5,
                       atol=1e-5)
Example #14
def compare_with_tensorflow(device_type, data_type, shape):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    def np_softmax(x):
        return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)

    @flow.global_function(type="train", function_config=func_config)
    def SoftmaxCrossEntropyWithLogitsJob(labels: oft.Numpy.Placeholder(
        shape, dtype=type_name_to_flow_type[data_type])):
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=shape,
                dtype=type_name_to_flow_type[data_type],
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            loss = flow.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                             logits=x)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss

    # fake labels
    labels = np_softmax(np.random.uniform(size=shape)).astype(
        type_name_to_np_type[data_type])

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = SoftmaxCrossEntropyWithLogitsJob(labels).get()

    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.softmax_cross_entropy_with_logits(labels, x)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)

    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-5, atol=1e-5)
    assert np.allclose(test_global_storage.Get("x_diff"),
                       tf_x_diff.numpy(),
                       rtol=1e-5,
                       atol=1e-5)
    flow.clear_default_session()
Example #15
def compare_with_tensorflow(device_type, a_shape, b_shape, transpose_a, transpose_b):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.train.primary_lr(1e-4)
    func_config.train.model_update_conf(dict(naive_conf={}))

    @flow.global_function(func_config)
    def MatmulJob():
        with flow.scope.placement(device_type, "0:0"):
            a = flow.get_variable(
                "a",
                shape=a_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            b = flow.get_variable(
                "b",
                shape=b_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            loss = flow.matmul(a, b, transpose_a, transpose_b)
            flow.losses.add_loss(loss)

            flow.watch(a, test_global_storage.Setter("a"))
            flow.watch_diff(a, test_global_storage.Setter("a_diff"))
            flow.watch(b, test_global_storage.Setter("b"))
            flow.watch_diff(b, test_global_storage.Setter("b_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = MatmulJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        a = tf.Variable(test_global_storage.Get("a"))
        b = tf.Variable(test_global_storage.Get("b"))
        tf_out = tf.matmul(a, b, transpose_a, transpose_b)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_a_diff = tape.gradient(tf_out, a, loss_diff)
    tf_b_diff = tape.gradient(tf_out, b, loss_diff)

    assert np.allclose(of_out.numpy(), tf_out.numpy(), atol=1e-03), np.max(
        np.abs(of_out.numpy() - tf_out.numpy())
    )
    assert np.allclose(test_global_storage.Get("a_diff"), tf_a_diff.numpy(), atol=1e-03)
    assert np.allclose(test_global_storage.Get("b_diff"), tf_b_diff.numpy(), atol=1e-03)
Example #16
def test_TestMultiInput_grad_mirrored_inplace(test_case):
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.mirrored_view())

    shape = (
        3,
        3,
    )

    @flow.global_function(type="train", function_config=func_config)
    def TestMultiInputJob():
        with flow.scope.placement("gpu", "0:0"):
            x1 = flow.get_variable(
                "x1",
                shape=shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            x2 = flow.get_variable(
                "x2",
                shape=shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            loss = TestMultiInput(x1, x2)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(loss)

            flow.watch(x1, test_global_storage.Setter("x1"))
            flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
            flow.watch(x2, test_global_storage.Setter("x2"))
            flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))
            return loss

    check_point = flow.train.CheckPoint()
    check_point.init()
    out = TestMultiInputJob().get()
    x1_diff = test_global_storage.Get("x1_diff")
    x2_diff = test_global_storage.Get("x2_diff")

    expect_out = test_global_storage.Get("x1")
    expect_x1_diff = np.ones(shape, dtype=np.float32)
    expect_x2_diff = np.ones(shape, dtype=np.float32) * 2.0
    # print(x1_diff, x2_diff)
    # print(expect_x1_diff, expect_x2_diff)
    assert np.allclose(out.numpy(), expect_out)
    assert np.allclose(x1_diff, expect_x1_diff)
    assert np.allclose(x2_diff, expect_x2_diff)
Example #17
def compare_with_tensorflow(device_type, x_shape, data_type, axes):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    if max(axes) >= len(x_shape):
        return

    @flow.global_function(type="train", function_config=func_config)
    def MomentsJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=type_name_to_flow_type[data_type],
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            (m, v) = flow.nn.moments(x, axes)
            loss = m + v
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return (m, v)

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = MomentsJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.moments(x, axes)
        tf_loss = tf_out[0] + tf_out[1]
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_loss, x, loss_diff)
    for i in range(2):
        assert np.allclose(of_out[i].numpy(),
                           tf_out[i].numpy(),
                           rtol=1e-5,
                           atol=1e-5)
    assert np.allclose(test_global_storage.Get("x_diff"),
                       tf_x_diff.numpy(),
                       rtol=1e-5,
                       atol=1e-5)
Example #18
def compare_with_tensorflow(device_type, data_type, shape):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    dtype = type_name_to_flow_type[data_type]

    def np_sigmoid(x):
        return 1 / (1 + np.exp(-x))

    @flow.global_function(type="train", function_config=func_config)
    def SigmoidCrossEntropyWithLogitsJob(labels: oft.Numpy.Placeholder(
        shape, dtype)):
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=shape,
                dtype=type_name_to_flow_type[data_type],
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            loss = flow.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                             logits=x)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [0.0001]),
                               momentum=0).minimize(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return loss

    labels = np_sigmoid(np.random.randint(0, 10, size=shape)).astype(
        type_name_to_np_type[data_type])
    of_out = SigmoidCrossEntropyWithLogitsJob(labels).get()
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.sigmoid_cross_entropy_with_logits(labels, x)
        loss_diff = test_global_storage.Get("loss_diff")
        tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    tolerance = 1e-05
    assert np.allclose(of_out.numpy(),
                       tf_out.numpy(),
                       rtol=tolerance,
                       atol=tolerance)
    assert np.allclose(
        test_global_storage.Get("x_diff"),
        tf_x_diff.numpy(),
        rtol=tolerance,
        atol=tolerance,
    )
    flow.clear_default_session()
Example #19
def test_TestMultiInput_grad_mirrored_inplace(test_case):
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.mirrored_view())
    func_config.train.primary_lr(1e-4)
    func_config.train.model_update_conf(dict(naive_conf={}))

    shape = (
        3,
        3,
    )

    @flow.global_function(func_config)
    def TestMultiInputJob():
        with flow.scope.placement("gpu", "0:0"):
            x1 = flow.get_variable(
                "x1",
                shape=shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            x2 = flow.get_variable(
                "x2",
                shape=shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            loss = TestMultiInput(x1, x2)
            flow.losses.add_loss(loss)

            flow.watch(x1, test_global_storage.Setter("x1"))
            flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
            flow.watch(x2, test_global_storage.Setter("x2"))
            flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))
            return loss

    check_point = flow.train.CheckPoint()
    check_point.init()
    out = TestMultiInputJob().get()
    x1_diff = test_global_storage.Get("x1_diff")
    x2_diff = test_global_storage.Get("x2_diff")

    expect_out = test_global_storage.Get("x1")
    expect_x1_diff = np.ones(shape, dtype=np.float32)
    expect_x2_diff = np.ones(shape, dtype=np.float32) * 2.0
    # print(x1_diff, x2_diff)
    # print(expect_x1_diff, expect_x2_diff)
    assert np.allclose(out.numpy(), expect_out)
    assert np.allclose(x1_diff, expect_x1_diff)
    assert np.allclose(x2_diff, expect_x2_diff)
Example #20
def compare_fused_with_no_fused(test_case,
                                batch_size,
                                seq_len,
                                num_heads,
                                head_size,
                                fp16,
                                verbose=False):
    hidden_size = num_heads * 3 * head_size

    input = gen_random_input((seq_len, batch_size, hidden_size))

    # fused op
    func = make_self_attn_qk_v_func(batch_size, seq_len, num_heads, head_size,
                                    True, fp16)
    qmk, v = func(input)

    # unfused op
    func_ = make_self_attn_qk_v_func(batch_size, seq_len, num_heads, head_size,
                                     False, fp16)
    qmk_, v_ = func_(input)

    # np
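    # NumPy reference: np_qkv presumably splits the packed hidden dimension
    # into per-head q/k/v, and np_bgemm computes the batched q @ k^T scaled
    # by get_alpha(head_size) (assumed to be 1/sqrt(head_size)).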
    _q, _k, _v = np_qkv(input, head_size)
    _qmk = np_bgemm(_q.transpose(1, 2, 0, 3), _k.transpose(1, 2, 3, 0),
                    get_alpha(head_size))
    _v = _v.transpose(1, 2, 0, 3)

    if verbose:
        print("")
        print("=" * 80)
        print(f"input: {input.shape}\n{input}")
        print(f"_q: {_q.shape}\n{_q}")
        print(f"_k: {_k.shape}\n{_k}")
        print(f"_v: {_v.shape}\n{_v}")
        print(f"_qmk: {_qmk.shape}\n{_qmk}")
        print(f"qmk: {qmk.shape}\n{qmk}")
        print(f"qmk_: {qmk_.shape}\n{qmk_}")
        diff = qmk - qmk_
        print("abs diff mean:", np.abs(diff).mean())
        print("abs diff max:", np.abs(diff).max())

    test_case.assertTrue(np.allclose(qmk, qmk_))
    test_case.assertTrue(np.allclose(qmk, _qmk))
    test_case.assertTrue(np.allclose(v, v_))
    test_case.assertTrue(np.allclose(v, _v))

    h_grad = test_global_storage.Get("h_grad_fused")
    h_grad_ = test_global_storage.Get("h_grad")
    if verbose:
        print(f"h_grad: {h_grad.shape}\n{h_grad}")
        print(f"h_grad_: {h_grad_.shape}\n{h_grad_}")
    test_case.assertTrue(np.allclose(h_grad, h_grad_))
Example #21
def of_run(device_type, x_shape, data_type, rate, seed):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()

    if data_type == "float16":
        func_config.enable_auto_mixed_precision(True)
        dtype = flow.float
    else:
        dtype = type_name_to_flow_type[data_type]

    func_config.train.primary_lr(1e-4)
    func_config.train.model_update_conf(dict(naive_conf={}))

    @flow.global_function(func_config)
    def DropoutJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=1,
                                                            maxval=10),
                trainable=True,
            )
            of_out = flow.nn.dropout(x, rate=rate, seed=seed, name="dropout")
            loss = flow.math.square(of_out)
            flow.losses.add_loss(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(of_out, test_global_storage.Setter("out"))
            flow.watch_diff(of_out, test_global_storage.Setter("out_diff"))

            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = DropoutJob().get()

    of_out = test_global_storage.Get("out")
    out_diff = test_global_storage.Get("out_diff")
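    # Dropout checks: the fraction of zeroed outputs should be close to
    # `rate`, and the surviving elements are scaled by 1 / (1 - rate) in
    # both the forward output and the gradient.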
    assert np.allclose([1 - np.count_nonzero(of_out) / of_out.size], [rate],
                       atol=rate / 5)
    x = test_global_storage.Get("x")
    x_diff = test_global_storage.Get("x_diff")
    out_scale = of_out[np.where(of_out != 0)] / x[np.where(of_out != 0)]
    diff_scale = x_diff[np.where(of_out != 0)] / out_diff[np.where(
        of_out != 0)]
    assert np.allclose(out_scale, 1.0 / (1.0 - rate), atol=1e-5)
    assert np.allclose(diff_scale, 1.0 / (1.0 - rate), atol=1e-5)
Example #22
def of_run(device_type, x_shape, data_type, rate):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    if data_type == "float16":
        dtype = flow.float
    else:
        dtype = type_name_to_flow_type[data_type]

    @flow.global_function(type="train", function_config=func_config)
    def DropoutJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=-1,
                                                            maxval=1),
                trainable=True,
            )
            if data_type == "float16":
                x = flow.cast(flow.cast(x, flow.float16), dtype)
                of_out = flow.cast(
                    flow.nn.dropout(flow.cast(x, flow.float16),
                                    rate=rate,
                                    name="dropout"),
                    dtype,
                )
            else:
                of_out = flow.nn.dropout(x, rate=rate, name="dropout")
            loss = flow.math.square(of_out)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [0.0001]),
                               momentum=0).minimize(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(of_out, test_global_storage.Setter("out"))
            flow.watch_diff(of_out, test_global_storage.Setter("out_diff"))
            return loss

    of_out = DropoutJob().get()
    of_out = test_global_storage.Get("out")
    out_diff = test_global_storage.Get("out_diff")
    assert np.allclose([1 - np.count_nonzero(of_out) / of_out.size], [rate],
                       atol=1e-4)
    x = test_global_storage.Get("x")
    x_diff = test_global_storage.Get("x_diff")
    out_scale = of_out[np.where(of_out != 0)] / x[np.where(of_out != 0)]
    diff_scale = x_diff[np.where(of_out != 0)] / out_diff[np.where(
        of_out != 0)]
    assert np.allclose(out_scale, 1.0 / (1.0 - rate), atol=1e-05)
    assert np.allclose(diff_scale, 1.0 / (1.0 - rate), atol=1e-05)
Example #23
def _run_test(test_case, device, out_shape, num_segments, segment_ids_shape):
    segment_ids = _gen_segment_ids(out_shape, num_segments, segment_ids_shape)
    data = _gen_data(out_shape, num_segments, segment_ids_shape)

    unsorted_batch_segment_sum_out = _make_unsoted_segment_sum_fn(
        device, data, segment_ids, num_segments).get()
    out_ndarray = unsorted_batch_segment_sum_out.numpy()
    grad_in_ndarray = test_global_storage.Get("x_diff")
    grad_out_ndarray = test_global_storage.Get("loss_diff")

    _check(test_case, data, segment_ids, out_shape, out_ndarray)
    _check_bw(test_case, grad_out_ndarray, segment_ids, grad_in_ndarray.shape,
              grad_in_ndarray)
Example #24
def compare_with_tensorflow(device_type, x_shape, data_type, axis):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()

    if data_type == "float16":
        func_config.enable_auto_mixed_precision(True)
        dtype = flow.float
    else:
        dtype = type_name_to_flow_type[data_type]

    func_config.train.primary_lr(1e-4)
    func_config.train.model_update_conf(dict(naive_conf={}))

    @flow.global_function(func_config)
    def SoftmaxJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            loss = flow.nn.softmax(x, axis=axis)
            flow.losses.add_loss(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = SoftmaxJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.softmax(x, axis=axis)

    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-5, atol=1e-5)
    assert np.allclose(test_global_storage.Get("x_diff"),
                       tf_x_diff.numpy(),
                       rtol=1e-5,
                       atol=1e-5)
Example #25
def compare_reduce_sum_with_tensorflow(device_type,
                                       input_shape,
                                       axis,
                                       keepdims,
                                       rtol=1e-5,
                                       atol=1e-5):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()

    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.train.primary_lr(1e-4)
    func_config.train.model_update_conf(dict(naive_conf={}))

    @flow.global_function(func_config)
    def ReduceSumJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "in",
                shape=input_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=2,
                                                            maxval=5),
                trainable=True,
            )
            loss = flow.math.reduce_sum(x, axis=axis, keepdims=keepdims)
            flow.losses.add_loss(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = ReduceSumJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.math.reduce_sum(x, axis=axis, keepdims=keepdims)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)

    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-5, atol=1e-5)
    assert np.allclose(test_global_storage.Get("x_diff"),
                       tf_x_diff.numpy(),
                       rtol=1e-5,
                       atol=1e-5)
Example #26
def compare_with_np(device_type, x_shape, like0_shape, like1_shape, dtype):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def SplitLikeJob(x: oft.Numpy.Placeholder(x_shape, dtype=flow.float)):
        v = flow.get_variable(
            "x",
            shape=x_shape,
            dtype=flow.float,
            initializer=flow.constant_initializer(0),
            trainable=True,
        )
        x += v

        like0 = flow.constant(0, dtype=flow.float, shape=like0_shape)
        like1 = flow.constant(0, dtype=flow.float, shape=like1_shape)

        with flow.scope.placement("gpu", "0:0"):
            y0, y1 = split_like(x, [like0, like1], "split_like")
            loss = y0
        flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler([],
                                                                     [1e-4]),
                           momentum=0).minimize(loss)

        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

        return y0, y1

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    x = np.random.randn(*x_shape).astype(np.float32)
    y0, y1 = SplitLikeJob(x).get()
    assert (like0_shape[0] + like1_shape[0]) == x_shape[0]
    np_y0 = x[0:like0_shape[0]]
    np_y1 = x[like0_shape[0]:]
    zeros = np.zeros(np_y1.shape, dtype=np.float32)
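    # Only y0 feeds the loss, so the expected gradient of x is loss_diff
    # over the y0 slice and zeros over the y1 slice.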
    np_x_diff = np.concatenate([test_global_storage.Get("loss_diff"), zeros],
                               axis=0)
    assert np.array_equal(y0.numpy(), np_y0)
    assert np.array_equal(y1.numpy(), np_y1)
    assert np.array_equal(test_global_storage.Get("x_diff"), np_x_diff)
Example #27
def RunOneflowBiasAdd(data_type, device_type, value, bias, flow_args):
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def FlowJob(
            value: oft.Numpy.Placeholder(value.shape),
            bias: oft.Numpy.Placeholder(bias.shape),
    ):
        with flow.scope.placement(device_type, "0:0"):
            value += flow.get_variable(
                name="v1",
                shape=(1, ),
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )
            bias += flow.get_variable(
                name="v2",
                shape=(1, ),
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )
            if data_type == "float16":
                comp_value = flow.cast(value, dtype=flow.float16)
                comp_bias = flow.cast(bias, dtype=flow.float16)
            else:
                comp_value = value
                comp_bias = bias
            loss = flow.nn.bias_add(comp_value, comp_bias, *flow_args)
            if data_type == "float16":
                loss = flow.cast(loss, dtype=flow.float)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler([],
                                                                         [0]),
                               momentum=0).minimize(loss)

            flow.watch_diff(value, test_global_storage.Setter("value_diff"))
            flow.watch_diff(bias, test_global_storage.Setter("bias_diff"))

            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    y = FlowJob(value, bias).get().numpy()
    value_diff = test_global_storage.Get("value_diff")
    bias_diff = test_global_storage.Get("bias_diff")
    return y, value_diff, bias_diff
Example #28
def _test_logical_slice(test_case, var_shape, slice_tuples, split_axis,
                        device_tag, flow_dtype, device_num):
    flow.clear_default_session()
    if device_tag == "gpu":
        flow.config.gpu_device_num(device_num)

    @flow.global_function()
    def slice_fn():
        with flow.scope.placement(device_tag, "0:0-{}".format(device_num - 1)):
            var = flow.get_variable(
                name="var",
                shape=var_shape,
                dtype=flow_dtype,
                initializer=flow.random_uniform_initializer(-10,
                                                            10,
                                                            dtype=flow_dtype),
                distribute=flow.distribute.split(split_axis),
            )
            flow.watch(var, test_global_storage.Setter("var"))
            ret = flow.experimental.logical_slice(var, slice_tuples)
            return ret

    checkpoint = flow.train.CheckPoint()
    checkpoint.init()

    of_res = slice_fn().get().numpy()

    var_np = test_global_storage.Get("var")
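    # Build Python slice objects from the (start, stop, step) tuples and
    # check the logical slice against plain NumPy slicing of the full
    # (split-distributed) variable.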
    slice_objs = []
    for s in slice_tuples:
        slice_objs.append(slice(s[0], s[1], s[2]))
    test_case.assertTrue(np.array_equal(of_res, var_np[tuple(slice_objs)]))
Example #29
def RunOneflowOp(device_type, flow_op, x, flow_args):
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def FlowJob(x: oft.Numpy.Placeholder(x.shape)):
        with flow.scope.placement(device_type, "0:0"):
            x += flow.get_variable(
                name="v1",
                shape=(1, ),
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )
            loss = flow_op(x, *flow_args)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler([],
                                                                         [0]),
                               momentum=0).minimize(loss)

            flow.watch_diff(x, test_global_storage.Setter("x_diff"))

            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    y = FlowJob(x).get().numpy()
    x_diff = test_global_storage.Get("x_diff")
    return y, x_diff
Example #30
def compare_with_tensorflow(device_type, input_shape, perm):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()

    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def TransposeJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "input",
                shape=input_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=2,
                                                            maxval=5),
                trainable=True,
            )

            loss = flow.transpose(x, perm)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = TransposeJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.transpose(x, perm)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)

    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-5, atol=1e-5)
    assert np.allclose(test_global_storage.Get("x_diff"),
                       tf_x_diff.numpy(),
                       rtol=1e-5,
                       atol=1e-5)