Example #1
    def test_smooth_l1_loss(_):
        arg_dict = OrderedDict()
        arg_dict["device_type"] = ["gpu", "cpu"]
        arg_dict["prediction_shape"] = [
            (100, ),
            (10, 10),
        ]
        arg_dict["data_type"] = ["float32", "double"]
        arg_dict["beta"] = [0, 0.5, 1]

        for case in GenArgList(arg_dict):
            device_type, prediction_shape, data_type, beta = case
            assert device_type in ["gpu", "cpu"]
            assert data_type in ["float32", "double", "int8", "int32", "int64"]
            flow.clear_default_session()
            func_config = flow.FunctionConfig()
            func_config.default_data_type(flow.float)

            prediction = np.random.randn(*prediction_shape).astype(
                type_name_to_np_type[data_type])
            label = np.random.randn(*prediction_shape).astype(
                type_name_to_np_type[data_type])

            np_result = gen_numpy_data(prediction, label, beta)

            def assert_prediction_grad(b):
                prediction_grad = np_result["prediction_grad"]
                assert prediction_grad.dtype == type_name_to_np_type[data_type]
                assert np.allclose(prediction_grad, b.numpy()), (
                    case,
                    prediction_grad,
                    b.numpy(),
                )

            @flow.global_function(type="train", function_config=func_config)
            def TestJob(
                prediction: oft.Numpy.Placeholder(
                    prediction_shape, dtype=type_name_to_flow_type[data_type]),
                label: oft.Numpy.Placeholder(
                    prediction_shape, dtype=type_name_to_flow_type[data_type]),
            ):
                v = flow.get_variable(
                    "prediction",
                    shape=prediction_shape,
                    dtype=type_name_to_flow_type[data_type],
                    initializer=flow.constant_initializer(0),
                    trainable=True,
                )
                flow.watch_diff(v, assert_prediction_grad)
                prediction += v
                with flow.scope.placement(device_type, "0:0"):
                    loss = flow.smooth_l1_loss(prediction, label, beta)
                    flow.optimizer.SGD(
                        flow.optimizer.PiecewiseConstantScheduler([], [1e-4]),
                        momentum=0,
                    ).minimize(loss)
                    return loss

            loss_np = np_result["loss"]
            assert loss_np.dtype == type_name_to_np_type[data_type]
            loss = TestJob(prediction, label).get().numpy()
            assert np.allclose(loss_np, loss), (case, loss_np, loss)
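The helper gen_numpy_data used above is not shown; below is a plausible numpy reference for it, assuming the standard smooth L1 definition and an all-ones upstream gradient (a sketch, not the original helper).

import numpy as np

def gen_numpy_data(prediction, label, beta=1.0):
    # Elementwise smooth L1 loss and its gradient w.r.t. prediction.
    # With beta == 0 the loss degenerates to plain L1 and the gradient to sign(diff).
    diff = prediction - label
    abs_diff = np.abs(diff)
    if beta == 0:
        loss = abs_diff
        grad = np.sign(diff)
    else:
        loss = np.where(abs_diff < beta, 0.5 * diff * diff / beta, abs_diff - 0.5 * beta)
        grad = np.where(abs_diff < beta, diff / beta, np.sign(diff))
    return {
        "loss": loss.astype(prediction.dtype),
        "prediction_grad": grad.astype(prediction.dtype),
    }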
Example #2
def _run_test_moving_average_min_max_observer(
    test_case,
    device_type,
    device_num,
    dtype,
    activation_shape,
    quantization_bit,
    quantization_scheme,
    momentum,
):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    if device_type == "cpu":
        flow.config.cpu_device_num(device_num)
    else:
        flow.config.gpu_device_num(device_num)

    @flow.global_function(type="train", function_config=flow.FunctionConfig())
    def QuantizeJob(activation: oft.Numpy.Placeholder(
        activation_shape, dtype=type_name_to_flow_type[dtype])):
        with flow.scope.placement(device_type, "0:0-%d" % (device_num - 1)):
            x = flow.get_variable(
                "x",
                shape=activation_shape,
                dtype=activation.dtype,
                initializer=flow.zeros_initializer(activation.dtype),
                trainable=True,
            )
            scale, zero_point = flow.quantization.moving_average_min_max_observer(
                activation,
                quantization_bit,
                quantization_scheme,
                momentum,
            )
            fake = x + activation
            loss = flow.math.reduce_mean(fake)
            flow.optimizer.Adam(
                flow.optimizer.PiecewiseConstantScheduler(
                    [], [0.001]), ).minimize(loss)
        return scale, zero_point

    check_point = flow.train.CheckPoint()
    check_point.init()

    moving_max_np = np.zeros((1, ))
    moving_min_np = np.zeros((1, ))

    for i in range(10):
        activation = (np.random.random(activation_shape) - 0.5).astype(
            type_name_to_np_type[dtype])
        scale, zero_point = QuantizeJob(activation).get()
        _check_moving_average_min_max_observer(
            test_case,
            activation,
            scale.numpy(),
            zero_point.numpy(),
            moving_max_np,
            moving_min_np,
            quantization_bit,
            quantization_scheme,
            momentum,
        )
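_check_moving_average_min_max_observer is defined elsewhere in the test file; the following is a rough numpy sketch of what it presumably verifies, assuming the symmetric scheme uses scale = moving_max(|x|) / (2^(bit-1) - 1) with zero_point = 0 and the affine scheme uses the moving min/max range. The exact observer semantics are an assumption here, not taken from the original helper.

import numpy as np

def _check_moving_average_min_max_observer(
    test_case, activation, scale_of, zero_point_of,
    moving_max_np, moving_min_np, quantization_bit,
    quantization_scheme, momentum,
):
    # Update the running min/max in place (the test reuses the same arrays
    # across iterations) and re-derive the expected scale / zero_point.
    if quantization_scheme == "symmetric":
        activation_max = np.max(np.abs(activation))
        if moving_max_np[0] == 0:
            moving_max_np[0] = activation_max
        else:
            moving_max_np[0] = moving_max_np[0] * momentum + activation_max * (1 - momentum)
        denominator = 2.0 ** (quantization_bit - 1) - 1
        scale_np = moving_max_np[0] / denominator
        zero_point_np = 0
    else:  # affine
        activation_max = np.max(activation)
        activation_min = np.min(activation)
        if moving_max_np[0] == 0:
            moving_max_np[0] = activation_max
            moving_min_np[0] = activation_min
        else:
            moving_max_np[0] = moving_max_np[0] * momentum + activation_max * (1 - momentum)
            moving_min_np[0] = moving_min_np[0] * momentum + activation_min * (1 - momentum)
        denominator = 2.0 ** quantization_bit - 1
        scale_np = (moving_max_np[0] - moving_min_np[0]) / denominator
        zero_point_np = -np.round(moving_min_np[0] / scale_np)
    test_case.assertTrue(np.allclose(scale_of, scale_np, rtol=1e-3))
    test_case.assertTrue(np.allclose(zero_point_of, zero_point_np, rtol=1e-3))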
Example #3
def compare_with_tensorflow(device_type, activation_type, shape, data_type):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    flow.config.enable_debug_mode(True)
    func_config = flow.FunctionConfig()
    if data_type == flow.float16:
        func_config.enable_auto_mixed_precision(True)
        data_type = flow.float

    func_config.default_data_type(data_type)

    of_activation_map = {
        "relu": flow.nn.relu,
        "sigmoid": flow.math.sigmoid,
        "tanh": flow.math.tanh,
    }
    tf_activation_map = {
        "relu": tf.nn.relu,
        "sigmoid": tf.math.sigmoid,
        "tanh": tf.math.tanh,
        #        "gelu": tfa.activations.gelu,
    }

    @flow.global_function(type="train", function_config=func_config)
    def ActivationJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=shape,
                dtype=data_type,
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            loss = of_activation_map[activation_type](x)
            lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([],
                                                                     [1e-4])
            flow.optimizer.SGD(lr_scheduler, momentum=0).minimize(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = ActivationJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf_activation_map[activation_type](x)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)

    rtol = 1e-3 if activation_type == "gelu" else 1e-5
    atol = 1e-3 if activation_type == "gelu" else 1e-5
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol, atol)
    assert np.allclose(test_global_storage.Get("x_diff"), tf_x_diff.numpy(),
                       rtol, atol)
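test_global_storage (used here and in several later examples) is an external helper; below is a minimal sketch of the Setter/Get interface it appears to expose, assuming it is just a module-level key/value store for blobs captured via flow.watch / flow.watch_diff.

import numpy as np

_storage = {}

def Setter(name):
    # Return a callback that stores the watched blob under `name`.
    def _set(blob):
        _storage[name] = blob.numpy() if hasattr(blob, "numpy") else np.asarray(blob)
    return _set

def Get(name):
    # Retrieve a previously stored blob by name.
    return _storage[name]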
Example #4
def CompareNnBnWithTensorFlow(
    device_type,
    input_shape,
    data_type,
    axis,
    epsilon,
    input_minval=-10,
    input_maxval=10,
    y_rtol=1e-5,
    y_atol=1e-5,
    x_diff_rtol=1e-5,
    x_diff_atol=1e-5,
):
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_logical_view(flow.scope.consistent_view())
    func_config.default_data_type(flow.float32)

    x = np.random.uniform(low=input_minval,
                          high=input_maxval,
                          size=input_shape).astype(np.float32)
    param_shape = input_shape[axis]
    mean = np.random.uniform(low=input_minval,
                             high=input_maxval,
                             size=param_shape).astype(np.float32)
    variance = np.random.uniform(low=0, high=input_maxval,
                                 size=param_shape).astype(np.float32)
    offset = np.random.uniform(low=input_minval,
                               high=input_maxval,
                               size=param_shape).astype(np.float32)
    scale = np.random.uniform(low=input_minval,
                              high=input_maxval,
                              size=param_shape).astype(np.float32)

    @flow.global_function(type="train", function_config=func_config)
    def FlowNnBnJob(
            x_full_precision: oft.Numpy.Placeholder(x.shape),
            mean: oft.Numpy.Placeholder(mean.shape),
            variance: oft.Numpy.Placeholder(variance.shape),
            offset: oft.Numpy.Placeholder(offset.shape),
            scale: oft.Numpy.Placeholder(scale.shape),
    ):
        with flow.scope.placement(device_type, "0:0"):
            x_full_precision += flow.get_variable(
                name="v1",
                shape=(1, ),
                dtype=flow.float32,
                initializer=flow.zeros_initializer(),
            )
            if data_type == "float16":
                x = flow.cast(x_full_precision, flow.float16)
            else:
                x = x_full_precision
            y = flow.nn.batch_normalization(x,
                                            mean,
                                            variance,
                                            offset,
                                            scale,
                                            epsilon,
                                            axis=axis)
            y = flow.cast(y, flow.float32)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler([],
                                                                         [0]),
                               momentum=0).minimize(y)
            flow.watch_diff(x_full_precision,
                            test_global_storage.Setter("x_diff"))
            return y

    check_point = flow.train.CheckPoint()
    check_point.init()
    of_y = FlowNnBnJob(x, mean, variance, offset, scale).get().numpy()
    of_x_diff = test_global_storage.Get("x_diff")

    def TensorFlowNnBn(x, mean, variance, offset, scale):
        tf_params_shape = [1, 1, 1, 1]
        tf_params_shape[axis] = input_shape[axis]
        with tf.GradientTape(persistent=True) as tape:
            x = tf.Variable(x)
            if data_type == "float16":
                x = tf.cast(x, tf.float16)
            mean = tf.Variable(mean.reshape(tf_params_shape))
            variance = tf.Variable(variance.reshape(tf_params_shape))
            offset = tf.Variable(offset.reshape(tf_params_shape))
            scale = tf.Variable(scale.reshape(tf_params_shape))
            y = tf.cast(
                tf.nn.batch_normalization(x, mean, variance, offset, scale,
                                          epsilon),
                tf.float32,
            )
        x_diff = tape.gradient(y, x)
        return y.numpy(), x_diff.numpy()

    tf_y, tf_x_diff = TensorFlowNnBn(x, mean, variance, offset, scale)
    assert np.allclose(of_y, tf_y, rtol=y_rtol, atol=y_atol)
    assert np.allclose(of_x_diff,
                       tf_x_diff,
                       rtol=x_diff_rtol,
                       atol=x_diff_atol)
Example #5
    def setUp(self):
        global _unittest_env_initilized
        global _unittest_worker_initilized
        if has_node_list():
            assert node_size() > 1
            if _unittest_worker_initilized == False:
                master_port = os.getenv("ONEFLOW_TEST_MASTER_PORT")
                assert master_port, "env var ONEFLOW_TEST_MASTER_PORT not set"
                oneflow.env.ctrl_port(int(master_port))
                if enable_init_by_host_list():
                    oneflow.env.machine(node_list())
                    data_port = os.getenv("ONEFLOW_TEST_DATA_PORT")
                    if data_port:
                        oneflow.env.data_port(int(data_port))
                    ssh_port = os.getenv("ONEFLOW_TEST_SSH_PORT")
                    print("initializing worker...")
                    oneflow.deprecated.init_worker(scp_binary=True,
                                                   use_uuid=True,
                                                   ssh_port=int(ssh_port))
                    atexit.register(oneflow.deprecated.delete_worker,
                                    ssh_port=ssh_port)
                    _unittest_worker_initilized = True
                else:
                    ctrl_port = os.getenv("ONEFLOW_TEST_CTRL_PORT")
                    config_rank_ctrl_port = -1
                    if ctrl_port:
                        config_rank_ctrl_port = int(ctrl_port)

                    if has_world_size():
                        config_world_size = world_size()
                    else:
                        config_world_size = 0

                    bootstrap_conf_list = oneflow.env.init_bootstrap_confs(
                        node_list(),
                        int(master_port),
                        config_world_size,
                        config_rank_ctrl_port,
                    )

                    data_port = os.getenv("ONEFLOW_TEST_DATA_PORT")
                    if data_port:
                        oneflow.env.data_port(int(data_port))

                    ssh_port = os.getenv("ONEFLOW_TEST_SSH_PORT")
                    print("initializing worker...")
                    oneflow.deprecated.init_worker(
                        scp_binary=True,
                        use_uuid=True,
                        ssh_port=int(ssh_port),
                        bootstrap_conf_list=bootstrap_conf_list,
                    )
                    atexit.register(
                        oneflow.deprecated.delete_worker_by_bootstrap,
                        ssh_port=ssh_port)
                    _unittest_worker_initilized = True

        log_dir = os.getenv("ONEFLOW_TEST_LOG_DIR")
        if log_dir:
            oneflow.env.log_dir(log_dir)

        if _unittest_env_initilized == False:
            oneflow.env.init()
            _unittest_env_initilized = True

        oneflow.clear_default_session()
        oneflow.enable_eager_execution(eager_execution_enabled())
        oneflow.experimental.enable_typing_check(typing_check_enabled())
Example #6
def compare_with_numpy_indexed_slices_sgdw(
    device_type,
    model_shape,
    ids_shape,
    grad_shape,
    momentum_beta,
    learning_rate,
    train_iters,
    mul_scalar,
    weight_decay,
):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float32)
    func_config.indexed_slices_optimizer_conf(
        dict(include_op_names=dict(op_name=["embeddings"])))

    @flow.global_function(type="train", function_config=func_config)
    def testIndexedSlicesSGDW(
        sparse_ids: flow.typing.Numpy.Placeholder(ids_shape, dtype=flow.int32),
    ) -> flow.typing.Numpy:
        with flow.scope.placement(device_type, "0:0"):
            embedding_table = flow.get_variable(
                name="embeddings",
                shape=model_shape,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
            )
            embedding = flow.gather(params=embedding_table * mul_scalar,
                                    indices=sparse_ids)
            loss = flow.math.reduce_mean(embedding)
            flow.optimizer.SGDW(
                flow.optimizer.PiecewiseConstantScheduler([], [learning_rate]),
                momentum=momentum_beta,
                weight_decay=weight_decay,
            ).minimize(loss)

            return embedding_table

    sparse_ids = np.random.randint(model_shape[0],
                                   size=ids_shape).astype(np.int32)

    init_value = None
    for i in range(train_iters + 1):
        x = testIndexedSlicesSGDW(sparse_ids)
        if i == 0:
            init_value = np.copy(x)

    def indexed_slices_update_numpy(
        param,
        unique_dict,
        iter,
        momentum,
        lr=0.001,
        momentum_beta=0,
        weight_decay=0.9,
    ):
        param_t = np.copy(param)
        momentum_t = np.copy(momentum)
        for ids in unique_dict.keys():
            next_momentum = momentum_beta * momentum_t[ids] - lr * unique_dict[
                ids]
            momentum_t[ids] = next_momentum
            param_t_o = param[
                ids] + next_momentum - lr * weight_decay * param[ids]
            param_t[ids] = param_t_o

        return param_t, momentum_t

    param = init_value
    gradient = np.full(grad_shape, float(mul_scalar) / np.prod(grad_shape))
    momentum = np.zeros(param.shape)
    unique_dict = unique_grads(sparse_ids, gradient)

    for i in range(train_iters):
        param, momentum = indexed_slices_update_numpy(param, unique_dict, i,
                                                      momentum, learning_rate,
                                                      momentum_beta,
                                                      weight_decay)
    assert np.allclose(
        x.flatten(),
        param.flatten(),
        rtol=1e-4,
        atol=1e-4,
    )
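unique_grads is another helper not shown here; the following is a plausible sketch that sums gradient rows sharing the same sparse id, assuming grad_shape is ids_shape plus the embedding row shape.

import numpy as np

def unique_grads(sparse_ids, gradient):
    # Accumulate gradient rows per unique id, mirroring what an
    # indexed-slices optimizer applies to each embedding row.
    rows = gradient.reshape(sparse_ids.size, -1)
    unique_dict = {}
    for i, ids in enumerate(sparse_ids.flatten()):
        if ids in unique_dict:
            unique_dict[ids] = unique_dict[ids] + rows[i]
        else:
            unique_dict[ids] = np.copy(rows[i])
    return unique_dict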
Example #7
def compare_with_tensorflow_adam(device_type, x_shape, beta1, beta2, epsilon,
                                 learning_rate, train_iters):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float32)

    @flow.global_function(type="train", function_config=func_config)
    def testAdam(random_mask: flow.typing.Numpy.Placeholder(
        x_shape, dtype=flow.float32)) -> flow.typing.Numpy:
        with flow.scope.placement(device_type, "0:0-0"):
            x = flow.get_variable(
                name="x",
                shape=x_shape,
                dtype=flow.float32,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
                trainable=True,
            )
            loss = flow.math.reduce_mean(x * random_mask)
            flow.optimizer.Adam(
                flow.optimizer.PiecewiseConstantScheduler([], [learning_rate]),
                beta1=beta1,
                beta2=beta2,
                epsilon=epsilon,
                do_bias_correction=True,
            ).minimize(loss)
            return x

    # generate random number sequences
    random_masks_seq = []
    for i in range(train_iters + 1):
        random_masks_seq.append(
            np.random.uniform(size=x_shape).astype(np.float32))

    init_value = None
    for i in range(train_iters + 1):
        x = testAdam(random_masks_seq[i])
        if i == 0:
            init_value = np.copy(x)

    var = tf.Variable(init_value)
    opt = tf.keras.optimizers.Adam(
        learning_rate=learning_rate,
        beta_1=beta1,
        beta_2=beta2,
        epsilon=epsilon,
        amsgrad=False,
    )

    for i in range(train_iters):
        with tf.GradientTape() as tape:
            random_mask = tf.Variable(random_masks_seq[i])
            loss = tf.reduce_mean(var * random_mask)
        gradients = tape.gradient(loss, var)
        opt.apply_gradients(zip([gradients], [var]))

    assert np.allclose(
        x.flatten(),
        var.numpy().flatten(),
        rtol=1e-4,
        atol=1e-4,
    )
Example #8
def compare_with_tensorflow(
    device_type,
    x_shape,
    filters,
    kernel_size,
    groups,
    of_padding="SAME",
    tf_padding="SAME",
    stride_h=1,
    stride_w=1,
    data_format="NCHW",
    dilation_h=1,
    dilation_w=1,
):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.consistent_view())

    if data_format == "NCHW":
        xy_data_transpose = (0, 2, 3, 1)
        weight_data_transpose = (2, 3, 1, 0)
    else:
        xy_data_transpose = (0, 1, 2, 3)
        weight_data_transpose = (1, 2, 3, 0)

    @flow.global_function(type="train", function_config=func_config)
    def ConvJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
                trainable=True,
            )
            if data_format == "NCHW":
                weight_shape = (filters, x.shape[1] // groups, kernel_size,
                                kernel_size)
            else:
                weight_shape = (filters, kernel_size, kernel_size,
                                x.shape[3] // groups)
            weight = flow.get_variable(
                "conv-weight",
                shape=weight_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
            )
            loss = flow.nn.conv2d(
                x,
                weight,
                strides=[stride_h, stride_w],
                padding=of_padding,
                data_format=data_format,
                dilations=[dilation_h, dilation_w],
                groups=groups,
            )
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(weight, test_global_storage.Setter("weight"))
            flow.watch_diff(weight, test_global_storage.Setter("weight_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss

    # OneFlow
    of_out = ConvJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(
            test_global_storage.Get("x").transpose(xy_data_transpose))
        assert groups > 0
        assert x_shape[1] % groups == 0
        assert filters % groups == 0
        weight = tf.Variable(
            test_global_storage.Get("weight").transpose(weight_data_transpose))

        tf_out = tf.nn.conv2d(
            x,
            weight,
            strides=[1, stride_h, stride_w, 1],
            padding=tf_padding,
            data_format="NHWC",
            dilations=[1, dilation_h, dilation_w, 1],
        )

    loss_diff = test_global_storage.Get("loss_diff").transpose(
        xy_data_transpose)
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    tf_weight_diff = tape.gradient(tf_out, weight, loss_diff)
    idx = np.where(
        np.abs(of_out.numpy().transpose(xy_data_transpose) -
               tf_out.numpy()) > 5e-4)
    assert np.allclose(
        of_out.numpy().transpose(xy_data_transpose),
        tf_out.numpy(),
        rtol=1e-5,
        atol=1e-5,
    )

    assert np.allclose(
        test_global_storage.Get("x_diff").transpose(xy_data_transpose),
        tf_x_diff.numpy(),
        rtol=1e-4,
        atol=1e-4,
    )
    assert np.allclose(
        test_global_storage.Get("weight_diff").transpose(
            weight_data_transpose),
        tf_weight_diff.numpy(),
        rtol=1e-5,
        atol=1e-5,
    )
Example #9
def compare_with_tensorflow(device_type, data_type, x_shape, case):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def ScalarAddByTensorJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
                trainable=True,
            )
            y = flow.get_variable(
                "y",
                shape=(1, ),
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
                trainable=True,
            )
            if case == "add":
                loss = flow.math.add(x, y)
            elif case == "sub":
                loss = flow.math.subtract(x, y)
            elif case == "mul":
                loss = flow.math.multiply(x, y)
            elif case == "div":
                loss = flow.math.divide(x, y)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch(y, test_global_storage.Setter("y"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch_diff(y, test_global_storage.Setter("y_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss

    # OneFlow
    of_out = ScalarAddByTensorJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        y = tf.Variable(test_global_storage.Get("y"))
        if case == "add":
            tf_out = x + y
        elif case == "sub":
            tf_out = x - y
        elif case == "mul":
            tf_out = x * y
        elif case == "div":
            tf_out = x / y
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    tf_y_diff = tape.gradient(tf_out, y, loss_diff)

    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-5, atol=1e-5)
    assert np.allclose(test_global_storage.Get("x_diff"),
                       tf_x_diff.numpy(),
                       rtol=1e-5,
                       atol=1e-5)
    assert np.allclose(test_global_storage.Get("y_diff"),
                       tf_y_diff.numpy(),
                       rtol=1e-5,
                       atol=1e-5)
Example #10
def _compare_mseloss_with_np(
    input_shape, target_shape, device_type, machine_ids, device_counts
):
    input = np.random.random(size=input_shape).astype(np.float32)
    target = np.random.random(size=target_shape).astype(np.float32)

    assert device_type in ["cpu", "gpu"]

    flow.clear_default_session()
    if device_type == "cpu":
        flow.config.cpu_device_num(device_counts)
    else:
        flow.config.gpu_device_num(device_counts)

    func_config = flow.FunctionConfig()

    def np_mseloss(np_input, np_target):
        np_mse = np.square(np_target - np_input)
        np_mse_mean = np.mean(np_mse)
        np_mse_sum = np.sum(np_mse)

        return {
            "np_mse_loss": np_mse,
            "np_mse_loss_mean": np_mse_mean,
            "np_mse_loss_sum": np_mse_sum,
        }

    def np_mseloss_grad(np_input, np_target):
        elem_cnt = np_input.size
        np_mse_grad_mean = (-2 * (np_target - np_input)) / elem_cnt

        # TODO: to get the grad when reduction == "sum", use the following code:
        # np_mse_grad_sum = -2 * (np_target - np_input)

        return {
            "np_mse_grad_mean": np_mse_grad_mean,
        }

    # Use Numpy to compute mseloss
    np_out_mseloss_dict = np_mseloss(input, target)
    # Use Numpy to compute mseloss grad
    np_grad_dict = np_mseloss_grad(input, target)

    def assert_prediction_grad(blob: tp.Numpy):
        # Evaluate the gradient. Here we only test the reduction type == "mean"
        assert np.allclose(blob, np_grad_dict["np_mse_grad_mean"])

    @flow.global_function(
        type="train", function_config=func_config,
    )
    def oneflow_mseloss(
        of_input: tp.Numpy.Placeholder(shape=input.shape),
        of_target: tp.Numpy.Placeholder(shape=target.shape),
    ) -> Dict[str, tp.Numpy]:
        with flow.scope.placement(device_type, "0:0"):
            v = flow.get_variable(
                shape=input.shape,
                dtype=flow.float32,
                initializer=flow.zeros_initializer(),
                name="x_var",
            )
            x_var = of_input + v

        flow.watch_diff(x_var, assert_prediction_grad)

        mseloss = flow.nn.MSELoss(x_var, of_target, reduction="none", name="of_mseloss")
        mseloss_mean = flow.nn.MSELoss(
            x_var, of_target, reduction="mean", name="of_mseloss_reduce_mean"
        )
        mseloss_sum = flow.nn.MSELoss(
            x_var, of_target, reduction="sum", name="of_mseloss_reduce_sum"
        )

        with flow.scope.placement(device_type, "0:0"):
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [1e-3]), momentum=0
            ).minimize(mseloss_mean)

        return {
            "of_mse_loss": mseloss,
            "of_mse_loss_mean": mseloss_mean,
            "of_mse_loss_sum": mseloss_sum,
        }

    of_out_mseloss_dict = oneflow_mseloss(input, target)

    assert np.allclose(
        of_out_mseloss_dict["of_mse_loss"], np_out_mseloss_dict["np_mse_loss"]
    )
    assert np.allclose(
        of_out_mseloss_dict["of_mse_loss_mean"], np_out_mseloss_dict["np_mse_loss_mean"]
    )
    assert np.allclose(
        of_out_mseloss_dict["of_mse_loss_sum"], np_out_mseloss_dict["np_mse_loss_sum"]
    )
Example #11
def _make_op_function(
    test_case,
    input,
    padding,
    grad,
    device_type,
    value_type,
    machine_ids,
    device_counts,
):
    flow.clear_default_session()
    if device_type == "cpu":
        flow.config.cpu_device_num(device_counts)
    else:
        flow.config.gpu_device_num(device_counts)

    func_config = flow.FunctionConfig()

    # the global function requires float32 for its arguments and return values
    if value_type == flow.float16:
        func_config.default_data_type(flow.float32)
    else:
        func_config.default_data_type(value_type)

    func_config.default_placement_scope(
        flow.scope.placement(device_type, machine_ids))
    func_config.default_logical_view(flow.scope.consistent_view())

    def _compare_diff(blob: tp.Numpy):
        test_case.assertTrue(np.allclose(grad, blob, 1e-3, 1e-3))

    if value_type == flow.float32 or value_type == flow.float64:

        @flow.global_function(type="train", function_config=func_config)
        def op_function(x: tp.Numpy.Placeholder(input.shape,
                                                dtype=value_type)):
            with flow.scope.placement(device_type, "0:0"):
                x += flow.get_variable(
                    name="input",
                    shape=input.shape,
                    dtype=value_type,
                    initializer=flow.zeros_initializer(),
                )
                out = flow.reflection_pad2d(x, padding)
                flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                    [], [0]),
                                   momentum=0).minimize(out)

            flow.watch_diff(x, _compare_diff)
            return out

        return op_function

    elif value_type == flow.int32:

        @flow.global_function(type="train", function_config=func_config)
        def op_function(x: tp.Numpy.Placeholder(input.shape,
                                                dtype=flow.float32)):
            with flow.scope.placement(device_type, "0:0"):
                x += flow.get_variable(
                    name="input",
                    shape=input.shape,
                    dtype=flow.float32,
                    initializer=flow.zeros_initializer(),
                )
                y_int32 = flow.reflection_pad2d(x, padding)
                y_fp32 = flow.cast(y_int32, dtype=flow.float32)
                flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                    [], [0]),
                                   momentum=0).minimize(y_fp32)

            flow.watch_diff(x, _compare_diff)
            return y_fp32

        return op_function

    elif value_type == flow.float16:

        @flow.global_function(type="train", function_config=func_config)
        def op_function(x: tp.Numpy.Placeholder(input.shape,
                                                dtype=flow.float32)):
            with flow.scope.placement(device_type, "0:0"):
                x_var = flow.get_variable(
                    name="input",
                    shape=input.shape,
                    dtype=flow.float32,
                    initializer=flow.constant_initializer(0),
                )
                x_var = flow.cast_to_current_logical_view(x_var)
                input_x = x_var + x
                x_fp32 = flow.cast(input_x, flow.float32)
                x_fp16 = flow.cast(input_x, dtype=flow.float16)
                y_fp16 = flow.reflection_pad2d(x_fp16, padding)
                y_fp32 = flow.cast(y_fp16, dtype=flow.float32)
                flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                    [], [0]),
                                   momentum=0).minimize(y_fp32)

            flow.watch_diff(x_fp32, _compare_diff)
            return y_fp32

        return op_function
Example #12
def _test_batchnorm_add_relu(test_case, input_shape, axis, data_type):
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_logical_view(flow.scope.consistent_view())
    func_config.default_data_type(flow.float32)

    @flow.global_function(type="train", function_config=func_config)
    def test_job(
            x: oft.Numpy.Placeholder(input_shape, dtype=flow.float32),
            addend: oft.Numpy.Placeholder(input_shape, dtype=flow.float32),
    ):
        v = flow.get_variable(
            name="v",
            shape=(1, ),
            dtype=flow.float32,
            initializer=flow.zeros_initializer(),
        )

        x = x + v
        addend = addend + v

        x1 = flow.identity(x)
        x2 = flow.identity(x)

        addend1 = flow.identity(addend)
        addend2 = flow.identity(addend)

        flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
        flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))

        flow.watch_diff(addend1, test_global_storage.Setter("addend1_diff"))
        flow.watch_diff(addend2, test_global_storage.Setter("addend2_diff"))

        x1 = flow.cast(x1, data_type)
        x2 = flow.cast(x2, data_type)

        addend1 = flow.cast(addend1, data_type)
        addend2 = flow.cast(addend2, data_type)

        y1 = flow.layers.batch_normalization_add_relu(x1,
                                                      addend=addend1,
                                                      axis=axis,
                                                      name="BN1")
        y2 = flow.math.relu(
            flow.layers.batch_normalization(x2, axis=axis, name="BN2") +
            addend2)

        y1 = flow.cast(y1, flow.float32)
        y2 = flow.cast(y2, flow.float32)

        flow.watch(y1, test_global_storage.Setter("y1"))
        flow.watch(y2, test_global_storage.Setter("y2"))

        loss = flow.math.reduce_mean(y1 + y2)
        flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler([],
                                                                     [0.001]),
                           momentum=0).minimize(flow.math.reduce_sum(loss))

        return loss

    x = np.random.rand(*input_shape).astype(np.float32)
    addend = np.random.rand(*input_shape).astype(np.float32)

    test_job(x, addend).get()

    test_case.assertTrue(
        np.allclose(test_global_storage.Get("y1"),
                    test_global_storage.Get("y2")))
    test_case.assertTrue(
        np.allclose(test_global_storage.Get("x1_diff"),
                    test_global_storage.Get("x2_diff")))
    test_case.assertTrue(
        np.allclose(
            test_global_storage.Get("addend1_diff"),
            test_global_storage.Get("addend2_diff"),
        ))
Example #13
def _compare_with_np(
    test_case,
    shape,
    index_shape,
    dynamic_shape=None,
    dynamic_index_shape=None,
    dtype="float32",
    index_dtype="int32",
    device_type="gpu",
    device_num=1,
    dynamic=False,
):
    x_is_floating = _is_floating_dtype(dtype)
    need_grad = x_is_floating
    x_of_dtype = type_name_to_flow_type[dtype]
    index_of_dtype = type_name_to_flow_type[index_dtype]
    x_dtype = type_name_to_np_type[dtype]
    index_dtype = type_name_to_np_type[index_dtype]

    if dynamic_shape is None:
        dynamic_shape = shape
    else:
        dynamic = True

    if dynamic_index_shape is None:
        dynamic_index_shape = index_shape
    else:
        dynamic = True

    if dynamic:
        x, index, y, dx = [], [], [], []
        for _ in range(device_num):
            x_, index_ = _random_inputs(
                dynamic_shape, x_dtype, dynamic_index_shape, index_dtype
            )
            y_, dx_ = _gather_nd_np(x_, index_, need_grad)
            x.append(x_)
            index.append(index_)
            y.append(y_)
            dx.append(dx_)

        def comp_diff(dx_blob: flow.typing.ListNumpy):
            for dx_blob_, dx_ in zip(dx_blob, dx):
                test_case.assertTrue(np.array_equal(dx_blob_, dx_))

    else:
        x, index = _random_inputs(
            dynamic_shape, x_dtype, dynamic_index_shape, index_dtype
        )
        y, dx = _gather_nd_np(x, index, need_grad)

        def comp_diff(dx_blob: flow.typing.Numpy):
            test_case.assertTrue(np.array_equal(dx_blob, dx))

    flow.clear_default_session()
    gather_nd_fn = _make_gather_nd_fn(
        shape,
        index_shape,
        x_of_dtype,
        index_of_dtype,
        device_type,
        device_num,
        dynamic,
        need_grad,
        comp_diff if device_num == 1 else None,
    )
    ret_y = gather_nd_fn(x, index)

    if dynamic:
        for ret_y_, y_ in zip(ret_y, y):
            test_case.assertTrue(np.array_equal(ret_y_, y_))
    else:
        test_case.assertTrue(np.array_equal(ret_y, y))
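_random_inputs, _make_gather_nd_fn, and _gather_nd_np come from the same test file and are not shown; below is a plausible numpy reference for _gather_nd_np, assuming the last dimension of index addresses the leading dimensions of x and the backward signal is all ones (a sketch, not the original helper).

import numpy as np

def _gather_nd_np(x, index, require_grad=False):
    # Gather the slices of x addressed by the last dim of index; optionally
    # scatter an all-ones upstream gradient back into dx.
    ndim = index.shape[-1]
    flat_index = index.reshape(-1, ndim)
    y = np.array([x[tuple(idx)] for idx in flat_index])
    y = y.reshape(index.shape[:-1] + x.shape[ndim:])
    dx = None
    if require_grad:
        dx = np.zeros(x.shape, dtype=np.float64)
        ones = np.ones(x.shape[ndim:], dtype=np.float64)
        for idx in flat_index:
            dx[tuple(idx)] += ones
        dx = dx.astype(x.dtype)
    return y, dx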
Example #14
    def test_alexnet(test_case, batch_size=DEFAULT_BATCH_SIZE, num_batchs=6):
        init_env()
        alexnet_infer, input_lbns, output_lbns = make_alexnet_infer_func(
            batch_size, (DEFAULT_IMAGE_SIZE, DEFAULT_IMAGE_SIZE, 3))
        flow.load_variables(flow.checkpoint.get(DEFAULT_CHECKPOINT_DIR))

        # save model
        saved_model_path = "alexnet_models"
        model_name = "alexnet"
        model_version = 1

        model_version_path = os.path.join(saved_model_path, str(model_version))
        if os.path.exists(saved_model_path) and os.path.isdir(
                saved_model_path):
            print("WARNING: The model version path '{}' already exist"
                  ", old version directory will be removed".format(
                      model_version_path))
            shutil.rmtree(saved_model_path)

        saved_model_builder = flow.saved_model.ModelBuilder(saved_model_path)
        signature_builder = (saved_model_builder.ModelName(model_name).Version(
            model_version).AddFunction(alexnet_infer).AddSignature("regress"))
        for input_name, lbn in input_lbns.items():
            signature_builder.Input(input_name, lbn)
        for output_name, lbn in output_lbns.items():
            signature_builder.Output(output_name, lbn)
        saved_model_builder.Save()

        # test data
        new_batch_size = int(batch_size / 2)
        dataset = ImageNetRecordDataset(
            batch_size=new_batch_size,
            image_resize_size=DEFAULT_IMAGE_SIZE,
            data_format="NHWC",
        )
        image_list, label_list = dataset.load_batchs(num_batchs)
        assert image_list[0].shape[0] == new_batch_size
        image_size = tuple(image_list[0].shape[1:])

        flow.clear_default_session()
        alexnet_infer, _, _ = make_alexnet_infer_func(new_batch_size,
                                                      image_size)
        flow.load_variables(flow.checkpoint.get(DEFAULT_CHECKPOINT_DIR))
        print("alexnet inference result:")
        origin_outputs = []
        for i, (image, label) in enumerate(zip(image_list, label_list)):
            output = alexnet_infer(image, label)
            # origin_outputs.append(output.item())
            # print("iter#{:<6} output:".format(i), output.item())
            origin_outputs.append(output)
            print("iter#{:<6} output:".format(i), output)

        origin_outputs = np.array(origin_outputs, dtype=np.float32)

        # load model and run
        flow.clear_default_session()
        model_meta_file_path = os.path.join(saved_model_path,
                                            str(model_version),
                                            "saved_model.prototxt")
        saved_model_proto = load_saved_model(model_meta_file_path)
        sess = flow.serving.InferenceSession()
        checkpoint_path = os.path.join(saved_model_path, str(model_version),
                                       saved_model_proto.checkpoint_dir)
        sess.set_checkpoint_path(checkpoint_path)

        graph_name = saved_model_proto.default_graph_name
        graph_def = saved_model_proto.graphs[graph_name]
        signature_def = graph_def.signatures[graph_def.default_signature_name]

        with sess.open(graph_name, signature_def, new_batch_size):
            sess.compile(graph_def.op_list)

        # sess.print_job_set()
        sess.launch()

        job_name = sess.list_jobs()[0]
        input_names = sess.list_inputs()
        print("input names:", input_names)
        for input_name in input_names:
            print('input "{}" info: {}'.format(
                input_name, sess.input_info(input_name, job_name)))
        output_names = sess.list_outputs()
        print("output names:", output_names)
        for output_name in output_names:
            print('output "{}" info: {}'.format(
                output_name, sess.output_info(output_name, job_name)))

        print("load saved alexnet and inference result:")
        print_input_info = False
        cmp_outputs = []
        for i, (image, label) in enumerate(zip(image_list, label_list)):
            if print_input_info:
                print("image shape: {}, dtype: {}".format(
                    image.shape, image.dtype))
                print("label shape: {}, dtype: {}, data: {}".format(
                    label.shape, label.dtype, label))
                if i > 1:
                    print((image - image_list[i - 1]).mean())

            outputs = sess.run(alexnet_infer.__name__,
                               image=image,
                               label=label)
            # cmp_outputs.append(outputs[0].item())
            # print("iter#{:<6} output:".format(i), outputs[0].item())
            cmp_outputs.append(outputs[0])
            print("iter#{:<6} output:".format(i), outputs[0])

        cmp_outputs = np.array(cmp_outputs, dtype=np.float32)
        test_case.assertTrue(np.allclose(origin_outputs, cmp_outputs))
        sess.close()
Example #15
def compare_with_numpy_lazy_adam(
    device_type,
    x_shape,
    beta1,
    beta2,
    epsilon,
    learning_rate,
    train_iters,
):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float32)

    @flow.global_function(type="train", function_config=func_config)
    def testLazyAdam() -> flow.typing.Numpy:
        with flow.scope.placement(device_type, "0:0-0"):
            x = flow.get_variable(
                name="x",
                shape=x_shape,
                dtype=flow.float32,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
                trainable=True,
            )
            loss = flow.math.reduce_mean(x)

            flow.optimizer.LazyAdam(
                flow.optimizer.PiecewiseConstantScheduler([], [learning_rate]),
                beta1=beta1,
                beta2=beta2,
                epsilon=epsilon,
            ).minimize(loss)

            return x

    init_value = None
    for i in range(train_iters + 1):
        x = testLazyAdam()
        if i == 0:
            init_value = np.copy(x)

    def lazy_adam_update_numpy(
        param,
        gradient,
        iter,
        m,
        v,
        lr=0.001,
        beta1=0.9,
        beta2=0.999,
        epsilon=1e-7,
    ):

        lr_t = lr * np.sqrt(1 - beta2**(iter + 1)) / (1 - beta1**(iter + 1))

        m_t = np.copy(m)
        v_t = np.copy(v)

        m_t_o = beta1 * m + (1 - beta1) * gradient
        v_t_o = beta2 * v + (1 - beta2) * gradient * gradient

        m_t = m_t_o
        v_t = v_t_o

        param_t = np.copy(param)

        param_t_o = param - lr_t * m_t / (np.sqrt(v_t) + epsilon)

        param_t = param_t_o

        return param_t, m_t, v_t

    param = init_value
    gradient = np.full(param.shape, 1.0 / np.prod(param.shape))
    m = np.zeros(param.shape)
    v = np.zeros(param.shape)

    for i in range(train_iters):
        param, m, v = lazy_adam_update_numpy(param, gradient, i, m, v,
                                             learning_rate, beta1, beta2,
                                             epsilon)

    assert np.allclose(
        x.flatten(),
        param.flatten(),
        rtol=1e-4,
        atol=1e-4,
    )
Example #16
def compare_with_tensorflow(device_type, x_shape, data_type, axis):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()

    if data_type == "float16":
        dtype = flow.float
    else:
        dtype = type_name_to_flow_type[data_type]

    @flow.global_function(type="train", function_config=func_config)
    def SoftmaxJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=-1.0,
                                                            maxval=1.0),
                trainable=True,
            )
            x1 = x
            x = flow.identity(x)
            if data_type == "float16":
                loss = flow.cast(
                    flow.nn.softmax(flow.cast(x, dtype=flow.float16),
                                    axis=axis),
                    dtype=flow.float,
                )
            else:
                loss = flow.nn.softmax(x, axis=axis)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            total_loss = loss * x1

            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(total_loss)

            return loss

    # OneFlow
    of_out = SoftmaxJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.softmax(x, axis=axis)

    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    if data_type == "float16":
        tolerance = 1e-3
    else:
        tolerance = 1e-5
    assert np.allclose(of_out.numpy(),
                       tf_out.numpy(),
                       rtol=tolerance,
                       atol=tolerance)
    assert np.allclose(
        test_global_storage.Get("x_diff"),
        tf_x_diff.numpy(),
        rtol=tolerance,
        atol=tolerance,
    )
Example #17
def compare_with_numpy_lars(
    device_type,
    x_shape,
    momentum_beta,
    epsilon,
    lars_coefficient,
    learning_rate,
    weight_decay,
    train_iters,
):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float32)

    @flow.global_function(type="train", function_config=func_config)
    def testLars(random_mask: flow.typing.Numpy.Placeholder(
        x_shape, dtype=flow.float32)) -> flow.typing.Numpy:
        with flow.scope.placement(device_type, "0:0-0"):
            x = flow.get_variable(
                name="x",
                shape=x_shape,
                dtype=flow.float32,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
                trainable=True,
            )
            loss = flow.math.reduce_mean(x * random_mask)
            flow.optimizer.LARS(
                flow.optimizer.PiecewiseConstantScheduler([], [learning_rate]),
                momentum_beta=momentum_beta,
                epsilon=epsilon,
                lars_coefficient=lars_coefficient,
                weight_decay=weight_decay,
            ).minimize(loss)
            return x

    # generate random number sequences
    random_masks_seq = []
    for i in range(train_iters + 1):
        random_masks_seq.append(
            np.random.uniform(size=x_shape).astype(np.float32))

    init_value = None
    for i in range(train_iters + 1):
        x = testLars(random_masks_seq[i])
        if i == 0:
            init_value = np.copy(x)

    def lars_update_numpy(
        param,
        gradient,
        momentum,
        learning_rate,
        momentum_beta,
        weight_decay,
        epsilon,
        lars_coefficient,
    ):
        import math

        model_norm = math.sqrt(np.sum(param * param))
        model_diff_norm = math.sqrt(np.sum(gradient * gradient))

        if model_norm > 0 and model_diff_norm > 0:
            lars = (lars_coefficient * model_norm /
                    (model_diff_norm + weight_decay * model_norm + epsilon))
        else:
            lars = 1.0

        local_learning_rate = learning_rate * lars

        momentum_t = momentum_beta * momentum - local_learning_rate * gradient

        param_t = param + momentum_t - local_learning_rate * weight_decay * param

        return param_t, momentum_t

    param = init_value
    gradient = np.full(param.shape, 1.0 / np.prod(param.shape))
    momentum = np.zeros(param.shape)

    for i in range(train_iters):
        param, momentum = lars_update_numpy(
            param,
            gradient * random_masks_seq[i],
            momentum,
            learning_rate,
            momentum_beta,
            weight_decay,
            epsilon,
            lars_coefficient,
        )

    assert np.allclose(
        x.flatten(),
        param.flatten(),
        rtol=1e-4,
        atol=1e-4,
    )
Example #18
def compare_with_not_fused(test_case, device_type, x_shape, data_type,
                           data_format):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()

    if data_type == "float16":
        dtype = flow.float
    else:
        dtype = type_name_to_flow_type[data_type]

    if data_format == "NCHW":
        bias_shape = (x_shape[1], )
    elif data_format == "NHWC":
        bias_shape = (x_shape[len(x_shape) - 1], )

    @flow.global_function(type="train", function_config=func_config)
    def FlowJob(
            value: oft.Numpy.Placeholder(x_shape),
            bias: oft.Numpy.Placeholder(bias_shape),
    ):
        with flow.scope.placement(device_type, "0:0"):
            value += flow.get_variable(
                name="v1",
                shape=(1, ),
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )
            bias += flow.get_variable(
                name="v2",
                shape=(1, ),
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )

            x1 = flow.identity(value)
            x2 = flow.identity(value)

            bias1 = flow.identity(bias)
            bias2 = flow.identity(bias)

            flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
            flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))

            flow.watch_diff(bias1, test_global_storage.Setter("bias1_diff"))
            flow.watch_diff(bias2, test_global_storage.Setter("bias2_diff"))

            if data_type == "float16":
                y1 = flow.cast(
                    flow.math.gelu(
                        flow.nn.bias_add(
                            flow.cast(x1, dtype=flow.float16),
                            flow.cast(bias1, dtype=flow.float16),
                            data_format=data_format,
                        ), ),
                    dtype=flow.float,
                )
                y2 = flow.cast(
                    flow.nn.fused_bias_add_gelu(
                        flow.cast(x2, dtype=flow.float16),
                        flow.cast(bias2, dtype=flow.float16),
                        data_format=data_format,
                    ),
                    dtype=flow.float,
                )
            else:
                y1 = flow.math.gelu(
                    flow.nn.bias_add(x1, bias1, data_format=data_format))
                y2 = flow.nn.fused_bias_add_gelu(x2,
                                                 bias2,
                                                 data_format=data_format)
            flow.watch(y1, test_global_storage.Setter("y1"))
            flow.watch(y2, test_global_storage.Setter("y2"))
            flow.watch_diff(y1, test_global_storage.Setter("y1_diff"))
            flow.watch_diff(y2, test_global_storage.Setter("y2_diff"))

            loss = y1 + y2
        flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler([],
                                                                     [0.001]),
                           momentum=0).minimize(flow.math.reduce_sum(loss))

        return loss

    x = np.random.uniform(low=0, high=10, size=x_shape).astype(np.float32)
    bias = np.random.uniform(low=0, high=10,
                             size=bias_shape).astype(np.float32)
    of_out = FlowJob(x, bias).get()

    y1 = test_global_storage.Get("y1")
    y2 = test_global_storage.Get("y2")

    tol = 1e-5
    test_case.assertTrue(
        np.allclose(y1, y2, rtol=tol, atol=tol, equal_nan=True))
    x1_diff = test_global_storage.Get("x1_diff")
    x2_diff = test_global_storage.Get("x2_diff")
    test_case.assertTrue(
        np.allclose(x1_diff, x2_diff, rtol=tol, atol=tol, equal_nan=True))
    bias1_diff = test_global_storage.Get("bias1_diff")
    bias2_diff = test_global_storage.Get("bias2_diff")
    test_case.assertTrue(
        np.allclose(bias1_diff, bias2_diff, rtol=tol, atol=tol,
                    equal_nan=True))
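Both branches above should compute bias-add followed by GELU; for reference, the exact erf-based GELU can be reproduced in numpy as a further sanity check (a sketch added here for clarity, not part of the original test).

import math
import numpy as np

_np_erf = np.vectorize(math.erf)

def np_gelu(x):
    # Exact GELU: 0.5 * x * (1 + erf(x / sqrt(2))).
    return 0.5 * x * (1.0 + _np_erf(x / np.sqrt(2.0)))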
Example #19
def compare_with_numpy_indexed_slices_adamw(
    device_type,
    model_shape,
    ids_shape,
    grad_shape,
    beta1,
    beta2,
    epsilon,
    learning_rate,
    train_iters,
    mul_scalar,
    weight_decay,
):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float32)
    func_config.indexed_slices_optimizer_conf(
        dict(include_op_names=dict(op_name=["embeddings"])))

    @flow.global_function(type="train", function_config=func_config)
    def testIndexedSlicesAdamW(
        sparse_ids: flow.typing.Numpy.Placeholder(ids_shape, dtype=flow.int32),
    ) -> flow.typing.Numpy:
        with flow.scope.placement(device_type, "0:0"):
            embedding_table = flow.get_variable(
                name="embeddings",
                shape=model_shape,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
            )
            embedding = flow.gather(params=embedding_table * mul_scalar,
                                    indices=sparse_ids)
            loss = flow.math.reduce_mean(embedding)

            flow.optimizer.AdamW(
                flow.optimizer.PiecewiseConstantScheduler([], [learning_rate]),
                beta1=beta1,
                beta2=beta2,
                epsilon=epsilon,
                do_bias_correction=True,
                weight_decay=weight_decay,
            ).minimize(loss)

            return embedding_table

    sparse_ids = np.random.randint(model_shape[0],
                                   size=ids_shape).astype(np.int32)

    init_value = None
    for i in range(train_iters + 1):
        x = testIndexedSlicesAdamW(sparse_ids)
        if i == 0:
            init_value = np.copy(x)

    def indexed_slices_update_numpy(
        param,
        unique_dict,
        iter,
        m,
        v,
        lr=0.001,
        beta1=0.9,
        beta2=0.999,
        epsilon=1e-7,
        weight_decay=0.9,
    ):
        param_t = np.copy(param)
        m_t = np.copy(m)
        v_t = np.copy(v)
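        # only the rows referenced by the sparse gradient are updated; all other rows keep their previous values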
        for ids in unique_dict.keys():
            lr_t = lr * np.sqrt(1 - beta2**(iter + 1)) / (1 -
                                                          beta1**(iter + 1))
            m_t_o = beta1 * m[ids] + (1 - beta1) * unique_dict[ids]
            v_t_o = beta2 * v[ids] + (
                1 - beta2) * unique_dict[ids] * unique_dict[ids]
            m_t[ids] = m_t_o
            v_t[ids] = v_t_o
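            # decoupled weight decay (AdamW): the decay term is applied directly to param[ids]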
            param_t_o = param[ids] - lr_t * (m_t[ids] /
                                             (np.sqrt(v_t[ids]) + epsilon) +
                                             weight_decay * param[ids])
            param_t[ids] = param_t_o

        return param_t, m_t, v_t

    param = init_value
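    # reduce_mean over the gathered rows gives each element a gradient of mul_scalar / num_elements; unique_grads sums duplicates per id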
    gradient = np.full(grad_shape, float(mul_scalar) / np.prod(grad_shape))
    m = np.zeros(param.shape)
    v = np.zeros(param.shape)
    unique_dict = unique_grads(sparse_ids, gradient)

    for i in range(train_iters):
        param, m, v = indexed_slices_update_numpy(
            param,
            unique_dict,
            i,
            m,
            v,
            learning_rate,
            beta1,
            beta2,
            epsilon,
            weight_decay,
        )
    assert np.allclose(
        x.flatten(),
        param.flatten(),
        rtol=1e-4,
        atol=1e-4,
    )
Ejemplo n.º 20
0
def _compare_instance_norm_2d_with_np(input_shape, device_type, machine_ids,
                                      device_counts, eps, affine):
    assert device_type in ["cpu", "gpu"]
    assert len(input_shape) == 4

    flow.clear_default_session()

    if device_type == "cpu":
        flow.config.cpu_device_num(device_counts)
    else:
        flow.config.gpu_device_num(device_counts)

    func_config = flow.FunctionConfig()
    func_config.default_placement_scope(
        flow.scope.placement(device_type, machine_ids))

    input = np.random.random(size=input_shape).astype(np.float32)
    gout = np.random.random(size=input_shape).astype(np.float32)

    # compute instance normalization in numpy
    gamma = np.ones((1, input_shape[1], 1, 1), dtype=np.float32)
    mean_np = np.mean(input, axis=(2, 3), keepdims=True)
    in_sub_mean = input - mean_np
    var_np = np.mean(np.square(in_sub_mean), axis=(2, 3), keepdims=True)
    invar_np = 1.0 / np.sqrt(var_np + eps)
    out_np = in_sub_mean * invar_np * gamma

    def assert_prediction_grad(gin_of: tp.Numpy):
        # compute the gradient of variance
        gvar = gout * gamma * in_sub_mean * -0.5 * np.power(var_np + eps, -1.5)
        gvar = np.sum(gvar, axis=(2, 3), keepdims=True)
        # compute the gradient of mean
        gmean = np.sum(gout * gamma, axis=(2, 3), keepdims=True)
        gmean *= -invar_np
        scale = 1.0 / (input_shape[2] * input_shape[3])
        tmp = scale * np.sum(-2.0 * in_sub_mean, axis=(2, 3),
                             keepdims=True) * gvar
        gmean += tmp
        # compute the gradient of input
        gin_np = (gout * gamma * invar_np + gvar * scale * 2.0 * in_sub_mean +
                  gmean * scale)

        assert np.allclose(gin_of, gin_np, atol=1e-5)

    @flow.global_function(type="train", function_config=func_config)
    def instanceNormJob(
            of_input: tp.Numpy.Placeholder(shape=input.shape),
            multipler: tp.Numpy.Placeholder(shape=input.shape),
    ) -> tp.Numpy:
        with flow.scope.placement(device_type, "0:0"):
            v = flow.get_variable(
                shape=of_input.shape,
                dtype=flow.float32,
                initializer=flow.constant_initializer(0),
                name="v",
            )

            x_var = of_input + v
            # watch the gradient
            flow.watch_diff(x_var, assert_prediction_grad)

        out = flow.nn.InstanceNorm2d(x_var, eps=eps, affine=affine)

        with flow.scope.placement(device_type, "0:0"):
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-3]),
                               momentum=0).minimize(out * multipler)

        return out

    check = flow.train.CheckPoint()
    check.init()

    of_out = instanceNormJob(input, gout)

    assert np.allclose(of_out, out_np, atol=1e-5)
Ejemplo n.º 21
0
def compare_with_flow_job_fused_adam_model_update(device_type, x_shape, beta1,
                                                  beta2, epsilon,
                                                  learning_rate, train_iters):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()

    def flow_net(var_name, random_mask):
        with flow.scope.placement(device_type, "0:0-0"):
            x = flow.get_variable(
                name=var_name,
                shape=x_shape,
                dtype=flow.float32,
                initializer=flow.ones_initializer(),
                trainable=True,
            )
            constant_val = flow.constant(3.0, dtype=flow.float32, shape=(1, ))
            x = x * constant_val
            x = x * 2.0
            if device_type == "gpu":
                x = flow.cast(x, flow.float16)
                x = flow.math.relu(x)
                x = flow.cast(x, flow.float)
            loss = flow.math.reduce_mean(x * random_mask)
            flow.optimizer.Adam(
                flow.optimizer.PiecewiseConstantScheduler([], [learning_rate]),
                beta1=beta1,
                beta2=beta2,
                epsilon=epsilon,
                do_bias_correction=True,
            ).minimize(loss)
            return x

    def make_adam_job():
        func_config = flow.FunctionConfig()
        func_config.default_data_type(flow.float32)

        @flow.global_function(type="train", function_config=func_config)
        def testAdam(random_mask: flow.typing.Numpy.Placeholder(
            x_shape, dtype=flow.float32)) -> flow.typing.Numpy:
            return flow_net("x1", random_mask)

        return testAdam

    def make_fused_adam_job():
        func_config = flow.FunctionConfig()
        func_config.default_data_type(flow.float32)
        func_config.enable_fuse_model_update_ops(True)

        @flow.global_function(type="train", function_config=func_config)
        def testFusedAdam(random_mask: flow.typing.Numpy.Placeholder(
            x_shape, dtype=flow.float32)) -> flow.typing.Numpy:
            return flow_net("x2", random_mask)

        return testFusedAdam

    adam_job = make_adam_job()
    fused_adam_job = make_fused_adam_job()

    # generate random number sequences
    random_masks_seq = []
    for i in range(train_iters + 1):
        random_masks_seq.append(
            np.random.uniform(size=x_shape).astype(np.float32))

    for i in range(train_iters + 1):
        var1 = adam_job(random_masks_seq[i])

    for i in range(train_iters + 1):
        var2 = fused_adam_job(random_masks_seq[i])
    assert np.allclose(
        var1.flatten(),
        var2.flatten(),
        rtol=1e-4,
        atol=1e-4,
    )
Ejemplo n.º 22
0
def compare_with_tensorflow(device_type, params_case, dilations, data_format):
    input_shape, output_shape, padding, strides, kernel_size = params_case
    assert data_format in ["NCHW", "NHWC"]
    out_channels = output_shape[1] if data_format == "NCHW" else output_shape[3]
    in_channels = input_shape[1] if data_format == "NCHW" else input_shape[3]
    assert device_type in ["gpu"]

    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def DeconvJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=input_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            if data_format == "NCHW":
                weight = flow.get_variable(
                    "weight",
                    shape=(in_channels, out_channels, kernel_size,
                           kernel_size),
                    dtype=flow.float,
                    initializer=flow.random_uniform_initializer(minval=-10,
                                                                maxval=10),
                    trainable=True,
                )
            else:
                weight = flow.get_variable(
                    "weight",
                    shape=(in_channels, kernel_size, kernel_size,
                           out_channels),
                    dtype=flow.float,
                    initializer=flow.random_uniform_initializer(minval=-10,
                                                                maxval=10),
                    trainable=True,
                )
            loss = flow.nn.conv2d_transpose(
                x,
                weight,
                strides=strides,
                output_shape=output_shape,
                dilations=dilations,
                padding=padding,
                data_format=data_format,
            )
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(weight, test_global_storage.Setter("weight"))
            flow.watch_diff(weight, test_global_storage.Setter("weight_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = DeconvJob().get()
    # TensorFlow
    if data_format == "NCHW":
        with tf.GradientTape(persistent=True) as tape:
            x = tf.Variable(test_global_storage.Get("x").transpose(0, 2, 3, 1))
            output_shape = (
                output_shape[0],
                output_shape[2],
                output_shape[3],
                output_shape[1],
            )
            w = tf.Variable(
                test_global_storage.Get("weight").transpose(2, 3, 1, 0))
            tf_out = tf.nn.conv2d_transpose(
                x,
                w,
                output_shape=output_shape,
                strides=[1, strides, strides, 1],
                padding=padding,
                data_format="NHWC",
            )

        loss_diff = test_global_storage.Get("loss_diff").transpose(0, 2, 3, 1)
        tf_x_diff = tape.gradient(tf_out, x, loss_diff)
        tf_weight_diff = tape.gradient(tf_out, w, loss_diff)

        assert np.allclose(of_out.numpy().transpose(0, 2, 3, 1),
                           tf_out.numpy(),
                           rtol=1e-02,
                           atol=1e-02)
        assert np.allclose(
            test_global_storage.Get("x_diff").transpose(0, 2, 3, 1),
            tf_x_diff.numpy(),
            rtol=1e-4,
            atol=1e-4,
        )
        assert np.allclose(
            test_global_storage.Get("weight_diff").transpose(2, 3, 1, 0),
            tf_weight_diff.numpy(),
            rtol=1e-4,
            atol=1e-4,
        )
    else:
        with tf.GradientTape(persistent=True) as tape:
            x = tf.Variable(test_global_storage.Get("x"))
            w = tf.Variable(
                test_global_storage.Get("weight").transpose(1, 2, 3, 0))
            tf_out = tf.nn.conv2d_transpose(
                x,
                w,
                output_shape=output_shape,
                strides=[1, strides, strides, 1],
                padding=padding,
                data_format="NHWC",
            )
        loss_diff = test_global_storage.Get("loss_diff")
        tf_x_diff = tape.gradient(tf_out, x, loss_diff)
        tf_weight_diff = tape.gradient(tf_out, w, loss_diff)

        assert np.allclose(of_out.numpy(),
                           tf_out.numpy(),
                           rtol=1e-02,
                           atol=1e-02), (of_out.numpy() - tf_out.numpy())
        assert np.allclose(test_global_storage.Get("x_diff"),
                           tf_x_diff.numpy(),
                           rtol=1e-02,
                           atol=1e-02)
        assert np.allclose(
            test_global_storage.Get("weight_diff").transpose(1, 2, 3, 0),
            tf_weight_diff.numpy(),
            rtol=1e-2,
            atol=1e-2,
        )
Ejemplo n.º 23
0
def compare_with_tensorflow(device_type, data_type, shape):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()

    dtype = type_name_to_flow_type[data_type]

    def np_sigmoid(x):
        return 1 / (1 + np.exp(-x))

    @flow.global_function(type="train", function_config=func_config)
    def SigmoidCrossEntropyWithLogitsJob(labels: oft.Numpy.Placeholder(
        shape, dtype)):
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=shape,
                dtype=type_name_to_flow_type[data_type],
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            loss = flow.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                             logits=x)

            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return loss

    # fake labels
    labels = np_sigmoid(np.random.randint(0, 10, size=shape)).astype(
        type_name_to_np_type[data_type])

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = SigmoidCrossEntropyWithLogitsJob(labels).get()

    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.sigmoid_cross_entropy_with_logits(labels, x)

        loss_diff = test_global_storage.Get("loss_diff")
        tf_x_diff = tape.gradient(tf_out, x, loss_diff)

    tolerance = 1e-5
    assert np.allclose(of_out.numpy(),
                       tf_out.numpy(),
                       rtol=tolerance,
                       atol=tolerance)
    assert np.allclose(
        test_global_storage.Get("x_diff"),
        tf_x_diff.numpy(),
        rtol=tolerance,
        atol=tolerance,
    )
    flow.clear_default_session()
Ejemplo n.º 24
0
def _make_dim_gather_fn(
    test_case,
    input,
    index,
    dim,
    grad,
    device_type,
    value_type,
    index_type,
    machine_ids,
    device_counts,
):
    flow.clear_default_session()
    if device_type == "cpu":
        flow.config.cpu_device_num(device_counts)
    else:
        flow.config.gpu_device_num(device_counts)

    func_config = flow.FunctionConfig()

    # the global function requires float32 for its argument and return value when value_type is float16
    if value_type == flow.float16:
        func_config.default_data_type(flow.float32)
    else:
        func_config.default_data_type(value_type)

    func_config.default_placement_scope(
        flow.scope.placement(device_type, machine_ids))
    func_config.default_logical_view(flow.scope.consistent_view())

    def _compare_diff(blob: oft.Numpy):
        test_case.assertTrue(np.allclose(grad, blob))

    if value_type == flow.float16:

        @flow.global_function(type="train", function_config=func_config)
        def gather_fn(
            params_def: oft.Numpy.Placeholder(input.shape, dtype=flow.float32),
            indices_def: oft.Numpy.Placeholder(index.shape, dtype=index_type),
        ) -> oft.Numpy:
            with flow.scope.placement(device_type, "0:0"):
                x_var = flow.get_variable(
                    "input",
                    shape=input.shape,
                    dtype=flow.float32,
                    initializer=flow.constant_initializer(0),
                )
                x_var = flow.cast_to_current_logical_view(x_var)
                x = x_var + params_def
                x_f16 = flow.cast(x, flow.float16)

            y_f16 = flow.dim_gather(x_f16, dim, indices_def)
            x_f32 = flow.cast(x, flow.float32)
            y_f32 = flow.cast(y_f16, flow.float32)

            y = flow.dim_gather(x, dim, indices_def)

            with flow.scope.placement(device_type, "0:0"):
                flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                    [], [1e-3]),
                                   momentum=0).minimize(y_f32)

            flow.watch_diff(x_f32, _compare_diff)
            return y_f32

        return gather_fn
    elif value_type == flow.float32 or value_type == flow.float64:

        @flow.global_function(type="train", function_config=func_config)
        def gather_fn(
            params_def: oft.Numpy.Placeholder(input.shape, dtype=value_type),
            indices_def: oft.Numpy.Placeholder(index.shape, dtype=index_type),
        ) -> oft.Numpy:
            with flow.scope.placement(device_type, "0:0"):
                x_var = flow.get_variable(
                    "input",
                    shape=input.shape,
                    dtype=value_type,
                    initializer=flow.constant_initializer(0),
                )
                x_var = flow.cast_to_current_logical_view(x_var)
                x = x_var + params_def

            y = flow.dim_gather(x, dim, indices_def)

            with flow.scope.placement(device_type, "0:0"):
                flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                    [], [1e-3]),
                                   momentum=0).minimize(y)

            flow.watch_diff(x, _compare_diff)
            return y

        return gather_fn
    elif value_type == flow.int32:

        @flow.global_function(type="train", function_config=func_config)
        def gather_fn(
            params_def: oft.Numpy.Placeholder(input.shape, dtype=flow.float32),
            indices_def: oft.Numpy.Placeholder(index.shape, dtype=index_type),
        ) -> oft.Numpy:
            with flow.scope.placement(device_type, "0:0"):
                x_var = flow.get_variable(
                    "input",
                    shape=input.shape,
                    dtype=flow.float32,
                    initializer=flow.constant_initializer(0),
                )
                x_var = flow.cast_to_current_logical_view(x_var)
                x = x_var + params_def

            x_int32 = flow.cast(x, dtype=flow.int32)
            y_int32 = flow.dim_gather(x, dim, indices_def)
            y_fp32 = flow.cast(y_int32, dtype=flow.float32)

            with flow.scope.placement(device_type, "0:0"):
                flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                    [], [1e-3]),
                                   momentum=0).minimize(y_fp32)

            flow.watch_diff(x, _compare_diff)
            return y_fp32

        return gather_fn
Ejemplo n.º 25
0
    def setUp(self):
        oneflow.clear_default_session()
        oneflow.enable_eager_execution(False)
Ejemplo n.º 26
0
    def test_resnet(test_case, batch_size=DEFAULT_BATCH_SIZE, num_batchs=6):
        init_env()
        # input image format NCHW
        image_size = (3, DEFAULT_IMAGE_SIZE, DEFAULT_IMAGE_SIZE)
        resnet_infer, input_lbns, output_lbns = make_resnet_infer_func(
            batch_size, image_size)

        # load the resnet inference model parameters
        flow.load_variables(flow.checkpoint.get(DEFAULT_CHECKPOINT_DIR))

        # test data
        dataset = ImageNetRecordDataset(
            batch_size=batch_size,
            image_resize_size=DEFAULT_IMAGE_SIZE,
            data_format="NCHW",
        )
        image_list, label_list = dataset.load_batchs(num_batchs)

        print("resnet inference result:")
        origin_outputs = []
        for i, (image, label) in enumerate(zip(image_list, label_list)):
            output = resnet_infer(image)
            arg_max = np.argmax(output, axis=1)
            origin_outputs.append(arg_max)
            print("iter#{:<6} predict: ".format(i), arg_max, "label: ", label)

        origin_outputs = np.array(origin_outputs, dtype=np.float32)

        # save model
        saved_model_path = "resnet50_models"
        model_version = 1

        model_version_path = os.path.join(saved_model_path, str(model_version))
        if os.path.exists(model_version_path) and os.path.isdir(
                model_version_path):
            print("WARNING: The model version path '{}' already exist"
                  ", old version directory will be removed".format(
                      model_version_path))
            shutil.rmtree(model_version_path)

        saved_model_builder = flow.saved_model.ModelBuilder(saved_model_path)
        signature_builder = (saved_model_builder.ModelName("resnet50").Version(
            model_version).AddFunction(resnet_infer).AddSignature("regress"))
        for input_name, lbn in input_lbns.items():
            signature_builder.Input(input_name, lbn)
        for output_name, lbn in output_lbns.items():
            signature_builder.Output(output_name, lbn)
        saved_model_builder.Save()

        # load model and run
        flow.clear_default_session()
        sess = flow.serving.InferenceSession()
        sess.load_saved_model(saved_model_path)
        # sess.print_job_set()
        sess.launch()

        job_name = sess.list_jobs()[0]
        input_names = sess.list_inputs()
        print("input names:", input_names)
        for input_name in input_names:
            print('input "{}" info: {}'.format(
                input_name, sess.input_info(input_name, job_name)))

        print("load saved resnet and inference result:")
        cmp_outputs = []
        for i, (image, label) in enumerate(zip(image_list, label_list)):
            outputs = sess.run(resnet_infer.__name__, image=image)
            arg_max = np.argmax(outputs[0], axis=1)
            cmp_outputs.append(arg_max)
            print("iter#{:<6} output:".format(i), arg_max, "label: ", label)

        cmp_outputs = np.array(cmp_outputs, dtype=np.float32)
        test_case.assertTrue(np.allclose(origin_outputs, cmp_outputs))
        sess.close()
Ejemplo n.º 27
0
def _run_test_fake_quantize(
    test_case,
    device_type,
    device_num,
    dtype,
    in_shape,
    quantization_bit,
    quantization_scheme,
    per_layer_quantization,
):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    if device_type == "cpu":
        flow.config.cpu_device_num(device_num)
    else:
        flow.config.gpu_device_num(device_num)

    @flow.global_function(type="train", function_config=flow.FunctionConfig())
    def QuantizeJob(input: oft.Numpy.Placeholder(
        in_shape, dtype=type_name_to_flow_type[dtype])):
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=in_shape,
                dtype=input.dtype,
                initializer=flow.zeros_initializer(input.dtype),
                trainable=True,
            )
            input_x = input + x

        flow.watch_diff(input_x, test_global_storage.Setter("input_diff"))

        with flow.scope.placement(device_type, "0:0-%d" % (device_num - 1)):
            scale, zero_point = flow.quantization.min_max_observer(
                input_x, quantization_bit, quantization_scheme,
                per_layer_quantization)
            out = flow.quantization.fake_quantization(input_x, scale,
                                                      zero_point,
                                                      quantization_bit,
                                                      quantization_scheme)
            loss = flow.math.reduce_mean(out)

            flow.optimizer.Adam(
                flow.optimizer.PiecewiseConstantScheduler(
                    [], [0.001]), ).minimize(loss)

        return out

    check_point = flow.train.CheckPoint()
    check_point.init()

    input = (np.random.random(in_shape) - 0.5).astype(
        type_name_to_np_type[dtype])
    out = QuantizeJob(input).get()

    input_diff = test_global_storage.Get("input_diff")

    _check_fake_quantize(
        test_case,
        input,
        input_diff.flatten(),
        out.numpy().flatten(),
        quantization_bit,
        quantization_scheme,
        per_layer_quantization,
    )
Ejemplo n.º 28
0
def compare_with_numpy_adamw(
    device_type,
    x_shape,
    beta1,
    beta2,
    epsilon,
    weight_decay,
    learning_rate,
    train_iters,
):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float32)

    @flow.global_function(type="train", function_config=func_config)
    def testAdamW(random_mask: flow.typing.Numpy.Placeholder(
        x_shape, dtype=flow.float32)) -> flow.typing.Numpy:
        with flow.scope.placement(device_type, "0:0-0"):
            x = flow.get_variable(
                name="x",
                shape=x_shape,
                dtype=flow.float32,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
                trainable=True,
            )
            loss = flow.math.reduce_mean(x * random_mask)
            flow.optimizer.AdamW(
                flow.optimizer.PiecewiseConstantScheduler([], [learning_rate]),
                beta1=beta1,
                beta2=beta2,
                epsilon=epsilon,
                weight_decay=weight_decay,
                do_bias_correction=True,
            ).minimize(loss)
            return x

    # generate random number sequences
    random_masks_seq = []
    for i in range(train_iters + 1):
        random_masks_seq.append(
            np.random.uniform(size=x_shape).astype(np.float32))

    init_value = None
    for i in range(train_iters + 1):
        x = testAdamW(random_masks_seq[i])
        if i == 0:
            init_value = np.copy(x)

    def adamw_update_numpy(
        param,
        gradient,
        iter,
        m,
        v,
        lr=0.001,
        beta1=0.9,
        beta2=0.999,
        epsilon=1e-7,
        weight_decay=0.9,
    ):
        lr_t = lr * np.sqrt(1 - beta2**(iter + 1)) / (1 - beta1**(iter + 1))

        m_t = beta1 * m + (1 - beta1) * gradient
        v_t = beta2 * v + (1 - beta2) * gradient * gradient

        param_t = param - lr_t * (m_t / (np.sqrt(v_t) + epsilon) +
                                  weight_decay * param)
        return param_t, m_t, v_t

    param = init_value
    gradient = np.full(param.shape, 1.0 / np.prod(param.shape))
    m = np.zeros(param.shape)
    v = np.zeros(param.shape)
    for i in range(train_iters):
        param, m, v = adamw_update_numpy(
            param,
            gradient * random_masks_seq[i],
            i,
            m,
            v,
            learning_rate,
            beta1,
            beta2,
            epsilon,
            weight_decay,
        )

    assert np.allclose(
        x.flatten(),
        param.flatten(),
        rtol=1e-4,
        atol=1e-4,
    )
Ejemplo n.º 29
0
def _test_hybrid_concat(test_case,
                        static_shape,
                        axis,
                        max_dim_size=None,
                        verbose=False):
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_logical_view(flow.scope.mirrored_view())

    def compare_var_diff(var_blob):
        test_case.assertTrue(
            np.array_equal(var_blob.numpy(),
                           np.ones(shape=static_shape, dtype=np.single)))

    rand_sub_shape = list(static_shape).copy()
    rand_sub_shape[axis] = random.randrange(1, static_shape[axis])
    rand_sub_shape = tuple(rand_sub_shape)

    @flow.global_function(type="train", function_config=func_config)
    def hybrid_concat_job(
        input_0_def: oft.ListNumpy.Placeholder(shape=static_shape,
                                               dtype=flow.float),
        input_1_def: oft.ListNumpy.Placeholder(shape=static_shape,
                                               dtype=flow.float),
    ):
        var = flow.get_variable(
            "var",
            shape=static_shape,
            dtype=flow.float,
            initializer=flow.random_uniform_initializer(),
            trainable=True,
        )
        constant = flow.constant(1.0, dtype=flow.float, shape=rand_sub_shape)
        inputs = [
            flow.cast_to_current_logical_view(input)
            for input in [var, input_0_def, input_1_def, constant]
        ]
        concated = flow.concat(
            inputs,
            axis=axis,
            max_dim_size=max_dim_size,
        )
        if verbose:
            print("concated static shape:", concated.shape)

        flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler([],
                                                                     [1e-3]),
                           momentum=0).minimize(concated)
        flow.watch_diff(var, compare_var_diff)

        if max_dim_size is None:
            test_case.assertTrue(
                concated.shape[axis] == (static_shape[axis] * 3 +
                                         rand_sub_shape[axis]))
        else:
            test_case.assertTrue(concated.shape[axis] == max_dim_size)

        return var, concated

    output, inputs = _rand_inputs(static_shape, axis, 2)
    if verbose:
        print("static_shape:", static_shape)
        print("input_0 shape:", inputs[0].shape)
        print("input_1 shape:", inputs[1].shape)
        print("output shape:", output.shape)
        print("rand_sub_shape:", rand_sub_shape)

    var, concated = hybrid_concat_job([inputs[0]], [inputs[1]]).get()
    if verbose:
        print("var shape:", var.numpy().shape)
        print("concated shape:", concated.numpy(0).shape)

    test_case.assertTrue(
        np.array_equal(
            np.concatenate(
                [
                    var.numpy(), output,
                    np.ones(shape=rand_sub_shape, dtype=np.single)
                ],
                axis=axis,
            ),
            concated.numpy(0),
        ))
Ejemplo n.º 30
0
def _test_masked_fill_fw_bw(test_case,
                            device,
                            x_shape,
                            mask_shape,
                            type_name,
                            value=0):
    flow.clear_default_session()
    func_config = flow.FunctionConfig()

    if type_name == "float16":
        flow_type = flow.float
        np_type = np.float32
    else:
        flow_type = type_name_to_flow_type[type_name]
        np_type = type_name_to_np_type[type_name]

    func_config.default_data_type(flow_type)

    @flow.global_function(type="train", function_config=func_config)
    def test_masked_fill_fw_bw_job(
            x: oft.Numpy.Placeholder(x_shape, dtype=flow_type),
            mask: oft.Numpy.Placeholder(mask_shape, dtype=flow_type),
    ):
        with flow.scope.placement(device, "0:0"):
            y = flow.get_variable(
                name="vx",
                shape=(1, ),
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )
            x += flow.cast(y, flow_type)
            mask = flow.cast(mask, dtype=flow.int8)
            if type_name == "float16":
                out = flow.cast(
                    flow.masked_fill(flow.cast(x, flow.float16), mask, value),
                    flow.float,
                )
            else:
                out = flow.masked_fill(x, mask, value)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(out)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(out, test_global_storage.Setter("out"))
            flow.watch_diff(out, test_global_storage.Setter("out_diff"))
            return out

    check_point = flow.train.CheckPoint()
    check_point.init()
    x = np.random.randint(low=0, high=100, size=x_shape)
    mask = np.random.randint(low=0, high=2, size=mask_shape)

    test_masked_fill_fw_bw_job(x.astype(np_type), mask.astype(np_type)).get()
    out_diff = test_global_storage.Get("out_diff")

    np_out, np_x_diff = _masked_fill_np_fw_bw(x, mask, out_diff, np_type,
                                              value)

    if type_name == "float16":
        tolerance = 1e-3
    else:
        tolerance = 1e-5

    test_case.assertTrue(
        np.allclose(np_out,
                    test_global_storage.Get("out"),
                    rtol=tolerance,
                    atol=tolerance))
    test_case.assertTrue(
        np.allclose(np_x_diff,
                    test_global_storage.Get("x_diff"),
                    rtol=tolerance,
                    atol=tolerance))