def test_op_function_float(test_case):
    arg_dict = _gen_arg_dict("gpu", "float", "0:0-1", 2)
    for arg in GenArgList(arg_dict):
        _compare_op_function_with_samples(test_case, *arg)
def test_dim_gather_float_gpu(test_case):
    arg_dict = _gen_arg_dict("gpu", "float", "0:0", 1)
    for arg in GenArgList(arg_dict):
        _compare_dim_gather_with_samples(test_case, *arg)
def test_bceloss_gpu_1n2d(test_case):
    arg_dict = _gen_arg_dict(
        shape=(3, 16, 16), device_type="gpu", machine_ids="0:0-1", device_counts=2
    )
    for arg in GenArgList(arg_dict):
        _compare_bceloss_with_np(*arg)
def test_nms(test_case):
    arg_dict = OrderedDict()
    arg_dict["test_fun"] = [_test_nms]
    arg_dict["device"] = ["cuda"]
    for arg in GenArgList(arg_dict):
        arg[0](test_case, *arg[1:])
def test_split_to_broadcast(test_case):
    arg_dict = OrderedDict()
    arg_dict["src_axis"] = [0, 1]
    for arg in GenArgList(arg_dict):
        _test_split_to_broadcast(test_case, *arg)
def test_zeros_gpu(test_case):
    arg_dict = _gen_arg_dict(
        shape=(3, 16, 32), device_type="gpu", machine_ids="0:0", device_counts=1
    )
    for arg in GenArgList(arg_dict):
        _compare_zeros_with_np(*arg)
def test_square(test_case):
    arg_dict = OrderedDict()
    arg_dict["device_type"] = ["gpu"]
    arg_dict["x_shape"] = [(10, 20, 30)]
    for arg in GenArgList(arg_dict):
        compare_with_tensorflow(*arg)
def test_embedding(test_case):
    arg_dict = OrderedDict()
    arg_dict["device"] = ["cpu", "cuda"]
    for arg in GenArgList(arg_dict):
        _test_embedding_impl(test_case, *arg)
def test_linear_backward(test_case):
    arg_dict = OrderedDict()
    arg_dict["test_fun"] = [_test_linear_backward_with_bias]
    arg_dict["device"] = ["cpu", "cuda"]
    for arg in GenArgList(arg_dict):
        arg[0](test_case, *arg[1:])
def test_dropout_module(test_case):
    arg_dict = OrderedDict()
    arg_dict["device_type"] = ["cpu", "gpu"]
    arg_dict["x_shape"] = [(2, 2, 2, 2)]
    arg_dict["data_type"] = ["float32"]
    arg_dict["rate"] = [0.75]
    arg_dict["seed"] = [12345]
    # Expected flattened outputs for the fixed seed, keyed by device type.
    literals = {
        "cpu": [
            np.array(
                [4.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 4.0, 4.0, 0.0, 0.0, 4.0]
            ),
            np.array(
                [0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0]
            ),
        ],
        "gpu": [
            np.array(
                [4.0, 4.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0]
            ),
            np.array(
                [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 0.0]
            ),
        ],
    }
    for arg in GenArgList(arg_dict):
        of_out_a, of_out_b = of_run_module(*arg)
        test_case.assertEqual(
            (np.abs(literals[arg[0]][0] - of_out_a.flatten()) < 10e-7).all(), True
        )
        test_case.assertEqual(
            (np.abs(literals[arg[0]][1] - of_out_b.flatten()) < 10e-7).all(), True
        )
def gen_arg_list():
    arg_dict = OrderedDict()
    arg_dict["device_type"] = ["cpu", "gpu"]
    arg_dict["in_shape"] = [(1, 10, 1, 10, 1)]
    arg_dict["axis"] = [None, [2], [-3], [0, 2, 4], [-1, -3, -5]]
    return GenArgList(arg_dict)
def test_layer_norm(_):
    confs = [{"x_shape": (4, 5, 2, 6), "begin_norm_axis": -1, "begin_params_axis": -1}]
    arg_dict = OrderedDict()
    arg_dict["device_type"] = ["gpu"]
    arg_dict["confs"] = confs
    arg_dict["data_type"] = ["float32"]
    arg_dict["trainable"] = [True, False]
    arg_dict["center"] = [True, False]
    arg_dict["scale"] = [True, False]
    arg_dict["epsilon"] = [0.0, 1e-10]
    for case in GenArgList(arg_dict):
        (device_type, confs, data_type, trainable, center, scale, epsilon) = case
        x_shape = confs["x_shape"]
        begin_norm_axis = confs["begin_norm_axis"]
        begin_params_axis = confs["begin_params_axis"]
        flow.clear_default_session()

        # Random inputs
        x = np.random.randn(*x_shape).astype(type_name_to_np_type[data_type])
        dim = len(x.shape) - 2

        # TF results
        with tf.GradientTape(persistent=True) as tape:
            x_tf = tf.Variable(x)
            y_tf = tf.keras.layers.LayerNormalization(
                axis=begin_norm_axis,
                epsilon=epsilon,
                center=center,
                scale=scale,
                beta_initializer="zeros",
                gamma_initializer="ones",
                beta_regularizer=None,
                gamma_regularizer=None,
                beta_constraint=None,
                gamma_constraint=None,
                trainable=trainable,
            )(x_tf)
        dx_tf = tape.gradient(y_tf, x_tf, tf.constant(1.0, shape=y_tf.shape))

        def assert_grad(b):
            assert np.allclose(dx_tf.numpy(), b.numpy(), rtol=1e-5, atol=1e-5), (
                case,
                dx_tf.numpy(),
                b.numpy(),
            )

        # 1F results
        dtype = type_name_to_flow_type[data_type]
        func_config = flow.FunctionConfig()
        func_config.default_data_type(flow.float)

        @flow.global_function(type="train", function_config=func_config)
        def test_job(x: oft.Numpy.Placeholder(x_shape, dtype=dtype)):
            v = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=dtype,
                initializer=flow.constant_initializer(0),
                trainable=True,
            )
            flow.watch_diff(v, assert_grad)
            x += v
            with flow.scope.placement(device_type, "0:0"):
                y = flow.layers.layer_norm(
                    x,
                    begin_norm_axis=begin_norm_axis,
                    begin_params_axis=begin_params_axis,
                    center=center,
                    scale=scale,
                )
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
            ).minimize(y)
            return y

        check_point = flow.train.CheckPoint()
        check_point.init()
        y = test_job(x).get()
        assert y.numpy().shape == y_tf.numpy().shape, (
            y.numpy().shape,
            y_tf.numpy().shape,
        )
        diff = y.numpy() - y_tf.numpy()
        max_diff = np.max(np.abs(diff))
        assert np.allclose(y.numpy(), y_tf.numpy(), rtol=1e-5, atol=2e-3), (
            case,
            max_diff,
        )
def test_instruction_replay(test_case):
    arg_dict = OrderedDict()
    arg_dict["device"] = ["cpu", "cuda"]
    arg_dict["shape"] = [[2, 3], [1, 10]]
    for arg in GenArgList(arg_dict):
        _test_instruction_replay_impl(test_case, *arg)
def test_dim_gather_int(test_case):
    arg_dict = _gen_arg_dict("gpu", "int", "0:0-1", 2)
    for arg in GenArgList(arg_dict):
        _compare_dim_gather_with_samples(test_case, *arg)
def test_variable_as_loss_on_two_device(test_case):
    arg_dict = OrderedDict()
    arg_dict["mirrored"] = [True, False]
    for arg in GenArgList(arg_dict):
        do_test(test_case, *arg)
def test_bernoulli(test_case):
    arg_dict = OrderedDict()
    arg_dict["test_functions"] = [_test_bernoulli]
    arg_dict["shape"] = [(2, 3), (2, 3, 4), (2, 3, 4, 5)]
    for arg in GenArgList(arg_dict):
        arg[0](test_case, *arg[1:])
def test_pool(_):
    arg_dict = OrderedDict()
    arg_dict["device_type"] = ["gpu", "cpu"]
    arg_dict["pool_conf"] = pool_confs
    arg_dict["data_type"] = ["float32"]
    arg_dict["pooling_type"] = ["AVG", "MAX"]
    arg_dict["is_dynamic"] = [True, False]
    for case in GenArgList(arg_dict):
        (device_type, pool_conf, data_type, pooling_type, is_dynamic) = case
        x_shape = pool_conf["x_shape"]
        ksize = pool_conf["ksize"]
        strides = pool_conf["strides"]
        padding = pool_conf["padding"]
        data_format = pool_conf["data_format"]
        if os.getenv("ONEFLOW_TEST_CPU_ONLY") and data_format != "NHWC":
            continue
        flow.clear_default_session()

        # Random inputs
        x = np.random.randn(*x_shape).astype(type_name_to_np_type[data_type])
        dim = len(x.shape) - 2

        # TODO: these cases will fail in the old implementation
        if dim == 3 and data_format == "NDHWC":
            continue

        # TF results
        with tf.GradientTape(persistent=True) as tape:
            x_tf = tf.Variable(x)
            strides = _GetSequence(strides, dim, "strides")
            pooling_f = None
            if pooling_type == "AVG":
                pooling_f = getattr(tf.nn, "avg_pool{}d".format(dim))
            elif pooling_type == "MAX":
                pooling_f = getattr(tf.nn, "max_pool{}d".format(dim))
            else:
                raise ValueError("pooling_type must be AVG or MAX")
            y_tf = pooling_f(x_tf, ksize, strides, padding, data_format=data_format)
        dx_tf = tape.gradient(y_tf, x_tf, tf.constant(1.0, shape=y_tf.shape))

        def assert_grad(b):
            # TODO(hanbinbin): in eager mode b's is_dynamic cannot be derived correctly,
            # hence the if/else below; is_dynamic will be removed in the next refactor
            # and the problem will be gone.
            if b.is_dynamic:
                b_ndarray = b.numpy_list()[0]
            else:
                b_ndarray = b.numpy()
            assert np.allclose(dx_tf.numpy(), b_ndarray), (
                case,
                dx_tf.numpy(),
                b_ndarray,
            )

        # 1F results
        dtype = type_name_to_flow_type[data_type]
        func_config = flow.FunctionConfig()
        func_config.default_data_type(flow.float)
        tensor_def = None
        if is_dynamic:
            func_config.default_logical_view(flow.scope.mirrored_view())
            tensor_def = oft.ListNumpy.Placeholder
        else:
            tensor_def = oft.Numpy.Placeholder

        @flow.global_function(type="train", function_config=func_config)
        def pooling_job(x: tensor_def(x_shape, dtype=dtype)):
            v = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=dtype,
                initializer=flow.constant_initializer(0),
                trainable=True,
            )
            v = flow.cast_to_current_logical_view(v)
            flow.watch_diff(v, assert_grad)
            x += v
            with flow.scope.placement(device_type, "0:0"):
                pooling_f = None
                if pooling_type == "AVG":
                    pooling_f = getattr(flow.nn, "avg_pool{}d".format(dim))
                elif pooling_type == "MAX":
                    pooling_f = getattr(flow.nn, "max_pool{}d".format(dim))
                else:
                    raise ValueError("pooling_type must be AVG or MAX")
                y = pooling_f(
                    x,
                    ksize=ksize,
                    strides=strides,
                    padding=padding,
                    data_format=data_format,
                )
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
            ).minimize(y)
            return y

        if is_dynamic:
            x = [x]
        y = pooling_job(x).get()
        y_ndarray = None
        if is_dynamic:
            y_ndarray = y.numpy_list()[0]
        else:
            y_ndarray = y.numpy()
        assert y_ndarray.shape == y_tf.numpy().shape, (
            y_ndarray.shape,
            y_tf.numpy().shape,
        )
        assert np.allclose(y_ndarray, y_tf.numpy(), rtol=1e-5, atol=1e-5), (
            case,
            y_ndarray - y_tf.numpy(),
        )
def test_slice(test_case):
    arg_dict = OrderedDict()
    arg_dict["test_fun"] = [_test_slice, _test_slice_backward]
    arg_dict["device"] = ["cpu", "cuda"]
    for arg in GenArgList(arg_dict):
        arg[0](test_case, *arg[1:])
def test_smooth_l1_loss(_):
    arg_dict = OrderedDict()
    arg_dict["device_type"] = ["gpu", "cpu"]
    arg_dict["prediction_shape"] = [
        (100,),
        (10, 10),
    ]
    arg_dict["data_type"] = ["float32", "double"]
    arg_dict["beta"] = [0, 0.5, 1]
    for case in GenArgList(arg_dict):
        device_type, prediction_shape, data_type, beta = case
        assert device_type in ["gpu", "cpu"]
        assert data_type in ["float32", "double", "int8", "int32", "int64"]
        flow.clear_default_session()
        func_config = flow.FunctionConfig()
        func_config.default_data_type(flow.float)
        prediction = np.random.randn(*prediction_shape).astype(
            type_name_to_np_type[data_type]
        )
        label = np.random.randn(*prediction_shape).astype(
            type_name_to_np_type[data_type]
        )
        np_result = gen_numpy_data(prediction, label, beta)

        def assert_prediction_grad(b):
            prediction_grad = np_result["prediction_grad"]
            assert prediction_grad.dtype == type_name_to_np_type[data_type]
            assert np.allclose(prediction_grad, b.numpy()), (
                case,
                prediction_grad,
                b.numpy(),
            )

        @flow.global_function(type="train", function_config=func_config)
        def TestJob(
            prediction: oft.Numpy.Placeholder(
                prediction_shape, dtype=type_name_to_flow_type[data_type]
            ),
            label: oft.Numpy.Placeholder(
                prediction_shape, dtype=type_name_to_flow_type[data_type]
            ),
        ):
            v = flow.get_variable(
                "prediction",
                shape=prediction_shape,
                dtype=type_name_to_flow_type[data_type],
                initializer=flow.constant_initializer(0),
                trainable=True,
            )
            flow.watch_diff(v, assert_prediction_grad)
            prediction += v
            with flow.scope.placement(device_type, "0:0"):
                loss = flow.smooth_l1_loss(prediction, label, beta)
                flow.optimizer.SGD(
                    flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0,
                ).minimize(loss)
                return loss

        loss_np = np_result["loss"]
        assert loss_np.dtype == type_name_to_np_type[data_type]
        loss = TestJob(prediction, label).get().numpy()
        assert np.allclose(loss_np, loss), (case, loss_np, loss)
def test_partial_sum_to_split(test_case):
    arg_dict = OrderedDict()
    arg_dict["dst_axis"] = [0, 1]
    for arg in GenArgList(arg_dict):
        _test_partial_sum_to_split(test_case, *arg)
def test_layer_norm(_):
    confs = [
        {"x_shape": (40, 1023)},
        {"x_shape": (40, 1024)},
        {"x_shape": (40, 2047)},
        {"x_shape": (40, 2048)},
        {"x_shape": (40, 16384)},
    ]
    arg_dict = OrderedDict()
    arg_dict["device_type"] = ["cpu", "gpu"]
    arg_dict["confs"] = confs
    arg_dict["data_type"] = ["float32", "float16"]
    arg_dict["trainable"] = [True, False]
    arg_dict["center"] = [True, False]
    arg_dict["scale"] = [True, False]
    arg_dict["epsilon"] = [1e-05]
    arg_dict["fuse_add_to_output"] = [True, False]
    for case in GenArgList(arg_dict):
        (
            device_type,
            confs,
            data_type,
            trainable,
            center,
            scale,
            epsilon,
            fuse_add_to_output,
        ) = case
        if device_type == "cpu" and data_type == "float16":
            continue
        if device_type == "cpu" and fuse_add_to_output:
            continue
        x_shape = confs["x_shape"]
        if device_type == "cpu" and x_shape[1] != 1024:
            continue
        begin_norm_axis = 1
        begin_params_axis = 1
        flow.clear_default_session()
        assert (
            begin_norm_axis == begin_params_axis
        ), "tf doesn't support a dedicated begin_params_axis"

        if data_type == "float16":
            x = (
                np.random.uniform(low=-1, high=1, size=x_shape)
                .astype(np.float16)
                .astype(np.float32)
            )
        else:
            x = np.random.uniform(low=-1, high=1, size=x_shape).astype(
                type_name_to_np_type[data_type]
            )
        dim = len(x.shape) - 2

        # TF results
        with tf.GradientTape(persistent=True) as tape:
            x_tf = tf.Variable(x)
            if data_type == "float16":
                x_tf = tf.cast(x_tf, dtype=tf.float16)
                tf.keras.backend.set_floatx("float16")
            else:
                tf.keras.backend.set_floatx("float32")
            layer = tf.keras.layers.LayerNormalization(
                axis=begin_norm_axis,
                epsilon=epsilon,
                center=center,
                scale=scale,
                beta_initializer="zeros",
                gamma_initializer="ones",
                beta_regularizer=None,
                gamma_regularizer=None,
                beta_constraint=None,
                gamma_constraint=None,
                trainable=trainable,
            )
            y_tf = layer(x_tf)
            y_tf = tf.math.sigmoid(y_tf)
            z_tf = y_tf + x_tf
        if data_type == "float16":
            dx_tf = tape.gradient(
                z_tf, x_tf, tf.constant(1.0, shape=z_tf.shape, dtype=tf.float16)
            )
        else:
            dx_tf = tape.gradient(z_tf, x_tf, tf.constant(1.0, shape=z_tf.shape))
        grad = tape.gradient(z_tf, layer.trainable_variables)
        if trainable:
            if scale and center:
                tf_gamma_diff = grad[0]
                tf_beta_diff = grad[1]
            elif scale and (not center):
                tf_gamma_diff = grad[0]
            elif (not scale) and center:
                tf_beta_diff = grad[0]

        def assert_grad(b):
            if data_type == "float16":
                dx_of = b.numpy().astype(np.float16)
                rtol = 0.001
                atol = 0.05
            else:
                dx_of = b.numpy()
                rtol = 1e-5
                atol = 1e-5
            diff = dx_tf.numpy() - dx_of
            max_diff = np.max(np.abs(diff))
            assert np.allclose(dx_tf.numpy(), dx_of, rtol=rtol, atol=atol), (
                case,
                max_diff,
            )

        def assert_grad_gamma(b):
            if data_type == "float16":
                of_gamma_diff = b.numpy().astype(np.float16)
                rtol = 0.001
                atol = 0.05
            else:
                of_gamma_diff = b.numpy()
                rtol = 1e-5
                atol = 1e-5
            diff = tf_gamma_diff.numpy() - of_gamma_diff
            max_diff = np.max(np.abs(diff))
            assert np.allclose(
                tf_gamma_diff.numpy(), of_gamma_diff, rtol=rtol, atol=atol
            ), (case, max_diff)

        def assert_grad_beta(b):
            if data_type == "float16":
                of_beta_diff = b.numpy().astype(np.float16)
                rtol = 0.001
                atol = 0.05
            else:
                of_beta_diff = b.numpy()
                rtol = 1e-5
                atol = 1e-5
            diff = tf_beta_diff.numpy() - of_beta_diff
            max_diff = np.max(np.abs(diff))
            assert np.allclose(
                tf_beta_diff.numpy(), of_beta_diff, rtol=rtol, atol=atol
            ), (case, max_diff)

        # 1F results
        if data_type == "float16":
            dtype = flow.float
        else:
            dtype = type_name_to_flow_type[data_type]
        func_config = flow.FunctionConfig()
        func_config.default_data_type(flow.float)
        func_config.enable_fuse_add_to_output(fuse_add_to_output)

        @flow.global_function(type="train", function_config=func_config)
        def test_job(x: oft.Numpy.Placeholder(x_shape, dtype=dtype)):
            v = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=dtype,
                initializer=flow.constant_initializer(0),
                trainable=True,
            )
            flow.watch_diff(v, assert_grad)
            x += v
            if data_type == "float16":
                x = flow.cast(x, dtype=flow.float16)
            with flow.scope.placement(device_type, "0:0"):
                param_shape = x.shape[begin_params_axis:]
                gamma = None
                beta = None
                if center:
                    with flow.scope.namespace("LayerNorm"):
                        beta = flow.get_variable(
                            name="beta",
                            shape=param_shape,
                            dtype=flow.float,
                            initializer=flow.constant_initializer(0.0),
                            trainable=trainable,
                            model_name="beta",
                            reuse=False,
                        )
                    if trainable:
                        flow.watch_diff(beta, assert_grad_beta)
                    if data_type == "float16":
                        beta = flow.cast(beta, dtype=flow.float16)
                if scale:
                    with flow.scope.namespace("LayerNorm"):
                        gamma = flow.get_variable(
                            name="gamma",
                            shape=param_shape,
                            dtype=flow.float,
                            initializer=flow.constant_initializer(1.0),
                            trainable=trainable,
                            model_name="gamma",
                            reuse=False,
                        )
                    if trainable:
                        flow.watch_diff(gamma, assert_grad_gamma)
                    if data_type == "float16":
                        gamma = flow.cast(gamma, dtype=flow.float16)
                x = flow.identity(x)
                y = flow.nn.layer_norm(
                    x,
                    gamma=gamma,
                    beta=beta,
                    begin_norm_axis=begin_norm_axis,
                    begin_params_axis=begin_params_axis,
                    epsilon=epsilon,
                )
                y = flow.math.sigmoid(y)
                z = y + x
            if data_type == "float16":
                y = flow.cast(y, dtype=flow.float)
                z = flow.cast(z, dtype=flow.float)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0
            ).minimize(z)
            return y

        y = test_job(x).get()
        if data_type == "float16":
            y_of = y.numpy().astype(np.float16)
        else:
            y_of = y.numpy()
        assert y_of.shape == y_tf.numpy().shape, (
            y_of.shape,
            y_tf.numpy().shape,
        )
        diff = y_of.astype(np.float16) - y_tf.numpy()
        max_diff = np.max(np.abs(diff))
        assert np.allclose(y_of, y_tf.numpy(), rtol=1e-05, atol=0.002), (
            case,
            max_diff,
        )
def test_bceloss_cpu(test_case):
    arg_dict = _gen_arg_dict(
        shape=(3, 3), device_type="cpu", machine_ids="0:0", device_counts=1
    )
    for arg in GenArgList(arg_dict):
        _compare_bceloss_with_np(*arg)
def test_div(test_case):
    arg_dict = OrderedDict()
    arg_dict["shape"] = [(2, 3), (2, 3, 4), (2, 4, 5, 6)]
    arg_dict["device"] = ["cpu", "cuda"]
    for arg in GenArgList(arg_dict):
        _test_div_impl(test_case, *arg)
def test_op_function_int_gpu(test_case):
    arg_dict = _gen_arg_dict("gpu", "int", "0:0", 1)
    for arg in GenArgList(arg_dict):
        _compare_op_function_with_samples(test_case, *arg)
def test_atan2_backward(test_case):
    arg_dict = OrderedDict()
    arg_dict["device"] = ["cpu", "cuda"]
    for arg in GenArgList(arg_dict):
        _test_atan2_backward(test_case, *arg)