def compare_with_tensorflow(device_type, x_shape, data_type, axis):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    if data_type == "float16":
        dtype = flow.float
    else:
        dtype = type_name_to_flow_type[data_type]

    @flow.global_function(type="train", function_config=func_config)
    def SoftmaxJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=-1.0, maxval=1.0),
                trainable=True,
            )
            x1 = x
            x = flow.identity(x)
            if data_type == "float16":
                loss = flow.cast(
                    flow.nn.softmax(flow.cast(x, dtype=flow.float16), axis=axis),
                    dtype=flow.float,
                )
            else:
                loss = flow.nn.softmax(x, axis=axis)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            total_loss = loss * x1
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0
            ).minimize(total_loss)
            return loss

    of_out = SoftmaxJob().get()
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.softmax(x, axis=axis)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    if data_type == "float16":
        tolerance = 0.001
    else:
        tolerance = 1e-05
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=tolerance, atol=tolerance)
    assert np.allclose(
        test_global_storage.Get("x_diff"),
        tf_x_diff.numpy(),
        rtol=tolerance,
        atol=tolerance,
    )
def compare_with_numpy(device_type, input_shape, dtype, size, data_format,
                       interpolation, align_corners):
    assert device_type in ["gpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="predict", function_config=func_config)
    def UpsampleJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "input",
                shape=input_shape,
                dtype=type_name_to_flow_type[dtype],
                initializer=flow.random_uniform_initializer(minval=2, maxval=5),
                trainable=False,
            )
            loss = flow.layers.upsample_2d(
                x,
                size=size,
                data_format=data_format,
                interpolation=interpolation,
                align_corners=align_corners,
            )
            flow.watch(x, test_global_storage.Setter("x1"))
            flow.watch(loss, test_global_storage.Setter("loss1"))
            return loss

    of_out = UpsampleJob().get()
    channel_pos = "channels_first" if data_format.startswith("NC") else "channels_last"
    if align_corners:
        assert interpolation == "bilinear"
        x = test_global_storage.Get("x1")
        if data_format == "NHWC":
            x = np.transpose(x, axes=[0, 3, 1, 2])
        coeffs_dict = {"bilinear": linear_coeffs}
        coeffs = coeffs_dict[interpolation]
        scaler = "align_corners"
        np_out = interpolate_nd(
            x, coeffs, scale_factors=size, scaler=scaler
        ).astype(np.float32)
        of_out_np = of_out.numpy()
        if data_format == "NHWC":
            of_out_np = np.transpose(of_out_np, axes=[0, 3, 1, 2])
        assert np.allclose(of_out_np, np_out, rtol=1e-05, atol=1e-05)
    else:
        x = test_global_storage.Get("x1")
        if data_format == "NHWC":
            x = np.transpose(x, axes=[0, 3, 1, 2])
        coeffs_dict = {"bilinear": linear_coeffs, "nearest": nearest_coeffs}
        coeffs = coeffs_dict[interpolation]
        scaler = "pytorch_half_pixel"
        np_out = interpolate_nd(
            x, coeffs, scale_factors=size, scaler=scaler
        ).astype(np.float32)
        of_out_np = of_out.numpy()
        if data_format == "NHWC":
            of_out_np = np.transpose(of_out_np, axes=[0, 3, 1, 2])
        assert np.allclose(of_out_np, np_out, rtol=1e-05, atol=1e-05)
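# A quick numpy sanity check (not part of the original test): with "nearest"
# interpolation and a whole-number scale factor, upsample_2d is expected to
# behave like np.repeat along H and W, so a hand-built 2x case can be
# verified without the ONNX-style interpolate_nd helpers used above.
def _nearest_2x_reference_demo():
    x = np.arange(4, dtype=np.float32).reshape(1, 1, 2, 2)  # NCHW
    expected = np.repeat(np.repeat(x, 2, axis=2), 2, axis=3)
    assert expected.shape == (1, 1, 4, 4)
    return expected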
def compare_with_tensorflow(device_type, data_type, label_type, num_classes, batch_size):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    if device_type == "cpu":
        flow.config.gpu_device_num(0)
        flow.config.cpu_device_num(4)
    else:
        flow.config.gpu_device_num(4)
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def SparseSoftmaxCrossEntropyWithLogitsJob(
        labels: oft.Numpy.Placeholder(
            (batch_size,), dtype=type_name_to_flow_type[label_type]
        )
    ):
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=(batch_size, num_classes),
                dtype=type_name_to_flow_type[data_type],
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            prediction = flow.nn.softmax(logits=x)
        with flow.scope.placement(device_type, "0:0-3"):
            labels_distribute = flow.distribute.broadcast()
            prediction_distribute = flow.distribute.split(len(prediction.shape) - 1)
            loss = flow.nn.sparse_cross_entropy(
                labels=labels.with_distribute(labels_distribute),
                prediction=prediction.with_distribute(prediction_distribute),
            )
        with flow.scope.placement(device_type, "0:0"):
            loss = flow.math.square(loss)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0
            ).minimize(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss

    labels = np.random.randint(0, num_classes, size=(batch_size,)).astype(
        type_name_to_np_type[label_type]
    )
    of_out = SparseSoftmaxCrossEntropyWithLogitsJob(labels).get()
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.sparse_softmax_cross_entropy_with_logits(labels, x)
        tf_out = tf.math.square(tf_out)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-05, atol=1e-05)
    assert np.allclose(
        test_global_storage.Get("x_diff"), tf_x_diff.numpy(), rtol=1e-05, atol=1e-05
    )
    flow.clear_default_session()
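# For reference, the math the job above checks, written out in plain numpy
# (a sketch, not one of the original helpers): a numerically stable softmax
# over the class axis, then the negative log-probability of each label.
def _np_sparse_softmax_cross_entropy(logits, labels):
    e = np.exp(logits - logits.max(axis=1, keepdims=True))  # stable softmax
    p = e / e.sum(axis=1, keepdims=True)
    return -np.log(p[np.arange(logits.shape[0]), labels])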
def compare_with_tensorflow(device_type, activation_type, shape, data_type):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    flow.config.enable_debug_mode(True)
    func_config = flow.FunctionConfig()
    if data_type == flow.float16:
        func_config.enable_auto_mixed_precision(True)
        data_type = flow.float
    func_config.default_data_type(data_type)
    of_activation_map = {
        "relu": flow.nn.relu,
        "sigmoid": flow.math.sigmoid,
        "tanh": flow.math.tanh,
    }
    tf_activation_map = {
        "relu": tf.nn.relu,
        "sigmoid": tf.math.sigmoid,
        "tanh": tf.math.tanh,
    }

    @flow.global_function(type="train", function_config=func_config)
    def ActivationJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=shape,
                dtype=data_type,
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            loss = of_activation_map[activation_type](x)
            lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [1e-4])
            flow.optimizer.SGD(lr_scheduler, momentum=0).minimize(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return loss

    # OneFlow
    of_out = ActivationJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf_activation_map[activation_type](x)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    rtol = 1e-5
    atol = 1e-5
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol, atol)
    assert np.allclose(test_global_storage.Get("x_diff"), tf_x_diff.numpy(), rtol, atol)
def compare_with_numpy(test_case, device_type, input_shape, start_end_dim):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    start_dim = start_end_dim[0]
    end_dim = start_end_dim[1]

    @flow.global_function(type="train", function_config=func_config)
    def FlattenJob() -> flow.typing.Numpy:
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "in",
                shape=input_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=2, maxval=5),
                trainable=True,
            )
            loss = flow.flatten(x, start_dim=start_dim, end_dim=end_dim)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
            ).minimize(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = FlattenJob()
    # Numpy
    of_x = test_global_storage.Get("x")
    of_x_shape = of_x.shape
    of_x_diff = test_global_storage.Get("x_diff")
    true_end_dim = end_dim + len(of_x_shape) if end_dim < 0 else end_dim
    new_shape = []
    for i in range(0, start_dim):
        new_shape.append(of_x_shape[i])
    flatten_dim = 1
    for i in range(start_dim, true_end_dim + 1):
        flatten_dim *= of_x_shape[i]
    new_shape.append(flatten_dim)
    for i in range(true_end_dim + 1, len(of_x_shape)):
        new_shape.append(of_x_shape[i])
    np_out = np.reshape(of_x, tuple(new_shape))
    test_case.assertTrue(of_out.shape == np_out.shape)
    test_case.assertTrue(np.allclose(of_out, np_out, rtol=1e-5, atol=1e-5))
    test_case.assertTrue(
        np.allclose(of_x_diff, np.ones(of_x_diff.shape), rtol=1e-5, atol=1e-5)
    )
def compare_with_tensorflow(device_type, x_shape, y_shape, dtype, axis):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_logical_view(flow.scope.mirrored_view())
    func_config.default_data_type(flow.float)
    func_config.train.primary_lr(1e-4)
    func_config.train.model_update_conf(dict(naive_conf={}))

    @flow.global_function(func_config)
    def ConcatJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=type_name_to_flow_type[dtype],
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            y = flow.get_variable(
                "y",
                shape=y_shape,
                dtype=type_name_to_flow_type[dtype],
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            x = flow.cast_to_current_logical_view(x)
            y = flow.cast_to_current_logical_view(y)
            loss = flow.concat([x, y], axis)
            flow.losses.add_loss(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(y, test_global_storage.Setter("y"))
            flow.watch_diff(y, test_global_storage.Setter("y_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = ConcatJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        y = tf.Variable(test_global_storage.Get("y"))
        tf_out = tf.concat([x, y], axis)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    tf_y_diff = tape.gradient(tf_out, y, loss_diff)
    assert np.array_equal(of_out.numpy(), tf_out.numpy())
    assert np.array_equal(test_global_storage.Get("x_diff"), tf_x_diff.numpy())
    assert np.array_equal(test_global_storage.Get("y_diff"), tf_y_diff.numpy())
def compare_with_tensorflow(device_type, input_shape, in_dtype, out_dtype,
                            test_fuse_cast_scale_pass):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.enable_fuse_cast_scale(True)

    @flow.global_function(type="predict", function_config=func_config)
    def FusedCastScaleJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "in",
                shape=input_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(),
                trainable=True,
            )
            scale = flow.get_variable(
                "scale",
                shape=(1,),
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(),
                trainable=False,
            )
            loss = flow.cast(x, dtype=type_name_to_flow_type[in_dtype])
            if test_fuse_cast_scale_pass:
                loss = flow.cast(
                    loss, dtype=type_name_to_flow_type[out_dtype]
                ) * flow.cast(scale, dtype=type_name_to_flow_type[out_dtype])
            else:
                loss = fused_cast_scale(
                    loss,
                    flow.cast(scale, dtype=type_name_to_flow_type[out_dtype]),
                    name="fused_cast_scale",
                )
            loss = flow.cast(loss, dtype=flow.float)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch(scale, test_global_storage.Setter("scale"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = FusedCastScaleJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        scale = tf.Variable(test_global_storage.Get("scale"))
        tf_out = tf.cast(x, dtype=type_name_to_np_type[in_dtype])
        tf_out = tf.cast(tf_out, dtype=type_name_to_np_type[out_dtype]) * tf.cast(
            scale, dtype=type_name_to_np_type[out_dtype]
        )
        tf_out = tf.cast(tf_out, dtype=tf.float32)
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-5, atol=1e-5)
def _test_batchnorm_relu(test_case, input_shape, axis, data_type):
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_logical_view(flow.scope.consistent_view())
    func_config.default_data_type(flow.float32)

    @flow.global_function(type="train", function_config=func_config)
    def test_job(x: oft.Numpy.Placeholder(input_shape, dtype=flow.float32)):
        v = flow.get_variable(
            name="v",
            shape=(1,),
            dtype=flow.float32,
            initializer=flow.zeros_initializer(),
        )
        x = x + v
        x1 = flow.identity(x)
        x2 = flow.identity(x)
        flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
        flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))
        x1 = flow.cast(x1, data_type)
        x2 = flow.cast(x2, data_type)
        y1 = flow.layers.batch_normalization_relu(x1, axis=axis, name="BN1")
        y2 = flow.math.relu(flow.layers.batch_normalization(x2, axis=axis, name="BN2"))
        y1 = flow.cast(y1, flow.float32)
        y2 = flow.cast(y2, flow.float32)
        flow.watch(y1, test_global_storage.Setter("y1"))
        flow.watch(y2, test_global_storage.Setter("y2"))
        y1 = flow.where(flow.math.greater(y2, v), y1, v)
        y2 = flow.where(flow.math.greater(y1, v), y2, v)
        loss = y1 + y2
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
        ).minimize(flow.math.reduce_sum(loss))
        return loss

    x = np.random.rand(*input_shape).astype(np.float32)
    test_job(x).get()
    tol = 1e-3 if data_type == flow.float16 else 1e-5
    y1 = test_global_storage.Get("y1")
    y2 = test_global_storage.Get("y2")
    test_case.assertTrue(np.allclose(y1, y2, rtol=tol, atol=tol))
    x1_diff = test_global_storage.Get("x1_diff")
    x2_diff = test_global_storage.Get("x2_diff")
    test_case.assertTrue(np.allclose(x1_diff, x2_diff, rtol=tol, atol=tol))
def _compare_with_numpy(test_case, np_func, x, y, axis, keepdims=True):
    x = test_global_storage.Get("x")
    dx = test_global_storage.Get("x_diff")
    np_y = np_func(x, axis=axis, keepdims=keepdims)
    test_case.assertTrue(np.allclose(y, np_y, rtol=1e-5, atol=1e-5))
    # Where several entries tie for the extremum, the gradient is split
    # evenly among them.
    mask = np.where(x == y, 1, 0)
    count = np.add.reduce(mask, axis=axis, keepdims=True)
    np_dx = np.where(x == y, 1 / count, 0)
    test_case.assertTrue(np.allclose(dx, np_dx, rtol=1e-5, atol=1e-5))
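# Worked example of the tie-splitting rule checked above (a standalone
# illustration, not part of the test): when several entries equal the reduced
# extremum, each receives 1/count of the upstream gradient.
def _tie_splitting_demo():
    x = np.array([[1.0, 3.0, 3.0], [2.0, 0.0, 2.0]])
    y = np.max(x, axis=1, keepdims=True)
    mask = x == y
    count = mask.sum(axis=1, keepdims=True)  # [[2], [2]]
    dx = np.where(mask, 1.0 / count, 0.0)    # ties share the gradient
    assert np.allclose(dx.sum(axis=1), 1.0)
    return dx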
def compare_with_np(device_type, input_tensor, dim, dtype):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_logical_view(flow.scope.mirrored_view())
    func_config.default_placement_scope(flow.scope.placement(device_type, "0:0"))
    output_np = np.diag(input_tensor, dim)
    output_shape = output_np.shape
    input_shape = input_tensor.shape
    output_dtype = output_np.dtype
    grad = np.random.random(output_shape).astype(output_dtype)

    @flow.global_function(type="train", function_config=func_config)
    def diag_job(
        input_tensor: tp.Numpy.Placeholder(shape=input_shape, dtype=flow.float),
    ) -> tp.Numpy:
        input_var = flow.get_variable(
            "input_tensor",
            shape=input_shape,
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
            trainable=True,
        )
        input_tensor = input_tensor + input_var
        input_tensor = flow.cast_to_current_logical_view(input_tensor)
        input_tensor = flow.cast(input_tensor, type_name_to_flow_type[dtype])
        output = flow.diag(input_tensor, dim)
        if (
            output.dtype == flow.int64
            or output.dtype == flow.int8
            or output.dtype == flow.int32
        ):
            output = flow.cast(output, flow.float)
        flow.optimizer.Adam(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4])
        ).minimize(output)
        flow.watch(input_tensor, test_global_storage.Setter("x"))
        flow.watch_diff(input_tensor, test_global_storage.Setter("x_diff"))
        flow.watch(output, test_global_storage.Setter("output"))
        flow.watch_diff(output, test_global_storage.Setter("output_diff"))
        return output

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    output_of = diag_job(input_tensor)
    output_diff = test_global_storage.Get("output_diff").astype(dtype)
    x_diff_of = test_global_storage.Get("x_diff").astype(dtype)
    # np
    x_diff_np = diag_grad_np(input_tensor, dim, output_np, output_diff)
    assert np.allclose(output_of, output_np)
    assert np.allclose(x_diff_of, x_diff_np)
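# diag_grad_np is defined elsewhere in the test file; the sketch below is an
# assumption about its behavior, inferred from how np.diag works: the upstream
# gradient dy is routed back through the diagonal mapping.
def diag_grad_np_sketch(x, dim, y, dy):
    dx = np.zeros(x.shape, dtype=dy.dtype)
    if x.ndim == 1:
        # forward built a square matrix with x on its dim-th diagonal;
        # pull that diagonal of dy back
        for i in range(x.shape[0]):
            dx[i] = dy[i, i + dim] if dim >= 0 else dy[i - dim, i]
    else:
        # forward extracted the dim-th diagonal; scatter dy back into it
        for i in range(dy.shape[0]):
            if dim >= 0:
                dx[i, i + dim] = dy[i]
            else:
                dx[i - dim, i] = dy[i]
    return dx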
def compare_with_tensorflow(device_type, x_shape, y_shape, dtype, axis):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_logical_view(flow.scope.mirrored_view())
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def ConcatJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=type_name_to_flow_type[dtype],
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            y = flow.get_variable(
                "y",
                shape=y_shape,
                dtype=type_name_to_flow_type[dtype],
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            x = flow.cast_to_current_logical_view(x)
            y = flow.cast_to_current_logical_view(y)
            loss = flow.concat([x, y], axis)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
            ).minimize(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(y, test_global_storage.Setter("y"))
            flow.watch_diff(y, test_global_storage.Setter("y_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return loss

    # OneFlow
    of_out = ConcatJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        y = tf.Variable(test_global_storage.Get("y"))
        tf_out = tf.concat([x, y], axis)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    tf_y_diff = tape.gradient(tf_out, y, loss_diff)
    assert np.array_equal(of_out.numpy(), tf_out.numpy())
    assert np.array_equal(test_global_storage.Get("x_diff"), tf_x_diff.numpy())
    assert np.array_equal(test_global_storage.Get("y_diff"), tf_y_diff.numpy())
def compare_with_tensorflow(device_type, data_type, label_type, num_classes, batch_size):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.train.primary_lr(1e-4)
    func_config.train.model_update_conf(dict(naive_conf={}))

    @flow.global_function(func_config)
    def SparseSoftmaxCrossEntropyWithLogitsJob(
        labels: oft.Numpy.Placeholder(
            (batch_size,), dtype=type_name_to_flow_type[label_type]
        )
    ):
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=(batch_size, num_classes),
                dtype=type_name_to_flow_type[data_type],
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            prediction = flow.nn.softmax(logits=x)
            loss = flow.nn.sparse_cross_entropy(labels=labels, prediction=prediction)
            loss = flow.math.square(loss)
            flow.losses.add_loss(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return loss

    # fake labels
    labels = np.random.randint(0, num_classes, size=(batch_size,)).astype(
        type_name_to_np_type[label_type]
    )
    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = SparseSoftmaxCrossEntropyWithLogitsJob(labels).get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.sparse_softmax_cross_entropy_with_logits(labels, x)
        tf_out = tf.math.square(tf_out)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-5, atol=1e-5)
    assert np.allclose(
        test_global_storage.Get("x_diff"), tf_x_diff.numpy(), rtol=1e-5, atol=1e-5
    )
    flow.clear_default_session()
def compare_with_tensorflow(device_type, input_shape, dtype, size, data_format,
                            interpolation):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def UpsampleJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "input",
                shape=input_shape,
                dtype=type_name_to_flow_type[dtype],
                initializer=flow.random_uniform_initializer(minval=2, maxval=5),
                trainable=True,
            )
            loss = flow.layers.upsample_2d(
                x, size=size, data_format=data_format, interpolation=interpolation
            )
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
            ).minimize(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = UpsampleJob().get()
    channel_pos = "channels_first" if data_format.startswith("NC") else "channels_last"
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x").astype(np.float32))
        tf_out = tf.keras.layers.UpSampling2D(
            size=size, data_format=channel_pos, interpolation=interpolation
        )(x)
    loss_diff = test_global_storage.Get("loss_diff").astype(np.float32)
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-5, atol=1e-5)
    assert np.allclose(
        test_global_storage.Get("x_diff"), tf_x_diff.numpy(), rtol=1e-5, atol=1e-5
    )
def compare_with_tensorflow(device_type, data_type, shape):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    def np_softmax(x):
        return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)

    @flow.global_function(type="train", function_config=func_config)
    def SoftmaxCrossEntropyWithLogitsJob(
        labels: oft.Numpy.Placeholder(shape, dtype=type_name_to_flow_type[data_type])
    ):
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=shape,
                dtype=type_name_to_flow_type[data_type],
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            loss = flow.nn.softmax_cross_entropy_with_logits(labels=labels, logits=x)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
            ).minimize(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return loss

    # fake labels
    labels = np_softmax(np.random.uniform(size=shape)).astype(
        type_name_to_np_type[data_type]
    )
    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = SoftmaxCrossEntropyWithLogitsJob(labels).get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.softmax_cross_entropy_with_logits(labels, x)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-5, atol=1e-5)
    assert np.allclose(
        test_global_storage.Get("x_diff"), tf_x_diff.numpy(), rtol=1e-5, atol=1e-5
    )
    flow.clear_default_session()
def compare_with_tensorflow(device_type, a_shape, b_shape, transpose_a, transpose_b):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.train.primary_lr(1e-4)
    func_config.train.model_update_conf(dict(naive_conf={}))

    @flow.global_function(func_config)
    def MatmulJob():
        with flow.scope.placement(device_type, "0:0"):
            a = flow.get_variable(
                "a",
                shape=a_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            b = flow.get_variable(
                "b",
                shape=b_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            loss = flow.matmul(a, b, transpose_a, transpose_b)
            flow.losses.add_loss(loss)
            flow.watch(a, test_global_storage.Setter("a"))
            flow.watch_diff(a, test_global_storage.Setter("a_diff"))
            flow.watch(b, test_global_storage.Setter("b"))
            flow.watch_diff(b, test_global_storage.Setter("b_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = MatmulJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        a = tf.Variable(test_global_storage.Get("a"))
        b = tf.Variable(test_global_storage.Get("b"))
        tf_out = tf.matmul(a, b, transpose_a, transpose_b)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_a_diff = tape.gradient(tf_out, a, loss_diff)
    tf_b_diff = tape.gradient(tf_out, b, loss_diff)
    assert np.allclose(of_out.numpy(), tf_out.numpy(), atol=1e-03), np.max(
        np.abs(of_out.numpy() - tf_out.numpy())
    )
    assert np.allclose(test_global_storage.Get("a_diff"), tf_a_diff.numpy(), atol=1e-03)
    assert np.allclose(test_global_storage.Get("b_diff"), tf_b_diff.numpy(), atol=1e-03)
def test_TestMultiInput_grad_mirrored_inplace(test_case):
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.mirrored_view())
    shape = (3, 3)

    @flow.global_function(type="train", function_config=func_config)
    def TestMultiInputJob():
        with flow.scope.placement("gpu", "0:0"):
            x1 = flow.get_variable(
                "x1",
                shape=shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            x2 = flow.get_variable(
                "x2",
                shape=shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            loss = TestMultiInput(x1, x2)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
            ).minimize(loss)
            flow.watch(x1, test_global_storage.Setter("x1"))
            flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
            flow.watch(x2, test_global_storage.Setter("x2"))
            flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))
            return loss

    check_point = flow.train.CheckPoint()
    check_point.init()
    out = TestMultiInputJob().get()
    x1_diff = test_global_storage.Get("x1_diff")
    x2_diff = test_global_storage.Get("x2_diff")
    expect_out = test_global_storage.Get("x1")
    expect_x1_diff = np.ones(shape, dtype=np.float32)
    expect_x2_diff = np.ones(shape, dtype=np.float32) * 2.0
    assert np.allclose(out.numpy(), expect_out)
    assert np.allclose(x1_diff, expect_x1_diff)
    assert np.allclose(x2_diff, expect_x2_diff)
def compare_with_tensorflow(device_type, x_shape, data_type, axes):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    if max(axes) >= len(x_shape):
        return

    @flow.global_function(type="train", function_config=func_config)
    def MomentsJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=type_name_to_flow_type[data_type],
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            (m, v) = flow.nn.moments(x, axes)
            loss = m + v
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
            ).minimize(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return (m, v)

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = MomentsJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.moments(x, axes)
        tf_loss = tf_out[0] + tf_out[1]
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_loss, x, loss_diff)
    for i in range(2):
        assert np.allclose(of_out[i].numpy(), tf_out[i].numpy(), rtol=1e-5, atol=1e-5)
    assert np.allclose(
        test_global_storage.Get("x_diff"), tf_x_diff.numpy(), rtol=1e-5, atol=1e-5
    )
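# What flow.nn.moments computes, restated in plain numpy for readability
# (the test itself goes through tf.nn.moments): per-axes mean and the
# biased variance, i.e. the sum of squared deviations divided by N.
def _np_moments(x, axes):
    m = np.mean(x, axis=tuple(axes))
    v = np.var(x, axis=tuple(axes))  # biased variance, matching tf.nn.moments
    return m, v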
def compare_with_tensorflow(device_type, data_type, shape):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    dtype = type_name_to_flow_type[data_type]

    def np_sigmoid(x):
        return 1 / (1 + np.exp(-x))

    @flow.global_function(type="train", function_config=func_config)
    def SigmoidCrossEntropyWithLogitsJob(labels: oft.Numpy.Placeholder(shape, dtype)):
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=shape,
                dtype=type_name_to_flow_type[data_type],
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            loss = flow.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=x)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0
            ).minimize(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return loss

    labels = np_sigmoid(np.random.randint(0, 10, size=shape)).astype(
        type_name_to_np_type[data_type]
    )
    of_out = SigmoidCrossEntropyWithLogitsJob(labels).get()
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.sigmoid_cross_entropy_with_logits(labels, x)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    tolerance = 1e-05
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=tolerance, atol=tolerance)
    assert np.allclose(
        test_global_storage.Get("x_diff"),
        tf_x_diff.numpy(),
        rtol=tolerance,
        atol=tolerance,
    )
    flow.clear_default_session()
def test_TestMultiInput_grad_mirrored_inplace(test_case):
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.mirrored_view())
    func_config.train.primary_lr(1e-4)
    func_config.train.model_update_conf(dict(naive_conf={}))
    shape = (3, 3)

    @flow.global_function(func_config)
    def TestMultiInputJob():
        with flow.scope.placement("gpu", "0:0"):
            x1 = flow.get_variable(
                "x1",
                shape=shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            x2 = flow.get_variable(
                "x2",
                shape=shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            loss = TestMultiInput(x1, x2)
            flow.losses.add_loss(loss)
            flow.watch(x1, test_global_storage.Setter("x1"))
            flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
            flow.watch(x2, test_global_storage.Setter("x2"))
            flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))
            return loss

    check_point = flow.train.CheckPoint()
    check_point.init()
    out = TestMultiInputJob().get()
    x1_diff = test_global_storage.Get("x1_diff")
    x2_diff = test_global_storage.Get("x2_diff")
    expect_out = test_global_storage.Get("x1")
    expect_x1_diff = np.ones(shape, dtype=np.float32)
    expect_x2_diff = np.ones(shape, dtype=np.float32) * 2.0
    assert np.allclose(out.numpy(), expect_out)
    assert np.allclose(x1_diff, expect_x1_diff)
    assert np.allclose(x2_diff, expect_x2_diff)
def compare_fused_with_no_fused(test_case, batch_size, seq_len, num_heads, head_size,
                                fp16, verbose=False):
    hidden_size = num_heads * 3 * head_size
    input = gen_random_input((seq_len, batch_size, hidden_size))
    # fused op
    func = make_self_attn_qk_v_func(batch_size, seq_len, num_heads, head_size, True, fp16)
    qmk, v = func(input)
    # unfused op
    func_ = make_self_attn_qk_v_func(batch_size, seq_len, num_heads, head_size, False, fp16)
    qmk_, v_ = func_(input)
    # np
    _q, _k, _v = np_qkv(input, head_size)
    _qmk = np_bgemm(
        _q.transpose(1, 2, 0, 3), _k.transpose(1, 2, 3, 0), get_alpha(head_size)
    )
    _v = _v.transpose(1, 2, 0, 3)
    if verbose:
        print("")
        print("=" * 80)
        print(f"input: {input.shape}\n{input}")
        print(f"_q: {_q.shape}\n{_q}")
        print(f"_k: {_k.shape}\n{_k}")
        print(f"_v: {_v.shape}\n{_v}")
        print(f"_qmk: {_qmk.shape}\n{_qmk}")
        print(f"qmk: {qmk.shape}\n{qmk}")
        print(f"qmk_: {qmk_.shape}\n{qmk_}")
        diff = qmk - qmk_
        print("abs diff mean:", np.abs(diff).mean())
        print("abs diff max:", np.abs(diff).max())
    test_case.assertTrue(np.allclose(qmk, qmk_))
    test_case.assertTrue(np.allclose(qmk, _qmk))
    test_case.assertTrue(np.allclose(v, v_))
    test_case.assertTrue(np.allclose(v, _v))
    h_grad = test_global_storage.Get("h_grad_fused")
    h_grad_ = test_global_storage.Get("h_grad")
    if verbose:
        print(f"h_grad: {h_grad.shape}\n{h_grad}")
        print(f"h_grad_: {h_grad_.shape}\n{h_grad_}")
    test_case.assertTrue(np.allclose(h_grad, h_grad_))
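# The numpy helpers above (np_qkv, np_bgemm, get_alpha) come from elsewhere in
# the file; plausible sketches for two of them follow (assumptions, not the
# originals): the usual 1/sqrt(d_k) attention scaling and a batched matmul
# with that scalar coefficient applied.
def get_alpha_sketch(head_size):
    return 1.0 / np.sqrt(head_size)

def np_bgemm_sketch(a, b, alpha):
    return np.matmul(a, b) * alpha  # broadcasts over the leading batch dims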
def of_run(device_type, x_shape, data_type, rate, seed):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    if data_type == "float16":
        func_config.enable_auto_mixed_precision(True)
        dtype = flow.float
    else:
        dtype = type_name_to_flow_type[data_type]
    func_config.train.primary_lr(1e-4)
    func_config.train.model_update_conf(dict(naive_conf={}))

    @flow.global_function(func_config)
    def DropoutJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=1, maxval=10),
                trainable=True,
            )
            of_out = flow.nn.dropout(x, rate=rate, seed=seed, name="dropout")
            loss = flow.math.square(of_out)
            flow.losses.add_loss(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(of_out, test_global_storage.Setter("out"))
            flow.watch_diff(of_out, test_global_storage.Setter("out_diff"))
            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = DropoutJob().get()
    of_out = test_global_storage.Get("out")
    out_diff = test_global_storage.Get("out_diff")
    assert np.allclose(
        [1 - np.count_nonzero(of_out) / of_out.size], [rate], atol=rate / 5
    )
    x = test_global_storage.Get("x")
    x_diff = test_global_storage.Get("x_diff")
    out_scale = of_out[np.where(of_out != 0)] / x[np.where(of_out != 0)]
    diff_scale = x_diff[np.where(of_out != 0)] / out_diff[np.where(of_out != 0)]
    assert np.allclose(out_scale, 1.0 / (1.0 - rate), atol=1e-5)
    assert np.allclose(diff_scale, 1.0 / (1.0 - rate), atol=1e-5)
def of_run(device_type, x_shape, data_type, rate):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    if data_type == "float16":
        dtype = flow.float
    else:
        dtype = type_name_to_flow_type[data_type]

    @flow.global_function(type="train", function_config=func_config)
    def DropoutJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=-1, maxval=1),
                trainable=True,
            )
            if data_type == "float16":
                x = flow.cast(flow.cast(x, flow.float16), dtype)
                of_out = flow.cast(
                    flow.nn.dropout(
                        flow.cast(x, flow.float16), rate=rate, name="dropout"
                    ),
                    dtype,
                )
            else:
                of_out = flow.nn.dropout(x, rate=rate, name="dropout")
            loss = flow.math.square(of_out)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0
            ).minimize(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(of_out, test_global_storage.Setter("out"))
            flow.watch_diff(of_out, test_global_storage.Setter("out_diff"))
            return loss

    of_out = DropoutJob().get()
    of_out = test_global_storage.Get("out")
    out_diff = test_global_storage.Get("out_diff")
    assert np.allclose([1 - np.count_nonzero(of_out) / of_out.size], [rate], atol=1e-4)
    x = test_global_storage.Get("x")
    x_diff = test_global_storage.Get("x_diff")
    out_scale = of_out[np.where(of_out != 0)] / x[np.where(of_out != 0)]
    diff_scale = x_diff[np.where(of_out != 0)] / out_diff[np.where(of_out != 0)]
    assert np.allclose(out_scale, 1.0 / (1.0 - rate), atol=1e-05)
    assert np.allclose(diff_scale, 1.0 / (1.0 - rate), atol=1e-05)
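# The two scale asserts above encode inverted dropout: surviving units are
# scaled by 1/(1-rate) so the expected activation is unchanged. A minimal
# standalone numpy illustration (not part of the test):
def _inverted_dropout_demo(rate=0.3):
    x = np.random.rand(4, 4).astype(np.float32)
    keep = np.random.rand(*x.shape) >= rate
    y = np.where(keep, x / (1.0 - rate), 0.0)
    # every kept entry is exactly x / (1 - rate)
    assert np.allclose(y[y != 0] / x[y != 0], 1.0 / (1.0 - rate))
    return y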
def _run_test(test_case, device, out_shape, num_segments, segment_ids_shape):
    segment_ids = _gen_segment_ids(out_shape, num_segments, segment_ids_shape)
    data = _gen_data(out_shape, num_segments, segment_ids_shape)
    unsorted_batch_segment_sum_out = _make_unsoted_segment_sum_fn(
        device, data, segment_ids, num_segments
    ).get()
    out_ndarray = unsorted_batch_segment_sum_out.numpy()
    grad_in_ndarray = test_global_storage.Get("x_diff")
    grad_out_ndarray = test_global_storage.Get("loss_diff")
    _check(test_case, data, segment_ids, out_shape, out_ndarray)
    _check_bw(
        test_case, grad_out_ndarray, segment_ids, grad_in_ndarray.shape, grad_in_ndarray
    )
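# _check presumably validates against a plain segment sum; for the ordinary
# (non-batch) case that reference is a single np.add.at call. This is a hedged
# sketch: the batch variant under test applies the same idea per batch row.
def _np_unsorted_segment_sum(data, segment_ids, num_segments):
    out = np.zeros((num_segments,) + data.shape[1:], dtype=data.dtype)
    np.add.at(out, segment_ids, data)  # rows with equal ids accumulate
    return out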
def compare_with_tensorflow(device_type, x_shape, data_type, axis):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    if data_type == "float16":
        func_config.enable_auto_mixed_precision(True)
        dtype = flow.float
    else:
        dtype = type_name_to_flow_type[data_type]
    func_config.train.primary_lr(1e-4)
    func_config.train.model_update_conf(dict(naive_conf={}))

    @flow.global_function(func_config)
    def SoftmaxJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            )
            loss = flow.nn.softmax(x, axis=axis)
            flow.losses.add_loss(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = SoftmaxJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.softmax(x, axis=axis)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-5, atol=1e-5)
    assert np.allclose(
        test_global_storage.Get("x_diff"), tf_x_diff.numpy(), rtol=1e-5, atol=1e-5
    )
def compare_reduce_sum_with_tensorflow(device_type, input_shape, axis, keepdims,
                                       rtol=1e-5, atol=1e-5):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.train.primary_lr(1e-4)
    func_config.train.model_update_conf(dict(naive_conf={}))

    @flow.global_function(func_config)
    def ReduceSumJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "in",
                shape=input_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=2, maxval=5),
                trainable=True,
            )
            loss = flow.math.reduce_sum(x, axis=axis, keepdims=keepdims)
            flow.losses.add_loss(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = ReduceSumJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.math.reduce_sum(x, axis=axis, keepdims=keepdims)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    # use the rtol/atol parameters rather than hard-coded literals
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=rtol, atol=atol)
    assert np.allclose(
        test_global_storage.Get("x_diff"), tf_x_diff.numpy(), rtol=rtol, atol=atol
    )
def compare_with_np(device_type, x_shape, like0_shape, like1_shape, dtype):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def SplitLikeJob(x: oft.Numpy.Placeholder(x_shape, dtype=flow.float)):
        v = flow.get_variable(
            "x",
            shape=x_shape,
            dtype=flow.float,
            initializer=flow.constant_initializer(0),
            trainable=True,
        )
        x += v
        like0 = flow.constant(0, dtype=flow.float, shape=like0_shape)
        like1 = flow.constant(0, dtype=flow.float, shape=like1_shape)
        with flow.scope.placement("gpu", "0:0"):
            y0, y1 = split_like(x, [like0, like1], "split_like")
            loss = y0
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return y0, y1

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    x = np.random.randn(*x_shape).astype(np.float32)
    y0, y1 = SplitLikeJob(x).get()
    assert (like0_shape[0] + like1_shape[0]) == x_shape[0]
    np_y0 = x[0:like0_shape[0]]
    np_y1 = x[like0_shape[0]:]
    zeros = np.zeros(np_y1.shape, dtype=np.float32)
    np_x_diff = np.concatenate([test_global_storage.Get("loss_diff"), zeros], axis=0)
    assert np.array_equal(y0.numpy(), np_y0)
    assert np.array_equal(y1.numpy(), np_y1)
    assert np.array_equal(test_global_storage.Get("x_diff"), np_x_diff)
def RunOneflowBiasAdd(data_type, device_type, value, bias, flow_args):
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def FlowJob(
        value: oft.Numpy.Placeholder(value.shape),
        bias: oft.Numpy.Placeholder(bias.shape),
    ):
        with flow.scope.placement(device_type, "0:0"):
            value += flow.get_variable(
                name="v1",
                shape=(1,),
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )
            bias += flow.get_variable(
                name="v2",
                shape=(1,),
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )
            if data_type == "float16":
                comp_value = flow.cast(value, dtype=flow.float16)
                comp_bias = flow.cast(bias, dtype=flow.float16)
            else:
                comp_value = value
                comp_bias = bias
            loss = flow.nn.bias_add(comp_value, comp_bias, *flow_args)
            if data_type == "float16":
                loss = flow.cast(loss, dtype=flow.float)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [0]), momentum=0
            ).minimize(loss)
            flow.watch_diff(value, test_global_storage.Setter("value_diff"))
            flow.watch_diff(bias, test_global_storage.Setter("bias_diff"))
            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    y = FlowJob(value, bias).get().numpy()
    value_diff = test_global_storage.Get("value_diff")
    bias_diff = test_global_storage.Get("bias_diff")
    return y, value_diff, bias_diff
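# Hypothetical usage of RunOneflowBiasAdd (the shapes, the "NCHW" flow_args
# value, and the broadcast reshape below are illustration-only assumptions):
def _bias_add_usage_demo():
    value = np.random.rand(2, 3, 4, 5).astype(np.float32)
    bias = np.random.rand(3).astype(np.float32)
    y, value_diff, bias_diff = RunOneflowBiasAdd(
        "float32", "gpu", value, bias, ("NCHW",)
    )
    np_y = value + bias.reshape(1, -1, 1, 1)  # broadcast along the C axis
    assert np.allclose(y, np_y, rtol=1e-5, atol=1e-5)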
def _test_logical_slice(test_case, var_shape, slice_tuples, split_axis, device_tag,
                        flow_dtype, device_num):
    flow.clear_default_session()
    if device_tag == "gpu":
        flow.config.gpu_device_num(device_num)

    @flow.global_function()
    def slice_fn():
        with flow.scope.placement(device_tag, "0:0-{}".format(device_num - 1)):
            var = flow.get_variable(
                name="var",
                shape=var_shape,
                dtype=flow_dtype,
                initializer=flow.random_uniform_initializer(-10, 10, dtype=flow_dtype),
                distribute=flow.distribute.split(split_axis),
            )
            flow.watch(var, test_global_storage.Setter("var"))
            ret = flow.experimental.logical_slice(var, slice_tuples)
            return ret

    checkpoint = flow.train.CheckPoint()
    checkpoint.init()
    of_res = slice_fn().get().numpy()
    var_np = test_global_storage.Get("var")
    slice_objs = []
    for s in slice_tuples:
        slice_objs.append(slice(s[0], s[1], s[2]))
    test_case.assertTrue(np.array_equal(of_res, var_np[tuple(slice_objs)]))
def RunOneflowOp(device_type, flow_op, x, flow_args):
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def FlowJob(x: oft.Numpy.Placeholder(x.shape)):
        with flow.scope.placement(device_type, "0:0"):
            x += flow.get_variable(
                name="v1",
                shape=(1,),
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )
            loss = flow_op(x, *flow_args)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [0]), momentum=0
            ).minimize(loss)
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    y = FlowJob(x).get().numpy()
    x_diff = test_global_storage.Get("x_diff")
    return y, x_diff
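# Hypothetical usage of RunOneflowOp with relu (flow.nn.relu appears earlier
# in these tests; expecting x_diff to be the relu mask assumes the usual
# all-ones upstream gradient from minimize):
def _relu_op_usage_demo():
    x = np.random.randn(2, 3).astype(np.float32)
    y, x_diff = RunOneflowOp("cpu", flow.nn.relu, x, ())
    assert np.allclose(y, np.maximum(x, 0.0), rtol=1e-5, atol=1e-5)
    assert np.allclose(x_diff, (x > 0).astype(np.float32))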
def compare_with_tensorflow(device_type, input_shape, perm):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def TransposeJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "input",
                shape=input_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=2, maxval=5),
                trainable=True,
            )
            loss = flow.transpose(x, perm)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
            ).minimize(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = TransposeJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.transpose(x, perm)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-5, atol=1e-5)
    assert np.allclose(
        test_global_storage.Get("x_diff"), tf_x_diff.numpy(), rtol=1e-5, atol=1e-5
    )