def _test_model_io(test_case, shape, dtype, lr, num_iters):
    """Train a variable for `num_iters` steps, snapshotting after each step,
    then reload the final snapshot in a fresh session and check it matches
    both the live value and the raw on-disk bytes.

    Relies on module-level helpers `_make_gen_var_func`, `_make_get_var_func`
    and `_load_snapshot_manually`.
    """
    flow.clear_default_session()
    flow.config.enable_legacy_model_io(True)
    gen_var = _make_gen_var_func(shape, dtype, lr)
    model_save_root_dir = "./log/snapshot/"
    if not os.path.exists(model_save_root_dir):
        os.makedirs(model_save_root_dir)
    # Timestamped path prefix; per-iteration snapshots append "-{i}".
    snapshot_path = model_save_root_dir + "snapshot-{}".format(
        time.strftime("%Y%m%d-%H:%M:%S"))
    checkpoint = flow.train.CheckPoint()
    checkpoint.init()
    variables = []
    for i in range(num_iters):
        var = gen_var(
            np.random.rand(*shape).astype(
                flow.convert_oneflow_dtype_to_numpy_dtype(dtype)))
        if i > 0:
            # NOTE(review): expects each SGD step to shift every element by
            # exactly lr / var.size — presumably a property of the loss built
            # in _make_gen_var_func; confirm against that helper.
            test_case.assertTrue(
                np.allclose(var, variables[-1] - lr / var.size))
        variables.append(var)
        checkpoint.save("{}-{}".format(snapshot_path, i))
    # Fresh session: load the last snapshot and read the variable back.
    flow.clear_default_session()
    get_var = _make_get_var_func(shape, dtype)
    final_snapshot_path = "{}-{}".format(snapshot_path, num_iters - 1)
    checkpoint = flow.train.CheckPoint()
    checkpoint.load(final_snapshot_path)
    final_var = get_var()
    var_from_file = _load_snapshot_manually(final_snapshot_path, shape, dtype)
    test_case.assertTrue(np.allclose(final_var, var_from_file))
def distribute_reshape_test(device_type, device_num, input_shape, shape):
    """Smoke-test reshape of a split(2)-distributed trainable variable on
    `device_num` devices: builds a train job, runs one step, fetches results.
    """
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    flow.config.gpu_device_num(device_num)
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def ReshapeJob():
        with flow.scope.placement(device_type, "0:0-{}".format(device_num - 1)):
            x = flow.get_variable(
                "var_x",
                shape=input_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=2, maxval=5),
                trainable=True,
                # Split on axis 2 so the reshape must handle a non-trivial
                # distribution, not a plain broadcast.
                distribute=flow.distribute.split(2),
            )
            loss = flow.reshape(x, shape)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0
            ).minimize(loss)
            return (x, loss)

    (x, loss) = ReshapeJob().get()
def _test_reshape(test_case):
    """Reshape under 2-D hierarchical parallelism (2x2 over 4 GPUs) with
    ["S(0)", "S(1)"] nd_sbp; since the variable is zero-initialized, the
    reshaped output must equal the input elementwise.
    """
    flow.clear_default_session()
    flow.config.gpu_device_num(4)
    flow.config.enable_legacy_model_io(True)
    flow.config.enable_model_io_v2(True)
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float32)

    @flow.global_function(type="train", function_config=func_config)
    def FlowJob(x: flow.typing.Numpy.Placeholder((4, 6), dtype=flow.float)):
        with flow.scope.placement("gpu", "0:0-3", (2, 2)):
            v = flow.get_variable(
                "x",
                shape=(4, 6),
                dtype=flow.float,
                initializer=flow.constant_initializer(0),
                trainable=True,
                nd_sbp=["S(0)", "S(1)"],
            )
            x = flow.hierarchical_parallel_cast(x, nd_sbp=["S(0)", "S(1)"])
            x += v
            loss = flow.reshape(x, (4, 2, 3))
        # Collapse back to a 1-D sbp before the optimizer and return.
        loss = flow.hierarchical_parallel_cast(loss, nd_sbp=["S(0)"])
        flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler([], [0.0001]),
                           momentum=0).minimize(loss)
        return loss

    x = np.random.randn(4, 6).astype(np.float32)
    my_loss = FlowJob(x).get()
    test_case.assertTrue(np.allclose(x.flatten(), my_loss.numpy().flatten()))
def WatchDiff(test_case, device_type, input_shape, dtype):
    """Check flow.watch_diff: the job minimizes the variable itself, so the
    gradient d(x)/dx is exactly 1 everywhere and the callback expects ones.
    """
    assert device_type in ["gpu", "cpu"]
    assert dtype in ["float32", "double"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    def CheckOnes(diff):
        # Gradient callback: an all-ones tensor of the input shape.
        ones = np.ones(input_shape)
        test_case.assertTrue(
            np.allclose(diff.numpy(), ones, rtol=1e-05, atol=1e-05))

    @flow.global_function(type="train", function_config=func_config)
    def TrainJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "in",
                shape=input_shape,
                dtype=type_name_to_flow_type[dtype],
                initializer=flow.random_uniform_initializer(),
                trainable=True,
            )
            flow.watch_diff(x, CheckOnes)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [0.0001]), momentum=0).minimize(x)

    TrainJob()
def _test_body(self, dy, x, mean, inv_variance, params_axis, dtype=np.float32):
    """Compare layer-norm parameter gradients (d_beta, d_gamma) computed
    with and without XLA; shapes must match and values must agree within
    rtol=1e-3 / atol=1e-5.

    FIX: the original used `self.assertTrue(a.shape, b.shape)`, where the
    second argument is only the failure *message* — the call passes for any
    truthy first argument and never compares the shapes. Replaced with
    `assertEqual`, which performs the intended comparison.
    """
    f1 = make_job(dy.shape, mean.shape, params_axis, dtype=flow.float32)
    f2 = make_xla_job(dy.shape, mean.shape, params_axis, dtype=flow.float32)
    (d_beta1, d_gamma1) = f1(dy, x, mean, inv_variance).get()
    (d_beta2, d_gamma2) = f2(dy, x, mean, inv_variance).get()
    print("beta diff:")
    print("    without xla: ", d_beta1)
    print("    with xla: ", d_beta2)
    print("gamma diff:")
    print("    without xla: ", d_gamma1)
    print("    with xla: ", d_gamma2)
    # Shapes must be identical before comparing values.
    self.assertEqual(d_beta1.shape, d_beta2.shape)
    self.assertEqual(d_gamma1.shape, d_gamma2.shape)
    self.assertTrue(
        np.allclose(d_beta1.numpy(), d_beta2.numpy(), rtol=0.001, atol=1e-05))
    self.assertTrue(
        np.allclose(d_gamma1.numpy(), d_gamma2.numpy(), rtol=0.001, atol=1e-05))
    # Tear down so the next test starts from a clean session.
    flow.clear_default_session()
def compare_with_tensorflow(device_type, x_shape, axis):
    """Compare flow.expand_dims against tf.expand_dims on an all-ones input,
    and check that the backward pass delivers an all-ones gradient (expand
    is shape-only, so the gradient is the identity map).
    """
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    def check_grad(x_diff_blob):
        # expand_dims adds a size-1 axis; every input element's grad is 1.
        assert np.array_equal(x_diff_blob.numpy(), np.ones(x_shape))

    @flow.global_function(type="train", function_config=func_config)
    def ExpandDimsJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "var",
                shape=x_shape,
                dtype=flow.float,
                initializer=flow.ones_initializer(),
                trainable=True,
            )
            flow.watch_diff(x, check_grad)
            loss = flow.expand_dims(x, axis)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [0.0001]), momentum=0).minimize(loss)
            return loss

    of_out = ExpandDimsJob().get().numpy()
    tf_out = tf.expand_dims(np.ones(x_shape, dtype=np.float32), axis).numpy()
    assert np.array_equal(of_out, tf_out)
def _run_multi_count_test(test_case, device_type, x1_shape, x2_shape, dtype, x1_count, x2_count):
    """multi_count_not_finite over a list that repeats x1 `x1_count` times
    and x2 `x2_count` times; compared against a NumPy count of non-finite
    elements. `x2_shape` must be at least 2-D (it is indexed with [6, 5]).
    """
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(function_config=func_config)
    def multi_count_not_finite_job(
        x1: oft.Numpy.Placeholder(x1_shape, dtype=type_name_to_flow_type[dtype]),
        x2: oft.Numpy.Placeholder(x2_shape, dtype=type_name_to_flow_type[dtype]),
    ):
        x_list = []
        for i in range(x1_count):
            x_list.append(x1)
        for i in range(x2_count):
            x_list.append(x2)
        with flow.scope.placement(device_type, "0:0"):
            return flow.multi_count_not_finite(x_list)

    x1 = np.random.randn(*x1_shape).astype(type_name_to_np_type[dtype])
    # Seed a few NaN/Inf entries so the non-finite count is non-zero.
    x1[0] = np.nan
    x1[3] = np.inf
    x2 = np.random.randn(*x2_shape).astype(type_name_to_np_type[dtype])
    x2[2] = np.inf
    x2[6, 5] = np.nan
    y = multi_count_not_finite_job(x1, x2).get()
    x1_not_finite = x1.size - np.sum(np.isfinite(x1))
    x2_not_finite = x2.size - np.sum(np.isfinite(x2))
    # Each repetition of a tensor contributes its full non-finite count.
    np_y = x1_not_finite * x1_count + x2_not_finite * x2_count
    assert y.numpy() == np_y
def _test_partial_sum_to_broadcast(test_case, src_device_type, dst_device_type):
    """Partial-sum -> broadcast boxing: reduce a split(0) blob over axis 0 on
    the source devices, broadcast the result to the destination devices, and
    compare with np.sum(x, axis=0) for every (src, dst) device-count pair.
    """
    flow.clear_default_session()
    flow.config.gpu_device_num(4)
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.consistent_view())

    def build_p2b(input_blob, src_device_num, dst_device_num):
        with flow.scope.placement(src_device_type, "0:0-" + str(src_device_num - 1)):
            src = flow.identity(
                input_blob.with_distribute(flow.distribute.split(0)))
            src = flow.math.reduce_sum(src, axis=0)
        with flow.scope.placement(dst_device_type, "0:0-" + str(dst_device_num - 1)):
            dst = flow.identity(
                src.with_distribute(flow.distribute.broadcast()))
        return dst

    @flow.global_function(function_config=func_config)
    def partial_sum_to_broadcast_job(input_blob: oft.Numpy.Placeholder(
            (96, 96, 96))):
        result_list = []
        # NOTE(review): source counts start at 2 — presumably because a
        # partial-sum needs more than one producer; confirm intent.
        for i in (2, 3):
            for j in (1, 2, 3):
                result_list.append(build_p2b(input_blob, i, j))
        return tuple(result_list)

    x = np.random.uniform(-1e-05, 1e-05, (96, 96, 96)).astype(np.float32)
    result_tuple = partial_sum_to_broadcast_job(x).get()
    for out in result_tuple:
        test_case.assertTrue(np.allclose(np.sum(x, axis=0), out.numpy()))
def _test_multi_lbi(test_case, src_device_type, dst_device_type, src_device_num, dst_device_num):
    """Box three logical blobs (identity_n) across placements with different
    source/destination distributions; each output must equal the input.

    FIXES vs. original:
    - The job was executed three separate times just to read the three
      outputs of a single call; it is now run once and the tuple unpacked.
    - `src_b` was built with split(1) despite its name and the asymmetric
      destination distributions indicating a broadcast source; it now uses
      flow.distribute.broadcast(). The asserted result (identity of x) is
      unchanged either way.
    """
    flow.clear_default_session()
    flow.config.gpu_device_num(4)
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.consistent_view())

    @flow.global_function(function_config=func_config)
    def multi_lbi_job(x: oft.Numpy.Placeholder((96, 96, 96))):
        with flow.scope.placement(src_device_type, "0:0-" + str(src_device_num - 1)):
            src_s0 = flow.identity(x.with_distribute(flow.distribute.split(0)))
            src_s1 = flow.identity(x.with_distribute(flow.distribute.split(1)))
            src_b = flow.identity(x.with_distribute(flow.distribute.broadcast()))
            (t0_0, t0_1, t0_2) = flow.identity_n((src_s0, src_s1, src_b))
        with flow.scope.placement(dst_device_type, "0:0-" + str(dst_device_num - 1)):
            t0_0 = t0_0.with_distribute(flow.distribute.split(1))
            t0_1 = t0_1.with_distribute(flow.distribute.broadcast())
            t0_2 = t0_2.with_distribute(flow.distribute.split(1))
            (t1_0, t1_1, t1_2) = flow.identity_n((t0_0, t0_1, t0_2))
        return (t1_0, t1_1, t1_2)

    x = np.random.uniform(-1e-05, 1e-05, (96, 96, 96)).astype(np.float32)
    # Single execution; all three outputs come from the same run.
    (r0, r1, r2) = multi_lbi_job(x).get()
    test_case.assertTrue(np.array_equal(x, r0.numpy()))
    test_case.assertTrue(np.array_equal(x, r1.numpy()))
    test_case.assertTrue(np.array_equal(x, r2.numpy()))
def _test_slice_dynamic(
    test_case,
    input,
    slice_args,
    outputs,
    static_shape=None,
    dtype=flow.float32,
    device_tag=DEFAULT_DEVICE_TAG,
):
    """Run a mirrored-view dynamic slice job and compare each produced slice
    with its expected array. `static_shape` sets the placeholder shape and
    defaults to input.shape. (`input` shadows the builtin; name kept for
    caller compatibility.)
    """
    # Normalize operands to the numpy dtype matching `dtype`.
    input = input.astype(flow.convert_oneflow_dtype_to_numpy_dtype(dtype))
    outputs = [
        output.astype(flow.convert_oneflow_dtype_to_numpy_dtype(dtype))
        for output in outputs
    ]
    if static_shape is None:
        static_shape = input.shape
    flow.clear_default_session()
    func_cfg = flow.FunctionConfig()
    func_cfg.default_data_type(dtype)
    func_cfg.default_placement_scope(flow.scope.placement(device_tag, "0:0"))
    func_cfg.default_logical_view(flow.scope.mirrored_view())
    slice_func = _make_slice_dynamic_func(slice_args, static_shape, dtype, func_cfg)
    of_outputs = slice_func([input])
    for (out, of_out) in zip(outputs, of_outputs):
        # Mirrored outputs are per-device lists; [0] is the only device here.
        test_case.assertTrue(np.array_equal(out, of_out[0]))
def _test_slice_update(
    test_case,
    input,
    update,
    slice_args,
    output,
    dtype=flow.float32,
    device_tag=DEFAULT_DEVICE_TAG,
    verbose=False,
):
    """Apply slice_update(input, update, slice_args) via a compiled job and
    compare with the expected `output`; `verbose` dumps all operands.
    (`input` shadows the builtin; name kept for caller compatibility.)
    """
    # Normalize every operand to the numpy dtype matching `dtype`.
    input = input.astype(flow.convert_oneflow_dtype_to_numpy_dtype(dtype))
    update = update.astype(flow.convert_oneflow_dtype_to_numpy_dtype(dtype))
    output = output.astype(flow.convert_oneflow_dtype_to_numpy_dtype(dtype))
    flow.clear_default_session()
    func_cfg = flow.FunctionConfig()
    func_cfg.default_data_type(dtype)
    func_cfg.default_placement_scope(flow.scope.placement(device_tag, "0:0"))
    slice_func = _make_slice_update_func(
        slice_args, input.shape, update.shape, dtype, func_cfg
    )
    of_output = slice_func(input, update)
    if verbose:
        print("input:\n{}".format(input))
        print("update:\n{}".format(update))
        print("slice_args:", slice_args)
        print("output:\n{}".format(output))
        print("dtype:", dtype)
        print("device_tag:", device_tag)
        print("of_output:\n{}".format(of_output))
    test_case.assertTrue(np.array_equal(output, of_output))
def _of_target_resize_bbox_scale(images, bbox_list, target_size, max_size):
    """Resize `images` with flow.image_target_resize and scale the matching
    bounding boxes by the returned per-image scale.

    Returns (scaled bbox arrays, new image sizes) for the first mirror.
    """
    image_shape = _get_images_static_shape(images)
    bbox_shape = _get_bbox_static_shape(bbox_list)
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.mirrored_view())

    @flow.global_function(function_config=func_config)
    def target_resize_bbox_scale_job(
        image_def: oft.ListListNumpy.Placeholder(shape=tuple(image_shape), dtype=flow.float),
        bbox_def: oft.ListListNumpy.Placeholder(shape=tuple(bbox_shape), dtype=flow.float),
    ):
        images_buffer = flow.tensor_list_to_tensor_buffer(image_def)
        (resized_images_buffer, new_size, scale) = flow.image_target_resize(
            images_buffer, target_size=target_size, max_size=max_size)
        bbox_buffer = flow.tensor_list_to_tensor_buffer(bbox_def)
        # Rescale the boxes with the exact scale the resize op produced.
        scaled_bbox = flow.object_bbox_scale(bbox_buffer, scale)
        scaled_bbox_list = flow.tensor_buffer_to_tensor_list(
            scaled_bbox, shape=bbox_shape[1:], dtype=flow.float)
        return (scaled_bbox_list, new_size)

    # Add a leading batch axis of 1 per image/bbox for the list-list layout.
    input_image_list = [np.expand_dims(image, axis=0) for image in images]
    input_bbox_list = [np.expand_dims(bbox, axis=0) for bbox in bbox_list]
    (output_bbox_list, output_image_size) = target_resize_bbox_scale_job(
        [input_image_list], [input_bbox_list]).get()
    return (output_bbox_list.numpy_lists()[0], output_image_size.numpy_list()[0])
def _of_image_decode(images):
    """Decode image files through flow.image_decode.

    Reads each file's raw bytes, feeds them as a padded int8 tensor list,
    and returns the list of decoded uint8 image arrays (first mirror).

    FIX: file handles were opened in a list comprehension and closed
    manually afterwards, leaking them if a read raised; each file is now
    read inside a `with` block so the handle is always closed.
    """
    images_bytes = []
    for im in images:
        with open(im, "rb") as imf:
            images_bytes.append(imf.read())
    # Static placeholder shape: one row per image, padded to the longest file.
    static_shape = (len(images_bytes), max(len(bys) for bys in images_bytes))
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.mirrored_view())

    @flow.global_function(function_config=func_config)
    def image_decode_job(images_def: oft.ListListNumpy.Placeholder(
            shape=static_shape, dtype=flow.int8)):
        images_buffer = flow.tensor_list_to_tensor_buffer(images_def)
        decoded_images_buffer = flow.image_decode(images_buffer)
        return flow.tensor_buffer_to_tensor_list(decoded_images_buffer,
                                                 shape=(640, 640, 3),
                                                 dtype=flow.uint8)

    # Each file becomes a (1, num_bytes) int8 row.
    images_np_arr = [
        np.frombuffer(bys, dtype=np.byte).reshape(1, -1)
        for bys in images_bytes
    ]
    decoded_images = image_decode_job([images_np_arr]).get().numpy_lists()
    return decoded_images[0]
def run_fuse_cast_scale_mlir(test_case, device=None, in_type=None, out_type=None, shape=None):
    """Cast x to `out_type` and multiply by a scalar variable; checks the
    (potentially MLIR-fused) cast+scale result equals x * scale in NumPy.
    """
    flow.clear_default_session()
    func_config = flow.FunctionConfig()

    @flow.global_function(function_config=func_config)
    def FuseCastScaleJob(x: oft.Numpy.Placeholder(
            shape, dtype=in_type)) -> Tuple[oft.Numpy, oft.Numpy]:
        with flow.scope.placement(device, "0:0-0"):
            # Non-trainable scalar so the graph stays a pure cast*scale.
            scale = flow.get_variable(
                "scale",
                shape=(1, ),
                dtype=out_type,
                initializer=flow.random_uniform_initializer(),
                trainable=False,
            )
            loss = flow.cast(x, dtype=out_type) * scale
            return (loss, scale)

    np_in_type = dtype_util.convert_oneflow_dtype_to_numpy_dtype(in_type)
    x = (np.random.rand(*shape) * 10).astype(np_in_type)
    ret = FuseCastScaleJob(x)
    (loss, scale) = ret
    test_case.assertTrue(np.allclose(loss, x * scale))
def _test_body(
    self,
    x,
    filters,
    kernel_size,
    strides,
    padding,
    data_format,
    dilation_rate,
    dtype=np.float32,
):
    """Run a TensorRT conv job in float32, cache the int8 calibration table,
    then run again so the int8 engine path is exercised. Results are only
    printed; there is no numeric assertion in this body.

    FIX: the original wrapped each single run in `for i in range(1)` loops
    that executed exactly once — the pointless loops are removed.
    """
    f2 = make_trt_job(
        x.shape,
        filters.shape,
        kernel_size,
        strides,
        padding,
        data_format,
        dilation_rate,
        dtype=flow.float32,
    )
    b = f2(x, filters).get()
    print("with tensorrt float32: ", b)
    # Build the int8 calibration table from the float32 run above.
    flow.tensorrt.cache_int8_calibration()
    b = f2(x, filters).get()
    print("with tensorrt int8: ", b)
    # Tear down so the next test starts from a clean session.
    flow.clear_default_session()
def _test_split_to_broadcast(test_case, src_device_type, dst_device_type, src_axis):
    """Split(src_axis) -> broadcast boxing for every (src, dst) device-count
    pair in 1..3; after boxing, each output must equal the input exactly.
    """
    flow.clear_default_session()
    flow.config.gpu_device_num(4)
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.consistent_view())

    def build_s2b(input_blob, src_device_num, dst_device_num):
        with flow.scope.placement(src_device_type, "0:0-" + str(src_device_num - 1)):
            src = flow.identity(
                input_blob.with_distribute(flow.distribute.split(src_axis)))
        with flow.scope.placement(dst_device_type, "0:0-" + str(dst_device_num - 1)):
            dst = flow.identity(
                src.with_distribute(flow.distribute.broadcast()))
        return dst

    @flow.global_function(function_config=func_config)
    def split_to_broadcast_job(input_blob: oft.Numpy.Placeholder((96, 96))):
        result_list = []
        # All 9 combinations of source/destination device counts.
        for i in (1, 2, 3):
            for j in (1, 2, 3):
                result_list.append(build_s2b(input_blob, i, j))
        return tuple(result_list)

    x = np.random.rand(96, 96).astype(np.float32)
    result_tuple = split_to_broadcast_job(x).get()
    for out in result_tuple:
        test_case.assertTrue(np.array_equal(x, out.numpy()))
def test_user_op_module_builder_in_namespace(test_case):
    """The user-op add function built by `_make_global_func` must return the
    elementwise sum of its two inputs."""
    shape = (2, 5)
    lhs = np.random.rand(*shape).astype(np.float32)
    rhs = np.random.rand(*shape).astype(np.float32)
    flow.clear_default_session()
    # Build the global add function and evaluate it on the two operands.
    add_func = _make_global_func(test_case, lhs.shape, rhs.shape)
    result = add_func(lhs, rhs)
    test_case.assertTrue(np.array_equal(result, lhs + rhs))
def compare_with_tensorflow(device_type, data_type, input_shape, axis, keepdims, rtol=1e-05, atol=1e-05):
    """Compare reduce_sum_like against tf.math.reduce_sum; for "float16" the
    OneFlow side computes in half precision and the TF reference is rounded
    through float16 to match.
    """
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(function_config=func_config)
    def ReduceSumLikeJob(x: oft.Numpy.Placeholder(input_shape)):
        with flow.scope.placement(device_type, "0:0"):
            if data_type == "float16":
                # Compute in half precision, return float32 for comparison.
                x = flow.cast(x, dtype=flow.float16)
                like = flow.math.reduce_sum(x, axis=axis, keepdims=keepdims)
                y = reduce_sum_like(x, like, axis=axis)
                y = flow.cast(y, dtype=flow.float32)
            else:
                like = flow.math.reduce_sum(x, axis=axis, keepdims=keepdims)
                y = reduce_sum_like(x, like, axis=axis)
            return y

    # float16 round-trip makes every input value exactly representable in
    # half precision, so both backends see identical operands.
    x = np.random.rand(*input_shape).astype(np.float16).astype(np.float32)
    of_out = ReduceSumLikeJob(x).get()
    tf_out = tf.math.reduce_sum(x, axis=axis, keepdims=keepdims)
    if data_type == "float16":
        tf_out = tf.cast(tf_out, dtype=tf.float16)
        tf_out = tf.cast(tf_out, dtype=tf.float32)
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=rtol, atol=atol), (
        of_out.numpy(),
        tf_out.numpy(),
    )
def _compare_ones_with_np(input_shape, device_type, machine_ids, device_counts):
    """flow.ones vs np.ones. The graph also adds a zero variable and runs an
    SGD step, but only the raw ones blob is returned and compared.
    """
    assert device_type in ["cpu", "gpu"]
    flow.clear_default_session()
    if device_type == "cpu":
        flow.config.cpu_device_num(device_counts)
    else:
        flow.config.gpu_device_num(device_counts)
    func_config = flow.FunctionConfig()
    func_config.default_placement_scope(
        flow.scope.placement(device_type, machine_ids))
    np_out_ones = np.ones(shape=input_shape, dtype=np.float32)

    @flow.global_function(type="train", function_config=func_config)
    def oneflow_ones() -> tp.Numpy:
        with flow.scope.placement(device_type, "0:0"):
            # NOTE(review): zero variable presumably exists only so the
            # "train" job has a trainable parameter — confirm intent.
            v = flow.get_variable(
                shape=np_out_ones.shape,
                dtype=flow.float32,
                initializer=flow.zeros_initializer(),
                name="x_var",
            )
        of_ones = flow.ones(shape=input_shape, dtype=flow.float32)
        of_out = of_ones + v
        with flow.scope.placement(device_type, "0:0"):
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [0.001]), momentum=0).minimize(of_out)
        # Return the pristine ones blob, not the trained sum.
        return of_ones

    of_out_ones = oneflow_ones()
    assert np.allclose(of_out_ones, np_out_ones)
def compare_with_tensorflow(device_type, x_shape, data_type, axis):
    """Softmax forward/backward vs TensorFlow. For "float16" the softmax is
    computed in half precision (looser tolerance); tensors and gradients are
    exchanged through the test_global_storage side channel.
    """
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    if data_type == "float16":
        # Variable stays float32; the cast to float16 happens around softmax.
        dtype = flow.float
    else:
        dtype = type_name_to_flow_type[data_type]

    @flow.global_function(type="train", function_config=func_config)
    def SoftmaxJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=-1.0, maxval=1.0),
                trainable=True,
            )
            x1 = x
            x = flow.identity(x)
            if data_type == "float16":
                loss = flow.cast(
                    flow.nn.softmax(flow.cast(x, dtype=flow.float16), axis=axis),
                    dtype=flow.float,
                )
            else:
                loss = flow.nn.softmax(x, axis=axis)
            # Stash tensors/grads so the TF comparison below can read them.
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            total_loss = loss * x1
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [0.0001]), momentum=0).minimize(total_loss)
            return loss

    of_out = SoftmaxJob().get()
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.softmax(x, axis=axis)
    # Back-propagate OneFlow's upstream gradient through the TF graph.
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    if data_type == "float16":
        tolerance = 0.001
    else:
        tolerance = 1e-05
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=tolerance, atol=tolerance)
    assert np.allclose(
        test_global_storage.Get("x_diff"),
        tf_x_diff.numpy(),
        rtol=tolerance,
        atol=tolerance,
    )
def _make_gpt_data_loader_func(
    data_file_prefix,
    seq_length,
    num_samples,
    batch_size,
    dtype,
    shuffle=None,
    random_seed=None,
    split_sizes=None,
    split_index=None,
    machine_num=1,
    device_num=1,
    nd_sbp=None,
    start_from_saved_progress=False,
):
    """Build a predict job wrapping megatron_gpt_mmap_data_loader on a
    (machine_num x device_num) CPU placement and return the job function.
    A checkpoint is initialized so legacy model IO has state to read.
    """
    assert machine_num > 0
    assert device_num > 0 and device_num <= 4
    parallel_hierachy = None
    if machine_num == 1:
        # Single machine: flat placement string, no hierarchy.
        device_strs = "0:0-{}".format(device_num - 1)
    elif machine_num > 1:
        device_strs = [
            "{}:0-{}".format(machine_id, device_num - 1)
            for machine_id in range(machine_num)
        ]
        parallel_hierachy = (machine_num, device_num)
    else:
        raise ValueError("invalid machine_num", machine_num)
    flow.clear_default_session()
    flow.config.cpu_device_num(4)
    flow.config.enable_legacy_model_io(True)
    func_cfg = flow.FunctionConfig()
    func_cfg.default_logical_view(flow.scope.consistent_view())

    @flow.global_function("predict", function_config=func_cfg)
    def gpt_loader_fn() -> flow.typing.Numpy:
        with flow.scope.placement("cpu", device_strs, parallel_hierachy):
            tokens = flow.data.megatron_gpt_mmap_data_loader(
                data_file_prefix=data_file_prefix,
                seq_length=seq_length,
                num_samples=num_samples,
                batch_size=batch_size,
                dtype=dtype,
                shuffle=shuffle,
                random_seed=random_seed,
                split_sizes=split_sizes,
                split_index=split_index,
                nd_sbp=nd_sbp,
                start_from_saved_progress=start_from_saved_progress,
                name="GPTDataLoader",
            )
            if isinstance(nd_sbp, list) and len(nd_sbp) > 1:
                # 2-D sbp: first collapse to broadcast on both hierarchy axes.
                tokens = flow.hierarchical_parallel_cast(tokens, nd_sbp=["B", "B"])
        # NOTE(review): original indentation was lost in this file; this final
        # cast is assumed to apply unconditionally — confirm against upstream.
        tokens = flow.hierarchical_parallel_cast(tokens, nd_sbp=["B"])
        return tokens

    check_point = flow.train.CheckPoint()
    check_point.init()
    return gpt_loader_fn
def _of_assign_and_relu(value, dtype, device_type, assign=flow.assign):
    """Assign `value` into a zero-initialized variable via `assign`, then
    read it back through a second job that applies relu; returns the ndarray.
    """
    flow.clear_default_session()
    if os.getenv("ONEFLOW_TEST_CPU_ONLY") is None:
        flow.config.gpu_device_num(1)
    flow.config.cpu_device_num(1)
    func_config = flow.FunctionConfig()
    func_config.default_data_type(dtype)
    func_config.default_placement_scope(
        flow.scope.placement(device_type, "0:0"))

    @flow.global_function(function_config=func_config)
    def assign_fn(value_def: oft.Numpy.Placeholder(value.shape, dtype=dtype)):
        var = flow.get_variable(
            name="var",
            shape=value.shape,
            dtype=dtype,
            initializer=flow.constant_initializer(0),
        )
        assign(var, value_def)

    @flow.global_function(function_config=func_config)
    def relu_fn():
        # Same variable name/shape: this reads the blob assign_fn wrote.
        var = flow.get_variable(
            name="var",
            shape=value.shape,
            dtype=dtype,
            initializer=flow.constant_initializer(0),
        )
        return flow.nn.relu(var)

    # Run assign first so relu_fn observes the updated variable.
    assign_fn(value)
    return relu_fn().get().numpy()
def _make_unsoted_segment_sum_fn(device, data, segment_ids, num_segments):
    """Run unsorted_batch_segment_sum in a train job, stashing the input and
    output gradients in test_global_storage; returns the job's result blob.

    NOTE(review): `func_config` is not defined in this function — presumably
    a module-level FunctionConfig; confirm before reusing this helper.
    (The "unsoted" typo in the name is kept for caller compatibility.)
    """
    flow.clear_default_session()

    @flow.global_function(type="train", function_config=func_config)
    def unsorted_batch_segment_sum_job(
        data: oft.Numpy.Placeholder(data.shape, dtype=flow.float),
        segment_ids: oft.Numpy.Placeholder(segment_ids.shape, dtype=flow.int32),
    ):
        with flow.scope.placement(device, "0:0"):
            x = flow.get_variable(
                "data",
                shape=data.shape,
                dtype=flow.float32,
                initializer=flow.constant_initializer(0),
            )
            # Route the input through a zero variable so it has a gradient path.
            data = x + data
            res = flow.math.unsorted_batch_segment_sum(
                data=data, segment_ids=segment_ids, num_segments=num_segments
            )
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
            ).minimize(res)
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch_diff(res, test_global_storage.Setter("loss_diff"))
            return res

    return unsorted_batch_segment_sum_job(data, segment_ids)
def compare_with_tensorflow(device_type, in_shape, axis, k, data_type, sorted):
    """top_k indices vs TensorFlow. The mirrored placeholder is padded
    (+10 per dim) beyond the dynamic input; when k exceeds the axis length
    the reference is a full descending argsort instead.
    (`input`/`sorted` shadow builtins; names kept for caller compatibility.)
    """
    assert device_type in ["gpu", "cpu"]
    assert data_type in ["float32", "double", "int8", "int32", "int64"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_logical_view(flow.scope.mirrored_view())
    func_config.default_data_type(flow.float)

    @flow.global_function(function_config=func_config)
    def TopKJob(input: oft.ListNumpy.Placeholder(
        tuple([dim + 10 for dim in in_shape]),
        dtype=type_name_to_flow_type[data_type],
    )):
        with flow.scope.placement(device_type, "0:0"):
            return flow.math.top_k(input, axis, k, sorted)

    input = (np.random.random(in_shape) * 100).astype(
        type_name_to_np_type[data_type])
    of_out = TopKJob([input]).get().numpy_list()[0]
    if k <= in_shape[axis]:
        # tf.math.top_k works on the last dim only: transpose there and back.
        perm = get_perm_when_transpose_axis_to_last_dim(len(in_shape), axis)
        x = tf.transpose(input, perm)
        (_, indices) = tf.math.top_k(x, k, sorted)
        tf_out = tf.transpose(indices, get_inversed_perm(perm))
    else:
        tf_out = tf.argsort(input, axis, direction="DESCENDING", stable=True)
    assert np.array_equal(of_out, tf_out.numpy())
def GetSeveralLossesAsNumpy(enable_inplace, num_iters=10):
    """Run `num_iters` BERT pretrain steps from the checkpoint at
    FLAGS.model_load_dir and return the per-step mean losses as an ndarray.
    `enable_inplace` toggles the inplace optimization under test so callers
    can compare loss trajectories with it on and off.
    """
    flow.config.enable_debug_mode(True)
    flow.config.gpu_device_num(1)
    train_config = flow.FunctionConfig()
    train_config.default_logical_view(flow.scope.consistent_view())
    train_config.enable_inplace(enable_inplace)

    @flow.global_function(type="train", function_config=train_config)
    def PretrainJob():
        # Single hidden layer keeps the pretrain graph small for the test.
        loss = BuildPreTrainNet(
            batch_size=FLAGS.batch_size,
            data_part_num=FLAGS.data_part_num,
            seq_length=FLAGS.seq_length,
            max_position_embeddings=FLAGS.max_position_embeddings,
            num_hidden_layers=1,
            num_attention_heads=FLAGS.num_attention_heads,
            hidden_dropout_prob=FLAGS.hidden_dropout_prob,
            attention_probs_dropout_prob=FLAGS.attention_probs_dropout_prob,
            vocab_size=FLAGS.vocab_size,
            type_vocab_size=FLAGS.type_vocab_size,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
        )
        CreateOptimizer().minimize(loss)
        return loss

    # Load fixed weights so both inplace settings start identically.
    check_point = flow.train.CheckPoint()
    check_point.load(FLAGS.model_load_dir)
    ret = [PretrainJob().get().mean() for _ in range(num_iters)]
    flow.clear_default_session()
    return np.array(ret)
def test_name_scope(test_case):
    """Nested name scopes must prefix variable names: a variable "var" under
    backbone/branch equals a top-level variable named "backbone-branch-var",
    and re-entering the same scope yields the same variable."""
    flow.clear_default_session()
    cfg = flow.FunctionConfig()
    cfg.default_data_type(flow.float)

    def make_var(var_name):
        # All three variables share shape/dtype so only the name differs.
        return flow.get_variable(
            name=var_name,
            shape=(2, 256, 14, 14),
            dtype=flow.float32,
            initializer=flow.random_uniform_initializer(),
        )

    @flow.global_function(function_config=cfg)
    def test_name_scope_job():
        with flow.scope.namespace("backbone"):
            with flow.scope.namespace("branch"):
                scoped_a = make_var("var")
            # Same scope entered again: must resolve to the same variable.
            with flow.scope.namespace("branch"):
                scoped_b = make_var("var")
        flat = make_var("backbone-branch-var")
        return (scoped_a, scoped_b, flat)

    (scoped_a, scoped_b, flat) = test_name_scope_job().get()
    test_case.assertTrue(np.array_equal(scoped_a.numpy(), scoped_b.numpy()))
    test_case.assertTrue(np.array_equal(scoped_a.numpy(), flat.numpy()))
def test_sync_dynamic_resize(_):
    """sync_dynamic_resize must truncate x to its first `size` rows, for
    every combination of device, shape, data dtype and size dtype.
    """
    arg_dict = OrderedDict()
    arg_dict["device_type"] = ["gpu", "cpu"]
    arg_dict["x_shape"] = [(100, ), (1000, 10)]
    arg_dict["data_type"] = ["float32", "double", "int32", "int64"]
    arg_dict["size_type"] = ["int32", "int64"]
    for (device_type, x_shape, data_type, size_type) in GenArgList(arg_dict):
        # Fresh session per combination; the job is re-defined each time.
        flow.clear_default_session()
        func_config = flow.FunctionConfig()
        func_config.default_data_type(flow.float)

        @flow.global_function(function_config=func_config)
        def TestJob(
            x: oft.Numpy.Placeholder(
                x_shape, dtype=type_name_to_flow_type[data_type]),
            size: oft.Numpy.Placeholder(
                (1, ), dtype=type_name_to_flow_type[size_type]),
        ):
            with flow.scope.placement(device_type, "0:0"):
                return flow.sync_dynamic_resize(x, size)

        # Random target size in [0, rows); passed as a 1-element array.
        size = np.random.randint(0, x_shape[0])
        x = np.random.rand(*x_shape).astype(
            type_name_to_np_type[data_type])
        y = (TestJob(
            x,
            np.array([size]).astype(
                type_name_to_np_type[size_type])).get().numpy_list()[0])
        assert np.array_equal(y, x[:size])
def run_job(test_case, device=None, in_type=None, shape=None):
    """Run one train step of a bn + add + relu block (a fusion candidate);
    this only checks the job executes — there is no numeric assertion.
    Relies on the module-level `_batch_norm` helper.
    """
    assert shape is not None
    flow.clear_default_session()
    func_config = flow.FunctionConfig()

    @flow.global_function(type="train", function_config=func_config)
    def FuseBnAddReluJob(x: oft.Numpy.Placeholder(
            shape, dtype=in_type)) -> oft.Numpy:
        # Constant addend feeding the add that should fuse with bn/relu.
        addend = flow.constant_like(x, 2)
        with flow.scope.placement(device, "0:0-0"):
            # Mix the input with a trainable variable so backprop runs.
            x = (flow.get_variable(
                "x1",
                shape=shape,
                dtype=in_type,
                initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
                trainable=True,
            ) + x)
            loss = flow.nn.relu(_batch_norm(x, last=False) + addend) + 1
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [0.0001]), momentum=0).minimize(loss)
            return loss

    np_in_type = dtype_util.convert_oneflow_dtype_to_numpy_dtype(in_type)
    x = (np.random.rand(*shape) * 10).astype(np_in_type)
    FuseBnAddReluJob(x)
def _make_dim_gather_fn(test_case, sample, datashape):
    """Build a mirrored-view train job running dim_gather along
    sample["dim"]; the registered watch_diff callback checks the backward
    result against sample["grad"]. Returns the job function.
    """
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float32)
    func_config.default_logical_view(flow.scope.mirrored_view())
    func_config.default_placement_scope(flow.scope.placement("gpu", "0:0"))

    def _compare_diff(blob: oft.ListNumpy):
        # Gradient callback: must match the precomputed reference gradient.
        test_case.assertTrue(np.allclose(sample["grad"], blob[0]))

    @flow.global_function(type="train", function_config=func_config)
    def DynamicDimGatherJob(
        params_def: oft.ListNumpy.Placeholder(datashape, dtype=flow.float32),
        index_def: oft.ListNumpy.Placeholder(datashape, dtype=flow.int32),
    ) -> oft.ListNumpy:
        # Broadcast-add a zero scalar variable so params has a gradient path.
        x_var = flow.get_variable(
            "input",
            shape=(1,),
            dtype=flow.float32,
            initializer=flow.constant_initializer(0),
        )
        x_var = flow.cast_to_current_logical_view(x_var)
        x = x_var + params_def
        y = flow.dim_gather(x, sample["dim"], index_def)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
        ).minimize(y)
        flow.watch_diff(x, _compare_diff)
        return y

    return DynamicDimGatherJob
def _of_image_target_resize(images, target_size, max_size, image_static_shape, aspect_ratio_list):
    """Resize images by the shorter side to `target_size`, capped at
    `max_size`; returns (resized images, new sizes, scales) for the first
    mirror. Images must have 3 channels.
    """
    assert image_static_shape[-1] == 3
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.mirrored_view())

    @flow.global_function(function_config=func_config)
    def image_target_resize_job(image: otp.ListListNumpy.Placeholder(
            shape=image_static_shape, dtype=flow.float)) -> tp.Tuple[
                otp.ListListNumpy, otp.ListNumpy, otp.ListNumpy]:
        image_buffer = flow.tensor_list_to_tensor_buffer(image)
        (res_image_buffer, new_size, scale) = flow.image_target_resize(
            image_buffer,
            target_size=target_size,
            max_size=max_size,
            resize_side="shorter",
        )
        # Static output shape consistent with keep-aspect-ratio resizing,
        # matching the resize parameters used above.
        out_shape = image_test_util.infer_keep_aspect_ratio_resized_images_static_shape(
            target_size=target_size,
            min_size=None,
            max_size=max_size,
            aspect_ratio_list=aspect_ratio_list,
            resize_side="shorter",
            channels=3,
        )
        res_image = flow.tensor_buffer_to_tensor_list(
            res_image_buffer, shape=out_shape, dtype=flow.float)
        return (res_image, new_size, scale)

    (res_image, new_size, scale) = image_target_resize_job([images])
    return (res_image[0], new_size[0], scale[0])