def _test_slice_update(
    test_case,
    input,
    update,
    slice_args,
    output,
    dtype=flow.float32,
    device_tag=DEFAULT_DEVICE_TAG,
    verbose=False,
):
    """Run a slice-update job and check its result against ``output``.

    ``input``, ``update`` and ``output`` are cast to the numpy dtype that
    corresponds to ``dtype``. The job is built with
    ``_make_slice_update_func`` and placed on ``device_tag`` ("0:0").

    Args:
        test_case: unittest-style object providing ``assertTrue``.
        input: numpy array fed to the job as the tensor to update.
        update: numpy array fed to the job as the update value.
        slice_args: slice description forwarded to the job factory.
        output: expected result as a numpy array.
        dtype: OneFlow dtype used for the job and for casting the arrays.
        device_tag: device tag used for the placement scope.
        verbose: when true, print the inputs, expectation and actual result.
    """
    np_dtype = flow.convert_oneflow_dtype_to_numpy_dtype(dtype)
    input = input.astype(np_dtype)
    update = update.astype(np_dtype)
    output = output.astype(np_dtype)

    flow.clear_default_session()
    job_config = flow.FunctionConfig()
    job_config.default_data_type(dtype)
    job_config.default_placement_scope(flow.scope.placement(device_tag, "0:0"))

    update_job = _make_slice_update_func(
        slice_args, input.shape, update.shape, dtype, job_config
    )
    of_output = update_job(input, update)

    if verbose:
        print("input:\n{}".format(input))
        print("update:\n{}".format(update))
        print("slice_args:", slice_args)
        print("output:\n{}".format(output))
        print("dtype:", dtype)
        print("device_tag:", device_tag)
        print("of_output:\n{}".format(of_output))

    matches = np.array_equal(output, of_output)
    test_case.assertTrue(matches)
def _immediately_construct(
    self,
    *args,
    dtype=None,
    device=None,
    requires_grad=False,
    retain_grad=False,
    is_lazy=False
):
    """Build the tensor right away from the positional arguments.

    A tuple/list or numpy first argument is cast to the numpy dtype matching
    ``dtype`` and handed to ``_construct_determined_tensor_with_numpy``.
    A consistent/mirrored tensor argument is stored directly and the
    undetermined tensor is cleared.
    """
    if _input_args_is_tuple_or_list(*args):
        source = np.array(args[0])
    elif _input_args_is_numpy(*args):
        source = args[0]
    elif _input_args_is_consistent_or_mirrored(*args):
        self._local_or_consistent_tensor = args[0]
        self._undetermined_tensor = None
        return
    else:
        # NOTE(review): arguments matching none of the predicates are
        # silently ignored, exactly as before.
        return

    numpy_data = source.astype(flow.convert_oneflow_dtype_to_numpy_dtype(dtype))
    self._construct_determined_tensor_with_numpy(
        dtype=dtype,
        device=device,
        requires_grad=requires_grad,
        retain_grad=retain_grad,
        is_lazy=is_lazy,
        numpy_data=numpy_data,
    )
def _test_slice_dynamic(
    test_case,
    input,
    slice_args,
    outputs,
    static_shape=None,
    dtype=flow.float32,
    device_tag=DEFAULT_DEVICE_TAG,
):
    """Run a dynamic slice job in mirrored view and check every output.

    Args:
        test_case: unittest-style object providing the assertions.
        input: numpy array to slice; cast to the numpy dtype of ``dtype``.
        slice_args: slice description forwarded to
            ``_make_slice_dynamic_func``.
        outputs: iterable of expected numpy arrays, one per job output.
        static_shape: static shape given to the job factory; defaults to
            ``input.shape``.
        dtype: OneFlow dtype used for the job and for casting.
        device_tag: device tag used for the placement scope.
    """
    np_dtype = flow.convert_oneflow_dtype_to_numpy_dtype(dtype)
    input = input.astype(np_dtype)
    outputs = [output.astype(np_dtype) for output in outputs]
    if static_shape is None:
        static_shape = input.shape

    flow.clear_default_session()
    func_cfg = flow.FunctionConfig()
    func_cfg.default_data_type(dtype)
    func_cfg.default_placement_scope(flow.scope.placement(device_tag, "0:0"))
    func_cfg.default_logical_view(flow.scope.mirrored_view())

    slice_func = _make_slice_dynamic_func(slice_args, static_shape, dtype, func_cfg)
    of_outputs = slice_func([input])

    # zip() stops at the shorter sequence, so without this check a job that
    # returns too few (or too many) outputs would pass unnoticed.
    test_case.assertEqual(len(outputs), len(of_outputs))
    for out, of_out in zip(outputs, of_outputs):
        test_case.assertTrue(np.array_equal(out, of_out[0]))
def _construct_with_data(
    self,
    *args,
    dtype=None,
    device=None,
    requires_grad=False,
    retain_grad=False,
    placement=None,
    sbp=None,
    is_consistent=False,
    is_lazy=False,
):
    """Record an ``UndeterminedTensor`` initialised from the given data.

    The first positional argument (tuple/list or numpy array) is cast to the
    numpy dtype matching ``dtype``; its shape and the remaining keyword
    options are stored in ``self._undetermined_tensor``.
    """

    def _cast(array):
        return array.astype(flow.convert_oneflow_dtype_to_numpy_dtype(dtype))

    numpy_data = None
    if _input_args_is_tuple_or_list(*args):
        numpy_data = _cast(np.array(args[0]))
    elif _input_args_is_numpy(*args):
        numpy_data = _cast(args[0])

    # NOTE(review): when neither predicate matches, numpy_data is still None
    # here and the ``.shape`` access raises AttributeError.
    shape = oneflow_api.Size(tuple(numpy_data.shape))

    self._determining_initializer = _numpy_initializer_for_determining
    tensor_options = dict(
        device=device,
        requires_grad=requires_grad,
        retain_grad=retain_grad,
        placement=placement,
        sbp=sbp,
        is_consistent=is_consistent,
        is_lazy=is_lazy,
        numpy_data=numpy_data,
    )
    self._undetermined_tensor = UndeterminedTensor(shape, dtype, **tensor_options)
def _test_random_body(self, shape, dtype=flow.float32, target_dtype=flow.float32):
    """Call ``_test_body`` with random data of ``shape`` scaled by 1000.

    The samples come from ``np.random.random`` and are cast to the numpy
    dtype matching ``dtype`` before being passed on.
    """
    source_np_dtype = flow.convert_oneflow_dtype_to_numpy_dtype(dtype)
    scaled_samples = 1000 * np.random.random(shape)
    self._test_body(
        scaled_samples.astype(source_np_dtype),
        dtype=dtype,
        target_dtype=target_dtype,
    )
def _test_ones_body(self, shape, dtype=flow.float32, target_dtype=flow.float32):
    """Call ``_test_body`` with an all-ones array of ``shape``.

    The array is created directly in the numpy dtype matching ``dtype``.
    """
    all_ones = np.ones(
        shape, dtype=flow.convert_oneflow_dtype_to_numpy_dtype(dtype)
    )
    self._test_body(all_ones, dtype=dtype, target_dtype=target_dtype)
def _test_model_io(test_case, shape, dtype, lr, num_iters):
    """Check variable updates across iterations and a save/load round trip.

    For ``num_iters`` iterations the generator job is fed random input and a
    snapshot is saved. From the second iteration on, the returned variable
    must be close to the previous one minus ``lr / var.size``. Afterwards the
    last snapshot is loaded in a fresh session and the variable read through
    the job is compared with the one read straight from the snapshot file.
    """
    flow.clear_default_session()
    gen_var = _make_gen_var_func(shape, dtype, lr)

    snapshot_path = "./snapshot-{}".format(time.strftime("%Y%m%d-%H:%M:%S"))
    checkpoint = flow.train.CheckPoint()
    checkpoint.init()

    def _random_input():
        return np.random.rand(*shape).astype(
            flow.convert_oneflow_dtype_to_numpy_dtype(dtype)
        )

    previous_var = None
    for step in range(num_iters):
        current_var = gen_var(_random_input())
        if step > 0:
            expected = previous_var - lr / current_var.size
            test_case.assertTrue(np.allclose(current_var, expected))
        previous_var = current_var
        checkpoint.save("{}-{}".format(snapshot_path, step))

    # Reload the last snapshot in a clean session.
    flow.clear_default_session()
    get_var = _make_get_var_func(shape, dtype)

    final_snapshot_path = "{}-{}".format(snapshot_path, num_iters - 1)
    checkpoint = flow.train.CheckPoint()
    checkpoint.load(final_snapshot_path)

    loaded_var = get_var()
    file_var = _load_snapshot_manually(final_snapshot_path, shape, dtype)
    test_case.assertTrue(np.allclose(loaded_var, file_var))
def read_images_by_pil(image_files, dtype, channels=3):
    """Read image files with PIL and return them as 3-D numpy arrays.

    "RGB" images have their channel axis reversed, "L" (grayscale) images are
    broadcast to three identical channels, and "BGR" images are passed
    through unchanged.

    Each file is opened inside a ``with`` block and decoded before the next
    one is opened, so at most one image file is held open at a time and it is
    closed even when an unsupported band layout raises.

    Args:
        image_files: iterable of image file paths.
        dtype: OneFlow dtype; arrays are cast to the matching numpy dtype.
        channels: expected size of the last axis of every returned array.

    Returns:
        list of numpy arrays, each with ``ndim == 3`` and
        ``shape[2] == channels``.

    Raises:
        NotImplementedError: if an image's bands are not RGB, L or BGR.
        AssertionError: if a decoded image does not have the expected layout.
    """
    np_dtype = flow.convert_oneflow_dtype_to_numpy_dtype(dtype)
    images = []
    for image_file in image_files:
        with PIL.Image.open(image_file) as im:
            band = "".join(im.getbands())
            if band == "RGB":
                # Reverse the channel axis (RGB -> BGR).
                images.append(np.asarray(im).astype(np_dtype)[:, :, ::-1])
            elif band == "L":
                gs_image = np.asarray(im).astype(np_dtype)
                gs_image_shape = gs_image.shape
                assert len(gs_image_shape) == 2
                gs_image = gs_image.reshape(gs_image_shape + (1,))
                # broadcast_to returns a read-only view with the single gray
                # channel repeated three times.
                gs_image = np.broadcast_to(gs_image, shape=gs_image_shape + (3,))
                images.append(gs_image)
            elif band == "BGR":
                images.append(np.asarray(im).astype(np_dtype))
            else:
                raise NotImplementedError(
                    "unsupported image bands {!r} in {}".format(band, image_file)
                )

    assert all(isinstance(image, np.ndarray) for image in images)
    assert all(image.ndim == 3 for image in images)
    assert all(image.shape[2] == channels for image in images)
    return images
def read_images_by_cv(image_files, dtype, channels=3):
    """Read image files with ``cv2.imread`` and cast them to ``dtype``.

    Args:
        image_files: iterable of image file paths.
        dtype: OneFlow dtype; arrays are cast to the matching numpy dtype.
        channels: expected size of the last axis of every returned array.

    Returns:
        list of numpy arrays, each with ``ndim == 3`` and
        ``shape[2] == channels``.

    Raises:
        ValueError: if ``cv2.imread`` cannot read one of the files.
        AssertionError: if a decoded image does not have the expected layout.
    """
    np_dtype = flow.convert_oneflow_dtype_to_numpy_dtype(dtype)
    images = []
    for image_file in image_files:
        image = cv2.imread(image_file)
        # cv2.imread signals failure by returning None instead of raising;
        # report the offending path rather than failing later on ``.astype``.
        if image is None:
            raise ValueError(
                "cv2.imread failed to read image file: {}".format(image_file)
            )
        images.append(image.astype(np_dtype))

    assert all(isinstance(image, np.ndarray) for image in images)
    assert all(image.ndim == 3 for image in images)
    assert all(image.shape[2] == channels for image in images)
    return images
def _CopyToNdarrayListAndIsNewSliceStartMask(self):
    """Copy every tensor of the blob into numpy arrays and mark slice starts.

    Returns:
        A pair ``(tensor_list, is_new_slice_start_mask)``: the list of numpy
        arrays copied from the blob's tensor iterator, and a list of booleans
        of the same length that is ``True`` at each index reported by
        ``OfBlob_TensorIndex4SliceId``.
    """
    blob_ptr = self.of_blob_ptr_

    # Resolve the dtype-specific copy routine by name.
    method_name = oneflow_api.Dtype_GetOfBlobCurTensorCopyToBufferFuncName(
        self.dtype.oneflow_proto_dtype
    )
    copy_method = getattr(oneflow_api, method_name)
    # Loop-invariant: the numpy dtype is the same for every tensor.
    np_dtype = flow.convert_oneflow_dtype_to_numpy_dtype(self.dtype)

    tensor_list = []
    oneflow_api.OfBlob_ResetTensorIterator(blob_ptr)
    while not oneflow_api.OfBlob_CurTensorIteratorEqEnd(blob_ptr):
        shape_tensor = np.zeros(self.num_axes, dtype=np.int64)
        oneflow_api.OfBlob_CurTensorCopyShapeTo(blob_ptr, shape_tensor)
        shape = tuple(shape_tensor.tolist())
        tensor = np.zeros(shape, dtype=np_dtype)
        copy_method(blob_ptr, tensor)
        tensor_list.append(tensor)
        oneflow_api.OfBlob_IncTensorIterator(blob_ptr)
    assert len(tensor_list) == oneflow_api.OfBlob_TotalNumOfTensors(blob_ptr)

    # Flag the first tensor of every slice.
    is_new_slice_start_mask = [False] * len(tensor_list)
    num_slices = oneflow_api.OfBlob_NumOfTensorListSlices(blob_ptr)
    for slice_id in range(num_slices):
        tensor_list_start = oneflow_api.OfBlob_TensorIndex4SliceId(
            blob_ptr, slice_id
        )
        assert 0 <= tensor_list_start < len(is_new_slice_start_mask)
        is_new_slice_start_mask[tensor_list_start] = True
    return tensor_list, is_new_slice_start_mask
def _copy_from_numpy_to_eager_local_tensor(eager_local_tensor, np_arr):
    """Copy ``np_arr`` into ``eager_local_tensor``.

    The copy routine is looked up on the tensor by the name the tensor itself
    reports. ``np_arr`` must already have the tensor's numpy dtype and shape;
    both are asserted before copying.
    """
    copier_name = (
        eager_local_tensor._get_copy_mirrored_tensor_from_numpy_func_name()
    )
    copier = getattr(eager_local_tensor, copier_name)

    expected_dtype = flow.convert_oneflow_dtype_to_numpy_dtype(
        eager_local_tensor.dtype
    )
    assert np_arr.dtype == expected_dtype
    expected_shape = tuple(eager_local_tensor.shape)
    assert np_arr.shape == expected_shape

    copier(np_arr)
def _test_slice_update_grad(
    test_case,
    input,
    update,
    slice_args,
    output,
    input_diff,
    update_diff,
    dtype=flow.float32,
    device_tag=DEFAULT_DEVICE_TAG,
    verbose=False,
):
    """Run a slice-update job with gradients and check output and diffs.

    All array arguments are cast to the numpy dtype matching ``dtype``. A
    diff-watcher factory is passed to ``_make_slice_update_grad_func``; each
    watcher compares the observed diff with ``input_diff`` or ``update_diff``
    depending on which expected shape matches the shape the watcher was
    created for. The job's return value is compared with ``output``.
    """
    np_dtype = flow.convert_oneflow_dtype_to_numpy_dtype(dtype)
    input = input.astype(np_dtype)
    update = update.astype(np_dtype)
    output = output.astype(np_dtype)
    input_diff = input_diff.astype(np_dtype)
    update_diff = update_diff.astype(np_dtype)

    if verbose:
        print("dtype: {}".format(dtype))
        print("device_tag: {}".format(device_tag))
        print("input: {}\n{}\n".format(input.shape, input))
        print("output: {}\n{}\n".format(output.shape, output))

    def _make_diff_watcher(shape):
        def _watch_diff(diff: otp.Numpy):
            # input_diff takes precedence when both expected shapes match;
            # a shape matching neither is not checked at all.
            if shape == input_diff.shape:
                expected = input_diff
            elif shape == update_diff.shape:
                expected = update_diff
            else:
                return
            test_case.assertTrue(np.array_equal(diff, expected))

        return _watch_diff

    flow.clear_default_session()
    job_config = flow.FunctionConfig()
    job_config.default_data_type(dtype)
    job_config.default_placement_scope(flow.scope.placement(device_tag, "0:0"))

    grad_job = _make_slice_update_grad_func(
        slice_args,
        input.shape,
        update.shape,
        _make_diff_watcher,
        dtype,
        job_config,
    )
    result = grad_job(input, update)
    test_case.assertTrue(np.array_equal(result, output))
def _local_tensor_numpy(eager_local_tensor):
    """Return the contents of ``eager_local_tensor`` as a new numpy array.

    An uninitialised array with the tensor's shape and numpy dtype is
    allocated and filled by the copy routine the tensor names.
    """
    copier_name = eager_local_tensor._get_copy_mirrored_tensor_to_numpy_func_name()
    copier = getattr(eager_local_tensor, copier_name)

    target_shape = tuple(eager_local_tensor.shape)
    target_dtype = flow.convert_oneflow_dtype_to_numpy_dtype(
        eager_local_tensor.dtype
    )
    buffer = np.empty(target_shape, dtype=target_dtype)

    copier(buffer)
    return buffer
def read_images_by_pil(image_files, dtype, channels=3):
    """Load image files with PIL and return them with the channel axis reversed.

    Every decoded image must be a 3-D numpy array whose last axis has
    ``channels`` entries. The arrays are cast to the numpy dtype matching
    ``dtype`` before the last axis is reversed.
    """
    loaded = []
    for path in image_files:
        loaded.append(np.asarray(PIL.Image.open(path)))

    assert all(isinstance(arr, np.ndarray) for arr in loaded)
    assert all(arr.ndim == 3 for arr in loaded)
    assert all(arr.shape[2] == channels for arr in loaded)

    np_dtype = flow.convert_oneflow_dtype_to_numpy_dtype(dtype)

    def _cast_and_reverse_channels(arr):
        return arr.astype(np_dtype)[:, :, ::-1]

    return [_cast_and_reverse_channels(arr) for arr in loaded]
def _CopyToNdarray(self):
    """Copy the blob's data into a freshly allocated numpy array.

    The shape is read from the blob first; a zero-filled array of that shape
    and of the numpy dtype matching ``self.dtype`` is then filled by the
    dtype-specific copy routine and returned.
    """
    proto_dtype = oneflow_api.deprecated.GetProtoDtype4OfDtype(self.dtype)
    copier_name = oneflow_api.Dtype_GetOfBlobCopyToBufferFuncName(proto_dtype)
    copier = getattr(oneflow_api, copier_name)

    # Query the blob shape through an int64 scratch buffer.
    dims = np.zeros(self.num_axes, dtype=np.int64)
    oneflow_api.OfBlob_CopyShapeTo(self.of_blob_ptr_, dims)

    result = np.zeros(
        tuple(dims.tolist()),
        dtype=flow.convert_oneflow_dtype_to_numpy_dtype(self.dtype),
    )
    copier(self.of_blob_ptr_, result)
    return result
def _test_categorical_ordinal_encoder(
    test_case, device_tag, dtype, size, capacity, num_tokens, num_iters
):
    """Check that the categorical ordinal encoder maps keys one-to-one.

    A job applies ``flow.layers.categorical_ordinal_encoder`` twice to the
    same input (the second under the name "encode1"). Over ``num_iters``
    random batches drawn from ``num_tokens`` random tokens, the output must
    have the input's shape and, on "cpu", both encoders must agree. Finally
    the numbers of distinct keys, values and key/value pairs must be equal.
    """
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.consistent_view())

    @flow.global_function(function_config=func_config)
    def test_job(
        x: oft.Numpy.Placeholder(shape=(size,), dtype=dtype)
    ) -> typing.Tuple[oft.Numpy, oft.Numpy]:
        with flow.scope.placement(device_tag, "0:0"):
            y = flow.layers.categorical_ordinal_encoder(x, capacity=capacity)
            z = flow.layers.categorical_ordinal_encoder(
                x, capacity=capacity, name="encode1"
            )
            return y, z

    check_point = flow.train.CheckPoint()
    check_point.init()

    raw_tokens = np.random.randint(-sys.maxsize, sys.maxsize, size=[num_tokens])
    tokens = raw_tokens.astype(flow.convert_oneflow_dtype_to_numpy_dtype(dtype))

    seen_keys = set()
    seen_values = set()
    seen_key_value = set()
    seen_value_key = set()

    for _ in range(num_iters):
        picks = np.random.randint(0, num_tokens, (size,))
        x = tokens[picks]
        y, z = test_job(x)

        test_case.assertEqual(x.shape, y.shape)
        if device_tag == "cpu":
            test_case.assertTrue(
                np.array_equal(y, z),
                "\ny: {}\n{}\nz: {}\n{}".format(y.shape, y, z.shape, z),
            )

        for key, value in zip(x, y):
            seen_keys.add(key)
            seen_values.add(value)
            seen_key_value.add((key, value))
            seen_value_key.add((value, key))

    # A bijective encoding yields as many values and pairs as keys.
    unique_size = len(seen_keys)
    for observed in (seen_values, seen_key_value, seen_value_key):
        test_case.assertEqual(len(observed), unique_size)
def _test_slice_with_grad(
    test_case,
    input,
    slice_args,
    output,
    diff,
    dtype=flow.float32,
    device_tag=DEFAULT_DEVICE_TAG,
    verbose=False,
):
    """Run a slice job with gradient and check both output and diff.

    ``input``, ``output`` and ``diff`` are cast to the numpy dtype matching
    ``dtype``. The diff observed by the watcher passed to
    ``_make_slice_with_grad_func`` must equal ``diff`` and the job's result
    must equal ``output``.
    """
    np_dtype = flow.convert_oneflow_dtype_to_numpy_dtype(dtype)
    input = input.astype(np_dtype)
    output = output.astype(np_dtype)
    diff = diff.astype(np_dtype)

    if verbose:
        print("dtype: {}".format(dtype))
        print("device_tag: {}".format(device_tag))
        print("input: {}\n{}\n".format(input.shape, input))
        print("output: {}\n{}\n".format(output.shape, output))
        print("diff: {}\n{}\n".format(diff.shape, diff))

    def WatchDiff(of_diff: otp.Numpy):
        if verbose:
            print("of_diff: {}\n{}\n".format(of_diff.shape, of_diff))
        diff_matches = np.array_equal(of_diff, diff)
        test_case.assertTrue(diff_matches)

    flow.clear_default_session()
    job_config = flow.FunctionConfig()
    job_config.default_data_type(dtype)
    job_config.default_placement_scope(flow.scope.placement(device_tag, "0:0"))

    grad_job = _make_slice_with_grad_func(
        slice_args, input.shape, WatchDiff, dtype, job_config
    )
    of_output = grad_job(input)

    if verbose:
        print("of_output: {}\n{}\n".format(of_output.shape, of_output))

    test_case.assertTrue(np.array_equal(output, of_output))
def _test_reverse(test_case, input, axis, dtype, verbose=False):
    """Compare ``_of_reverse`` with a numpy reversal along ``axis``.

    The reference result is built by indexing ``input`` with a step of -1 on
    every listed axis (negative axes count from the end). ``input`` is cast
    to the numpy dtype matching ``dtype`` first.
    """
    assert isinstance(input, np.ndarray)
    input = input.astype(flow.convert_oneflow_dtype_to_numpy_dtype(dtype))

    ndim = input.ndim
    reversed_axis = slice(None, None, -1)
    index = [slice(None)] * ndim
    for a in axis:
        position = a + ndim if a < 0 else a
        assert 0 <= position < ndim
        index[position] = reversed_axis

    output = input[tuple(index)]
    of_output = _of_reverse(input, axis, dtype)

    if verbose:
        print("input: {}\n{}\n".format(input.shape, input))
        print("comparing output:\n{}\nvs.\n{}".format(output, of_output))

    test_case.assertTrue(np.array_equal(output, of_output))
def _local_tensor_numpy(eager_local_tensor):
    """Return the contents of ``eager_local_tensor`` as numpy data.

    Tensors of dtype ``flow.tensor_buffer`` are expanded into a list of numpy
    arrays, one per contained tensor. Any other tensor is copied into a newly
    allocated array of the tensor's shape and numpy dtype.
    """
    if eager_local_tensor.dtype == flow.tensor_buffer:
        shapes, dtypes = eager_local_tensor._tensor_buffer_shapes_and_dtypes
        unpacked = flow.experimental.tensor_buffer_to_list_of_tensors(
            Tensor(eager_local_tensor), shapes, dtypes
        )
        return [item.numpy() for item in unpacked]

    copier_name = eager_local_tensor._get_copy_mirrored_tensor_to_numpy_func_name()
    copier = getattr(eager_local_tensor, copier_name)

    target_shape = tuple(eager_local_tensor.shape)
    target_dtype = flow.convert_oneflow_dtype_to_numpy_dtype(
        eager_local_tensor.dtype
    )
    buffer = np.empty(target_shape, dtype=target_dtype)

    copier(buffer)
    return buffer
def _test_image_resize_with_cv(
    test_case,
    image_files,
    target_size,
    min_size=None,
    max_size=None,
    keep_aspect_ratio=True,
    resize_side="shorter",
    dtype=flow.float32,
    origin_dtype=None,
):
    """Compare ``_of_image_resize`` with the ``_cv_image_resize`` reference.

    The images are read with ``image_test_util.read_images_by_cv`` in
    ``origin_dtype`` (defaults to ``dtype``), resized by both
    implementations with the same options, and the resized images, scales
    and new sizes are compared pairwise with ``np.allclose``.

    Args:
        test_case: unittest-style object providing the assertions.
        image_files: paths of the images to resize.
        target_size, min_size, max_size, keep_aspect_ratio, resize_side:
            resize options forwarded unchanged to both implementations.
        dtype: OneFlow dtype of the resized output.
        origin_dtype: OneFlow dtype the images are read in.
    """
    if origin_dtype is None:
        origin_dtype = dtype

    image_list = image_test_util.read_images_by_cv(image_files, origin_dtype)

    (of_res_images, of_scales, of_new_sizes) = _of_image_resize(
        image_list=image_list,
        dtype=dtype,
        origin_dtype=origin_dtype,
        keep_aspect_ratio=keep_aspect_ratio,
        target_size=target_size,
        min_size=min_size,
        max_size=max_size,
        resize_side=resize_side,
    )
    (cv_res_images, cv_scales, cv_new_sizes) = _cv_image_resize(
        image_list=image_list,
        target_size=target_size,
        keep_aspect_ratio=keep_aspect_ratio,
        min_size=min_size,
        max_size=max_size,
        resize_side=resize_side,
        dtype=flow.convert_oneflow_dtype_to_numpy_dtype(dtype),
    )

    # zip() stops at the shortest sequence; require one result per input
    # image so that a short result cannot make the comparison pass vacuously.
    num_images = len(image_list)
    for results in (
        of_res_images,
        cv_res_images,
        of_scales,
        cv_scales,
        of_new_sizes,
        cv_new_sizes,
    ):
        test_case.assertEqual(len(results), num_images)

    for (
        of_res_image,
        cv_res_image,
        of_scale,
        cv_scale,
        of_new_size,
        cv_new_size,
    ) in zip(
        of_res_images, cv_res_images, of_scales, cv_scales, of_new_sizes, cv_new_sizes
    ):
        test_case.assertTrue(np.allclose(of_res_image, cv_res_image))
        test_case.assertTrue(np.allclose(of_scale, cv_scale))
        test_case.assertTrue(np.allclose(of_new_size, cv_new_size))
def _init(
    name, embedding_dims, dtype, key_type, tables, store_options, default_initializer
):
    """Validate embedding options and build the table / key-value store configs.

    The caller's ``store_options`` and ``tables`` are not modified: the
    function works on deep copies, so the per-rank division of cache
    ``capacity`` and of ``capacity_hint`` is applied exactly once even when
    the same option objects are reused for several calls.

    Args:
        name: stored under ``"name"`` in the key-value store options.
        embedding_dims: a positive int, or a list/tuple of per-column dims
            whose sum becomes the embedding dim.
        dtype: OneFlow dtype of the values; determines ``value_type_size``.
        key_type: OneFlow dtype of the keys; determines ``key_type_size``.
        tables: ``None`` or a list/tuple of table option dicts, each holding
            either ``"columns"`` or a single ``"initializer"``.
        store_options: mapping with ``"size_factor"`` and ``"kv_store"``; the
            latter must contain ``"persistent_table"`` and may contain
            ``"caches"``.
        default_initializer: initializer used when ``tables`` is ``None``;
            a falsy value selects normal(mean=0, std=0.05).

    Returns:
        ``(embedding_dim, embedding_tables, key_value_store_options)``.

    Raises:
        AssertionError: if any option fails validation.
    """
    import copy

    default_initializer = default_initializer or {
        "type": "normal",
        "mean": 0,
        "std": 0.05,
    }
    key_value_store_options = {}
    embedding_tables = {}
    key_value_store_options["name"] = name

    if isinstance(embedding_dims, (list, tuple)):
        column_dims = embedding_dims
        embedding_dim = sum(embedding_dims)
    else:
        assert embedding_dims > 0
        column_dims = [embedding_dims]
        embedding_dim = embedding_dims

    parallel_num = flow.env.get_world_size()

    key_type_size = np.dtype(
        flow.convert_oneflow_dtype_to_numpy_dtype(key_type)
    ).itemsize
    assert key_type_size > 0
    key_value_store_options["key_type_size"] = key_type_size

    value_type_size = np.dtype(
        flow.convert_oneflow_dtype_to_numpy_dtype(dtype)
    ).itemsize
    assert value_type_size > 0
    key_value_store_options["value_type_size"] = value_type_size

    scale_factor = store_options["size_factor"]
    key_value_store_options["storage_dim"] = scale_factor * embedding_dim

    # --- kv store -----------------------------------------------------------
    assert "kv_store" in store_options
    kv_store = store_options["kv_store"]
    assert isinstance(kv_store, dict)
    # Work on a private copy: the values below are rescaled per rank and
    # defaults are filled in, which must not leak back into the caller.
    kv_store = copy.deepcopy(kv_store)

    if "caches" in kv_store:
        caches = kv_store["caches"]
        assert isinstance(caches, (dict, list, tuple))
        if isinstance(caches, dict):
            _check_cache(caches)
            caches = [caches]
        else:
            assert len(caches) <= 2
            for cache in caches:
                assert isinstance(cache, dict)
                _check_cache(cache)
        # The configured capacity is split evenly across ranks.
        for cache in caches:
            if "capacity" in cache:
                cache["capacity"] = cache["capacity"] // parallel_num

    assert "persistent_table" in kv_store
    persistent_table = kv_store["persistent_table"]
    assert isinstance(persistent_table, dict)
    assert "path" in persistent_table
    persistent_table_path = persistent_table["path"]
    assert isinstance(persistent_table_path, (str, list, tuple))
    if isinstance(persistent_table_path, (list, tuple)):
        assert len(persistent_table_path) == parallel_num
    if "physical_block_size" in persistent_table:
        assert persistent_table["physical_block_size"] in [512, 4096]
    else:
        persistent_table["physical_block_size"] = 4096
    if "capacity_hint" in persistent_table:
        assert persistent_table["capacity_hint"] >= 0
        persistent_table["capacity_hint"] = (
            persistent_table["capacity_hint"] // parallel_num
        )
    key_value_store_options["kv_store"] = kv_store

    # --- initializer --------------------------------------------------------
    if tables is not None:
        assert isinstance(tables, (list, tuple))
        # Tables given with a single initializer are rewritten to the
        # per-column form below; do that on a copy of the caller's data.
        tables = copy.deepcopy(tables)
        for table in tables:
            if "columns" in table:
                assert "initializer" not in table
                columns = table["columns"]
                assert len(columns) == len(column_dims)
                for column in columns:
                    assert isinstance(column, dict)
                    assert "initializer" in column
                    _check_initializer(column["initializer"])
            else:
                assert isinstance(table, dict)
                assert "initializer" in table
                _check_initializer(table["initializer"])
                table["columns"] = [
                    make_column_options(table["initializer"]) for _ in column_dims
                ]
                del table["initializer"]
        embedding_tables["tables"] = tables
    else:
        assert default_initializer is not None
        _check_initializer(default_initializer)
        columns = [make_column_options(default_initializer) for _ in column_dims]
        embedding_tables["tables"] = [{"columns": columns}]

    embedding_tables["column_dims"] = column_dims
    key_value_store_options["parallel_num"] = parallel_num
    return embedding_dim, embedding_tables, key_value_store_options
def _test_image_resize_with_cv(
    test_case,
    image_files,
    target_size,
    min_size=None,
    max_size=None,
    keep_aspect_ratio=True,
    resize_side="shorter",
    dtype=flow.float32,
    origin_dtype=None,
    print_debug_info=False,
):
    """Compare ``_of_image_resize`` with the ``_cv_image_resize`` reference.

    The images are read with ``_cv_read_images_from_files`` in
    ``origin_dtype`` (defaults to ``dtype``), resized by both
    implementations with the same options, and the resized images, scales
    and new sizes are compared pairwise with ``np.allclose``.

    Args:
        test_case: unittest-style object providing the assertions.
        image_files: paths of the images to resize.
        target_size, min_size, max_size, keep_aspect_ratio, resize_side:
            resize options forwarded unchanged to both implementations.
        dtype: OneFlow dtype of the resized output.
        origin_dtype: OneFlow dtype the images are read in.
        print_debug_info: when true, print the options and a side-by-side
            summary of both results; also forwarded to ``_of_image_resize``.
    """
    if origin_dtype is None:
        origin_dtype = dtype

    image_list = _cv_read_images_from_files(image_files, origin_dtype)

    if print_debug_info:
        print("origin images shapes: {}".format([image.shape for image in image_list]))
        print(
            "target_size: {}, min_size: {}, max_size: {}, keep_aspect_ratio: {}, \n"
            "resize_side: {}, dtype: {}, origin_dtype: {}".format(
                target_size,
                min_size,
                max_size,
                keep_aspect_ratio,
                resize_side,
                dtype,
                origin_dtype,
            )
        )

    of_res_images, of_scales, of_new_sizes = _of_image_resize(
        image_list=image_list,
        dtype=dtype,
        origin_dtype=origin_dtype,
        keep_aspect_ratio=keep_aspect_ratio,
        target_size=target_size,
        min_size=min_size,
        max_size=max_size,
        resize_side=resize_side,
        print_debug_info=print_debug_info,
    )
    cv_res_images, cv_scales, cv_new_sizes = _cv_image_resize(
        image_list=image_list,
        target_size=target_size,
        keep_aspect_ratio=keep_aspect_ratio,
        min_size=min_size,
        max_size=max_size,
        resize_side=resize_side,
        dtype=flow.convert_oneflow_dtype_to_numpy_dtype(dtype),
    )

    if print_debug_info:
        print("comparing resized image between of and cv")
        for i, (of_image, cv_image) in enumerate(zip(of_res_images, cv_res_images)):
            print(" origin image shape: {}".format(image_list[i].shape))
            print(
                " resized image shape: {} vs. {}".format(of_image.shape, cv_image.shape)
            )

        print("comparing resized image scale between of and cv")
        for of_scale, cv_scale in zip(of_scales, cv_scales):
            print(" scale: {} vs. {}:".format(of_scale, cv_scale))

        print("comparing resized image new size between of and cv")
        for of_new_size, cv_new_size in zip(of_new_sizes, cv_new_sizes):
            print(" new_size: {} vs. {}:".format(of_new_size, cv_new_size))

    # zip() stops at the shortest sequence; require one result per input
    # image so that a short result cannot make the comparison pass vacuously.
    num_images = len(image_list)
    for results in (
        of_res_images,
        cv_res_images,
        of_scales,
        cv_scales,
        of_new_sizes,
        cv_new_sizes,
    ):
        test_case.assertEqual(len(results), num_images)

    for (
        of_res_image,
        cv_res_image,
        of_scale,
        cv_scale,
        of_new_size,
        cv_new_size,
    ) in zip(
        of_res_images,
        cv_res_images,
        of_scales,
        cv_scales,
        of_new_sizes,
        cv_new_sizes,
    ):
        test_case.assertTrue(np.allclose(of_res_image, cv_res_image))
        test_case.assertTrue(np.allclose(of_scale, cv_scale))
        test_case.assertTrue(np.allclose(of_new_size, cv_new_size))
def _load_snapshot_manually(path, shape, dtype):
    """Read the raw variable file ``<path>/var/out`` into a numpy array.

    The file is interpreted as a flat buffer of the numpy dtype matching
    ``dtype`` and reshaped to ``shape``.
    """
    variable_file = os.path.join(path, "var", "out")
    element_type = flow.convert_oneflow_dtype_to_numpy_dtype(dtype)
    flat_values = np.fromfile(variable_file, dtype=element_type)
    return flat_values.reshape(*shape)
def _test_random_body(self, shape, permute, dtype=flow.float32):
    """Call ``_test_body`` with random data of ``shape`` and ``permute``.

    The samples come from ``np.random.random`` and are cast to the numpy
    dtype matching ``dtype``.
    """
    element_type = flow.convert_oneflow_dtype_to_numpy_dtype(dtype)
    samples = np.random.random(shape)
    self._test_body(samples.astype(element_type), permute, dtype=dtype)