def test_engine_openmp_after_fork():
    """
    Test that the number of max threads in the child is 1. After forking we
    should not use a bigger OMP thread pool.

    With GOMP the child always reports the same number from omp_get_max_threads;
    with LLVM OMP the child respects the number of max threads set in the parent.
    """
    with EnvManager('OMP_NUM_THREADS', '42'):
        r, w = os.pipe()
        pid = os.fork()
        if pid:
            # Parent: signal the child through the pipe, then wait for it to exit.
            os.close(r)
            wfd = os.fdopen(w, 'w')
            wfd.write('a')
            omp_max_threads = mx.base._LIB.omp_get_max_threads()
            print("Parent omp max threads: {}".format(omp_max_threads))
            try:
                wfd.close()
            except Exception:
                pass
            try:
                (cpid, status) = os.waitpid(pid, 0)
                assert cpid == pid
                exit_status = status >> 8
                assert exit_status == 0
            except Exception:
                pass
        else:
            # Child: wait for the parent's byte, then check the OMP pool size.
            os.close(w)
            rfd = os.fdopen(r, 'r')
            rfd.read(1)
            omp_max_threads = mx.base._LIB.omp_get_max_threads()
            print("Child omp max threads: {}".format(omp_max_threads))
            assert omp_max_threads == 1
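# The tests in this section wrap environment-variable changes in an `EnvManager`
# context manager that is assumed to be defined at module level and is not shown
# in this excerpt. A minimal sketch of such a helper (the class body below is an
# assumption, not the original definition): it sets the variable on entry and
# restores the previous value, or removes the variable, on exit.
class EnvManager(object):
    """Temporarily set an environment variable inside a `with` block (sketch)."""

    def __init__(self, key, val):
        self._key = key
        self._next_val = val
        self._prev_val = None

    def __enter__(self):
        # Remember whatever value (if any) was set before entering the block.
        self._prev_val = os.environ.get(self._key)
        os.environ[self._key] = self._next_val

    def __exit__(self, exc_type, exc_value, traceback):
        # Restore the previous value, or drop the variable if it was unset.
        if self._prev_val is not None:
            os.environ[self._key] = self._prev_val
        else:
            del os.environ[self._key]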
def test_device_pushpull():
    def check_dense_pushpull(kv_type):
        # `shapes`, `keys` and `gpus` are module-level fixtures.
        for shape, key in zip(shapes, keys):
            for n_gpus in gpus:
                kv_device = mx.kv.create(kv_type)
                a = mx.nd.ones(shape, mx.gpu(0))
                cur_key = str(key * max(gpus) + n_gpus)
                kv_device.init(cur_key, a)
                arr_list = [mx.nd.ones(shape, mx.gpu(x)) for x in range(n_gpus)]
                res = [mx.nd.zeros(shape, mx.gpu(x)) for x in range(n_gpus)]
                kv_device.push(cur_key, arr_list)
                kv_device.pull(cur_key, res)
                # Pushing `n_gpus` arrays of ones sums to `n_gpus` in every element.
                for x in range(n_gpus):
                    assert np.sum(np.abs((res[x] - n_gpus).asnumpy())) == 0

    kvstore_tree_array_bound = 'MXNET_KVSTORE_TREE_ARRAY_BOUND'
    kvstore_usetree_values = ['', '1']
    kvstore_usetree = 'MXNET_KVSTORE_USETREE'
    # First pass with the default tree array bound, second pass with it set to 1.
    for _ in range(2):
        for x in kvstore_usetree_values:
            with EnvManager(kvstore_usetree, x):
                check_dense_pushpull('local')
                check_dense_pushpull('device')
        os.environ[kvstore_tree_array_bound] = '1'
    del os.environ[kvstore_tree_array_bound]
def test_unary_func():
    def check_unary_func(x):
        f_exp = lambda x: nd.exp(x)
        f_exp_grad = lambda x: [nd.exp(x)]
        autograd_assert(x, func=f_exp, grad_func=f_exp_grad)
        f_half = lambda x: x / 2
        f_half_grad = lambda x: [nd.ones(x.shape) * 0.5]
        autograd_assert(x, func=f_half, grad_func=f_half_grad)
        f_square = lambda x: x**2
        f_square_grad = lambda x: [2 * x]
        autograd_assert(x, func=f_square, grad_func=f_square_grad)

    uniform = nd.uniform(shape=(4, 5))
    stypes = ['default', 'row_sparse', 'csr']
    with EnvManager('MXNET_STORAGE_FALLBACK_LOG_VERBOSE', '0'):
        for stype in stypes:
            check_unary_func(uniform.tostype(stype))
def test_binary_func():
    def check_binary_func(x, y):
        f_add = lambda x, y: x + y
        f_add_grad = lambda x, y: [nd.ones(x.shape), nd.ones(y.shape)]
        autograd_assert(x, y, func=f_add, grad_func=f_add_grad)
        f_mul = lambda x, y: x * y
        f_mul_grad = lambda x, y: [y, x]
        autograd_assert(x, y, func=f_mul, grad_func=f_mul_grad)
        f_compose = lambda x, y: x + x * y
        f_compose_grad = lambda x, y: [nd.ones(x.shape) + y, x]
        autograd_assert(x, y, func=f_compose, grad_func=f_compose_grad)

    uniform_x = nd.uniform(shape=(4, 5))
    uniform_y = nd.uniform(shape=(4, 5))
    stypes = ['default', 'row_sparse', 'csr']
    with EnvManager('MXNET_STORAGE_FALLBACK_LOG_VERBOSE', '0'):
        for stype_x in stypes:
            for stype_y in stypes:
                x = uniform_x.tostype(stype_x)
                y = uniform_y.tostype(stype_y)
                check_binary_func(x, y)
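# `test_unary_func` and `test_binary_func` above rely on an `autograd_assert`
# helper that is not shown in this excerpt. A minimal sketch, assuming the
# helper compares gradients computed by `mx.autograd.grad_and_loss` against the
# hand-written `grad_func` (the body below is an assumption, not the original):
def autograd_assert(*args, **kwargs):
    func = kwargs['func']
    grad_f = kwargs['grad_func']
    argnum = kwargs.get('argnum', None)

    # Differentiate `func` with respect to its inputs and evaluate it once.
    grad_func = mx.autograd.grad_and_loss(func, argnum)
    grad_vals, output = grad_func(*args)
    res = func(*args)
    assert_almost_equal(output.asnumpy(), res.asnumpy())

    # The autograd gradients must match the analytically expected ones.
    grad_res = grad_f(*args)
    assert len(grad_vals) == len(grad_res)
    for computed, expected in zip(grad_vals, grad_res):
        assert_almost_equal(computed.asnumpy(), expected.asnumpy())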
def test_rsp_push_pull():
    def check_rsp_push_pull(kv_type, sparse_pull, is_push_cpu=True):
        kv = init_kv_with_str('row_sparse', kv_type)
        kv.init('e', mx.nd.ones(shape).tostype('row_sparse'))
        push_ctxs = [mx.cpu(i) if is_push_cpu else mx.gpu(i) for i in range(2)]
        kv.push('e', [mx.nd.ones(shape, ctx=context).tostype('row_sparse')
                      for context in push_ctxs])

        def check_rsp_pull(kv, ctxs, sparse_pull, is_same_rowid=False, use_slice=False):
            count = len(ctxs)
            num_rows = shape[0]
            row_ids = []
            all_row_ids = np.arange(num_rows)
            vals = [mx.nd.sparse.zeros(shape=shape, ctx=ctxs[i], stype='row_sparse')
                    for i in range(count)]
            if is_same_rowid:
                row_id = np.random.randint(num_rows, size=num_rows)
                row_ids = [mx.nd.array(row_id)] * count
            elif use_slice:
                total_row_ids = mx.nd.array(np.random.randint(num_rows, size=count * num_rows))
                row_ids = [total_row_ids[i * num_rows:(i + 1) * num_rows] for i in range(count)]
            else:
                for i in range(count):
                    row_id = np.random.randint(num_rows, size=num_rows)
                    row_ids.append(mx.nd.array(row_id))
            row_ids_to_pull = row_ids[0] if (len(row_ids) == 1 or is_same_rowid) else row_ids
            vals_to_pull = vals[0] if len(vals) == 1 else vals

            kv.row_sparse_pull('e', out=vals_to_pull, row_ids=row_ids_to_pull)
            for val, row_id in zip(vals, row_ids):
                retained = val.asnumpy()
                excluded_row_ids = np.setdiff1d(all_row_ids, row_id.asnumpy())
                for row in range(num_rows):
                    # Two pushed arrays of ones sum to 2 in every requested row;
                    # rows that were not requested stay zero.
                    expected_val = np.zeros_like(retained[row])
                    expected_val += 0 if row in excluded_row_ids else 2
                    assert_almost_equal(retained[row], expected_val)

            if sparse_pull is True:
                kv.pull('e', out=vals_to_pull, ignore_sparse=False)
                for val in vals:
                    retained = val.asnumpy()
                    expected_val = np.zeros_like(retained)
                    expected_val[:] = 2
                    assert_almost_equal(retained, expected_val)

        check_rsp_pull(kv, [mx.gpu(0)], sparse_pull)
        check_rsp_pull(kv, [mx.cpu(0)], sparse_pull)
        check_rsp_pull(kv, [mx.gpu(i // 2) for i in range(4)], sparse_pull)
        check_rsp_pull(kv, [mx.gpu(i // 2) for i in range(4)], sparse_pull, is_same_rowid=True)
        check_rsp_pull(kv, [mx.cpu(i) for i in range(4)], sparse_pull)
        check_rsp_pull(kv, [mx.cpu(i) for i in range(4)], sparse_pull, is_same_rowid=True)
        check_rsp_pull(kv, [mx.gpu(i // 2) for i in range(4)], sparse_pull, use_slice=True)
        check_rsp_pull(kv, [mx.cpu(i) for i in range(4)], sparse_pull, use_slice=True)

    envs = ["", "1"]
    key = "MXNET_KVSTORE_USETREE"
    for val in envs:
        with EnvManager(key, val):
            # Tree kvstore does not support the dense fallback pull path.
            if val == "1":
                sparse_pull = False
            else:
                sparse_pull = True
            check_rsp_push_pull('local', sparse_pull)
            check_rsp_push_pull('device', sparse_pull)
            check_rsp_push_pull('device', sparse_pull, is_push_cpu=False)
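# `test_rsp_push_pull` above also depends on a module-level `shape` fixture and
# on an `init_kv_with_str` helper, neither of which appears in this excerpt.
# A minimal sketch, assuming the helper simply creates a kvstore of the requested
# type and pre-initializes a string key with the given storage type (the name 'a'
# and the body are assumptions for illustration):
def init_kv_with_str(stype='default', kv_type='local'):
    """Create a kvstore with string keys of the given storage type (sketch)."""
    kv = mx.kv.create(kv_type)
    # `shape` is the same module-level fixture the tests above rely on.
    kv.init('a', mx.nd.ones(shape).tostype(stype))
    return kv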