def test_post_nccl_fork():
    """Report how many GPUs are visible vs. physically present before the NCCL fork test."""
    # device_count() honors CUDA_VISIBLE_DEVICES; list_gpus() enumerates all hardware.
    visible_gpus = get_device_count()
    present_gpus = len(mx.test_utils.list_gpus())
    # Emit a compact single-line status so CI output stays readable.
    status = '{} of {} gpus are visible ... '.format(visible_gpus, present_gpus)
    sys.stdout.write(status)
    sys.stdout.flush()
# Beispiel #2  (extraction artifact — stray marker commented out so the file parses)
def test_sync_batchnorm():
    """Cross-check synchronized BatchNorm against the unsynchronized version on 2 GPUs."""
    num_devices = get_device_count()

    # SyncBN is pointless with a single device — skip with a note on stderr.
    if num_devices < 2:
        sys.stderr.write('bypassing test: needs 2 or more gpus, found {} ...'.format(num_devices))
        return

    ndev = 2
    # Repeat with fresh random inputs to reduce the chance of a fluke pass.
    for _ in range(10):
        data = mx.nd.random.uniform(shape=(4, 1, 4, 4))
        _check_batchnorm_result(data, num_devices=ndev, cuda=True)
# Beispiel #3  (extraction artifact — stray marker commented out so the file parses)
def test_rsp_push_pull():
    """Exercise row_sparse push/pull through the KVStore on CPU and GPU contexts.

    Pushes ones from two contexts (sum = 2 per touched row), then pulls with
    various row-id patterns (shared ids, sliced ids, independent ids) and
    verifies retained rows equal 2 and excluded rows stay 0. Repeated with
    MXNET_KVSTORE_USETREE unset and set to "1" (tree mode has no sparse pull).
    """
    num_gpus = get_device_count()

    def check_rsp_push_pull(kv_type, sparse_pull, is_push_cpu=True):
        # NOTE(review): `shape` is a module-level global defined elsewhere
        # in this file — presumably a tuple; confirm against the full file.
        kv = init_kv_with_str('row_sparse', kv_type)
        kv.init('e', mx.nd.ones(shape).tostype('row_sparse'))
        # Push ones from two contexts; the KVStore sums them, so every
        # pushed row should read back as 2.
        push_ctxs = [mx.cpu(i) if is_push_cpu else mx.gpu(i) for i in range(2)]
        kv.push('e', [
            mx.nd.ones(shape, ctx=context).tostype('row_sparse')
            for context in push_ctxs
        ])

        def check_rsp_pull(kv,
                           count,
                           ctxs,
                           sparse_pull,
                           is_same_rowid=False,
                           use_slice=False):
            num_rows = shape[0]
            row_ids = []
            all_row_ids = np.arange(num_rows)
            # One destination row_sparse array per context.
            vals = [
                mx.nd.sparse.zeros(shape=shape,
                                   ctx=ctxs[i],
                                   stype='row_sparse') for i in range(count)
            ]
            if is_same_rowid:
                # Every context requests the identical (random) set of rows.
                row_id = np.random.randint(num_rows, size=num_rows)
                row_ids = [mx.nd.array(row_id)] * count
            elif use_slice:
                # Row ids come from slices of one big NDArray, exercising
                # the sliced-row_id code path.
                total_row_ids = mx.nd.array(
                    np.random.randint(num_rows, size=count * num_rows))
                row_ids = [
                    total_row_ids[i * num_rows:(i + 1) * num_rows]
                    for i in range(count)
                ]
            else:
                # Independent random row ids per context.
                for i in range(count):
                    row_id = np.random.randint(num_rows, size=num_rows)
                    row_ids.append(mx.nd.array(row_id))
            # A single output (or identical row ids) is passed unwrapped.
            row_ids_to_pull = row_ids[0] if (len(row_ids) == 1
                                             or is_same_rowid) else row_ids
            vals_to_pull = vals[0] if len(vals) == 1 else vals

            kv.row_sparse_pull('e', out=vals_to_pull, row_ids=row_ids_to_pull)
            for val, row_id in zip(vals, row_ids):
                retained = val.asnumpy()
                excluded_row_ids = np.setdiff1d(all_row_ids, row_id.asnumpy())
                for row in range(num_rows):
                    # Requested rows hold the pushed sum (2); others stay 0.
                    expected_val = np.zeros_like(retained[row])
                    expected_val += 0 if row in excluded_row_ids else 2
                    assert_almost_equal(retained[row], expected_val)

            if sparse_pull is True:
                # A dense-style pull must return 2 everywhere.
                kv.pull('e', out=vals_to_pull, ignore_sparse=False)
                for val in vals:
                    retained = val.asnumpy()
                    expected_val = np.zeros_like(retained)
                    expected_val[:] = 2
                    assert_almost_equal(retained, expected_val)

        check_rsp_pull(kv, 1, [mx.gpu(0)], sparse_pull)
        check_rsp_pull(kv, 1, [mx.cpu(0)], sparse_pull)
        num_gpu_ctxs = 2 * num_gpus
        num_cpu_ctxs = 4
        check_rsp_pull(kv, num_gpu_ctxs,
                       [mx.gpu(i // 2) for i in range(num_gpu_ctxs)],
                       sparse_pull)
        check_rsp_pull(kv,
                       num_gpu_ctxs,
                       [mx.gpu(i // 2) for i in range(num_gpu_ctxs)],
                       sparse_pull,
                       is_same_rowid=True)
        check_rsp_pull(kv, num_cpu_ctxs,
                       [mx.cpu(i) for i in range(num_cpu_ctxs)], sparse_pull)
        check_rsp_pull(kv,
                       num_cpu_ctxs, [mx.cpu(i) for i in range(num_cpu_ctxs)],
                       sparse_pull,
                       is_same_rowid=True)
        check_rsp_pull(kv,
                       num_gpu_ctxs,
                       [mx.gpu(i // 2) for i in range(num_gpu_ctxs)],
                       sparse_pull,
                       use_slice=True)
        check_rsp_pull(kv,
                       num_cpu_ctxs, [mx.cpu(i) for i in range(num_cpu_ctxs)],
                       sparse_pull,
                       use_slice=True)

    envs = ["", "1"]
    key = "MXNET_KVSTORE_USETREE"
    for val in envs:
        with EnvManager(key, val):
            # BUG FIX: the original used `val is "1"` — identity comparison
            # against a string literal, which only works by interning accident
            # and raises SyntaxWarning on CPython >= 3.8. Use equality.
            sparse_pull = (val != "1")
            check_rsp_push_pull('local', sparse_pull)
            check_rsp_push_pull('device', sparse_pull)
            if num_gpus >= 2:
                check_rsp_push_pull('device', sparse_pull, is_push_cpu=False)
            else:
                sys.stdout.write('Bypassing 2-GPU test, num gpus found = ' +
                                 str(num_gpus) + ' ... ')
                sys.stdout.flush()
import sys
import os
import mxnet as mx
import numpy as np
import unittest
from mxnet.cuda_utils import get_device_count

# Make the shared unittest helpers (with_seed, EnvManager, ...) importable.
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
sys.path.insert(0, os.path.join(curr_path, '../unittest'))
from common import *


# NOTE(review): entries like (10) are plain ints, not 1-element tuples —
# parentheses without a comma do not make a tuple. mx.nd appears to accept
# an int shape here, but confirm this is intentional before changing it.
shapes = [(10), (100), (1000), (10000), (100000), (2,2), (2,3,4,5,6,7,8)]
keys = [1,2,3,4,5,6,7]
num_gpus = get_device_count()


# PCI-E hardware limits the number of P2P peers to 8, so cap the device count.
if num_gpus > 8:
    print("The machine has {} gpus. We will run the test on 8 gpus.".format(num_gpus))
    print("There is a limit for all PCI-E hardware on creating number of P2P peers. The limit is 8.")
    num_gpus = 8

gpus = range(1, 1+num_gpus)

@with_seed()
def test_nccl_pushpull():
    sys.stdout.write('Performing nccl test with ' + str(num_gpus) + ' gpu(s): ')
    sys.stdout.flush()
    for shape, key in zip(shapes, keys):
        for n_gpus in gpus: