Esempio n. 1
0
def check_hybrid_static_memory(**kwargs):
    """Check that a hybridized ResNet-18 matches its imperative twin.

    Two pretrained ResNet-18 (version 1) models are created with the same
    prefix; the second one is hybridized with ``**kwargs``. Both are run on
    the same random input, and their outputs and parameter gradients must
    agree within ``rtol=1e-3`` / ``atol=1e-5``.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to ``hybridize`` of the second network.
    """
    inputs = mx.nd.random.uniform(shape=(2, 3, 32, 32))
    inputs.attach_grad()

    def build_resnet18():
        return gluon.model_zoo.vision.get_resnet(
            1, 18, pretrained=True, prefix='net_', ctx=mx.context.current_context())

    imperative_net = build_resnet18()
    hybrid_net = build_resnet18()
    hybrid_net.hybridize(**kwargs)
    # One plain forward pass through each network before recording gradients.
    imperative_net(inputs)
    hybrid_net(inputs)

    def forward_backward(net, data):
        """Return (output, {param name: gradient}) for two summed forward passes."""
        with mx.autograd.record():
            summed = net(data) + net(data)
            summed.backward()

        collected = {}
        for name, param in net.collect_params().items():
            # Parameters that do not request a gradient are skipped.
            if param.grad_req != 'null':
                collected[name] = param.grad()
        return summed, collected

    ref_out, ref_grads = forward_backward(imperative_net, inputs)
    hyb_out, hyb_grads = forward_backward(hybrid_net, inputs)

    assert_almost_equal(ref_out.asnumpy(), hyb_out.asnumpy(), rtol=1e-3, atol=1e-5)
    for name, ref_grad in ref_grads.items():
        assert_almost_equal(ref_grad.asnumpy(), hyb_grads[name].asnumpy(), rtol=1e-3, atol=1e-5)
Esempio n. 2
0
def test_activations():
    """Check the Swish, ELU, SELU and PReLU Gluon blocks against reference formulas.

    Each activation is evaluated on the points ``[-0.1, 0.1] * 3`` and compared
    element by element with a hand-written reference implementation.
    """
    point_to_validate = mx.nd.array([-0.1, 0.1] * 3)

    swish = mx.gluon.nn.Swish()
    def swish_test(x):
        return x * mx.nd.sigmoid(x)

    for test_point, ref_point in zip(swish_test(point_to_validate), swish(point_to_validate)):
        assert test_point == ref_point

    elu = mx.gluon.nn.ELU()
    def elu_test(x):
        def elu_ref(x_i):
            return 1.0 * (mx.nd.exp(x_i) - 1) if x_i < 0 else x_i
        return [elu_ref(x_i) for x_i in x]

    for test_point, ref_point in zip(elu_test(point_to_validate), elu(point_to_validate)):
        assert test_point == ref_point

    selu = mx.gluon.nn.SELU()
    def selu_test(x):
        def selu_ref(x_i):
            scale, alpha = 1.0507009873554804934193349852946, 1.6732632423543772848170429916717
            # SELU(x) = scale * x for x >= 0, scale * alpha * (exp(x) - 1) otherwise;
            # the scale factor applies to BOTH branches.
            return scale * x_i if x_i >= 0 else scale * alpha * (mx.nd.exp(x_i) - 1)
        return [selu_ref(x_i) for x_i in x]

    # Compare the block against the reference (not against itself). The reference
    # evaluates the formula with a different operation order than the operator may
    # use, so the comparison is made with a small tolerance instead of exact equality.
    for test_point, ref_point in zip(selu_test(point_to_validate), selu(point_to_validate)):
        assert_almost_equal(test_point.asnumpy(), ref_point.asnumpy(), rtol=1e-5, atol=1e-6)

    prelu = mx.gluon.nn.PReLU()
    prelu.initialize()
    x = point_to_validate.reshape((1, 3, 2))
    # 0.25 is the slope the reference uses for negative inputs of a freshly initialized PReLU.
    assert_almost_equal(prelu(x).asnumpy(), mx.nd.where(x >= 0, x, 0.25 * x).asnumpy())
    def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_pool):
        """Compare fp32 ``Pooling`` with ``contrib.quantized_pooling`` on GPU 0.

        The fp32 input is drawn uniformly from [-127, 127) and cast to int32
        before being stored, and the same values (cast to int8) feed the
        quantized operator with a declared range of [-127, 127]. For 'max'
        pooling the outputs must be almost equal; for 'avg' pooling no element
        may differ by more than 2.
        """
        pool_args = dict(kernel=kernel, pad=pad, stride=stride,
                         pool_type=pool_type, global_pool=global_pool)
        with mx.Context('gpu', 0):
            # --- fp32 reference ---
            fp32_input = mx.sym.Variable(name='data', shape=data_shape, dtype='float32')
            fp32_sym = mx.sym.Pooling(data=fp32_input, cudnn_off=False, **pool_args)
            # The inferred shapes are not used afterwards.
            _arg_shapes, _, _ = fp32_sym.infer_shape(data=data_shape)
            fp32_names = fp32_sym.list_arguments()
            fp32_exe = fp32_sym.simple_bind(ctx=mx.current_context(), grad_req='null')
            random_ints = mx.nd.random.uniform(low=-127.0, high=127.0, shape=data_shape).astype('int32')
            fp32_exe.arg_dict[fp32_names[0]][:] = random_ints
            output = fp32_exe.forward()[0]

            # --- int8 operator under test ---
            int8_input = mx.sym.Variable(name='qdata', shape=data_shape, dtype='int8')
            range_lo = mx.sym.Variable(name='min_data')
            range_hi = mx.sym.Variable(name='max_data')
            int8_sym = mx.sym.contrib.quantized_pooling(data=int8_input, min_data=range_lo,
                                                        max_data=range_hi, **pool_args)
            int8_exe = int8_sym.simple_bind(ctx=mx.current_context(), grad_req='null')
            int8_names = int8_sym.list_arguments()
            int8_exe.arg_dict[int8_names[0]][:] = fp32_exe.arg_dict[fp32_names[0]].astype('int8')
            quantized_range = 127.0
            int8_exe.arg_dict[int8_names[1]][:] = -quantized_range
            int8_exe.arg_dict[int8_names[2]][:] = quantized_range
            qoutput, _min_range, _max_range = int8_exe.forward()

            if pool_type == 'max':
                assert_almost_equal(output.asnumpy(), qoutput.asnumpy())
            elif pool_type == 'avg':
                # fp32 and int8 averages may differ because of rounding;
                # count the elements whose absolute difference exceeds 2.
                abs_diff = mx.nd.abs(output - qoutput.astype(output.dtype))
                num_violations = mx.nd.lesser(2, abs_diff).sum().asscalar()
                assert num_violations == 0
    def check_quantized_conv(data_shape, kernel, num_filter, pad, stride, no_bias):
        """Compare fp32 ``Convolution`` with ``contrib.quantized_conv`` on GPU 0.

        Data, weight and (unless ``no_bias``) bias of the fp32 executor are drawn
        uniformly from [-127, 127) and cast to int32 before being stored; the
        quantized executor receives the same values cast to int8 together with
        [-127, 127] min/max ranges. Without bias the outputs must be almost
        equal; with bias no element may differ by more than 2.
        """
        conv_args = dict(kernel=kernel, num_filter=num_filter, pad=pad, stride=stride,
                         no_bias=no_bias)

        def random_ints(shape):
            return mx.nd.random.uniform(low=-127.0, high=127.0, shape=shape).astype('int32')

        with mx.Context('gpu', 0):
            # --- fp32 reference convolution ---
            fp32_input = mx.sym.Variable(name='data', shape=data_shape, dtype='float32')
            conv2d = mx.sym.Convolution(data=fp32_input, cudnn_off=False, name='conv2d', **conv_args)
            arg_shapes, _, _ = conv2d.infer_shape(data=data_shape)
            arg_names = conv2d.list_arguments()
            conv_exe_fp32 = conv2d.simple_bind(ctx=mx.current_context(), grad_req='null')
            conv_exe_fp32.arg_dict[arg_names[0]][:] = random_ints(data_shape)
            conv_exe_fp32.arg_dict[arg_names[1]][:] = random_ints(arg_shapes[1])
            if not no_bias:
                conv_exe_fp32.arg_dict[arg_names[2]][:] = random_ints(arg_shapes[2])
            output = conv_exe_fp32.forward()[0]

            # --- quantized convolution ---
            qdata = mx.sym.Variable(name='qdata', shape=data_shape, dtype='int8')
            qweight = mx.sym.Variable(name='qweight', dtype='int8')
            range_vars = {name: mx.sym.Variable(name=name)
                          for name in ('min_data', 'max_data', 'min_weight', 'max_weight')}
            quantized_conv2d = mx.sym.contrib.quantized_conv(data=qdata, weight=qweight,
                                                             min_data=range_vars['min_data'],
                                                             max_data=range_vars['max_data'],
                                                             min_weight=range_vars['min_weight'],
                                                             max_weight=range_vars['max_weight'],
                                                             **conv_args)
            qarg_names = quantized_conv2d.list_arguments()
            # With a bias present, its argument (index 2) is bound as int8.
            type_dict = None if no_bias else {qarg_names[2]: 'int8'}
            conv_exe_int8 = quantized_conv2d.simple_bind(ctx=mx.current_context(), type_dict=type_dict,
                                                         grad_req='null')
            for idx in (0, 1):
                conv_exe_int8.arg_dict[qarg_names[idx]][:] = conv_exe_fp32.arg_dict[arg_names[idx]].astype('int8')

            quantized_range = 127.0
            if no_bias:
                # Arguments 2..5 are the (min, max) pairs.
                first_range_idx, end_range_idx = 2, 6
            else:
                conv_exe_int8.arg_dict[qarg_names[2]][:] = conv_exe_fp32.arg_dict[arg_names[2]].astype('int8')
                # Arguments 3..8 are the (min, max) pairs.
                first_range_idx, end_range_idx = 3, 9
            for offset, idx in enumerate(range(first_range_idx, end_range_idx)):
                is_min = offset % 2 == 0
                conv_exe_int8.arg_dict[qarg_names[idx]][:] = -quantized_range if is_min else quantized_range
            qoutput, _min_range, _max_range = conv_exe_int8.forward()

            if no_bias:
                assert_almost_equal(output.asnumpy(), qoutput.asnumpy())
            else:
                # with adding bias, accuracy loss should not be greater than one
                abs_diff = mx.nd.abs(output - qoutput.astype(output.dtype))
                num_violations = mx.nd.lesser(2, abs_diff).sum().asscalar()
                assert num_violations == 0
Esempio n. 5
0
        def check_rsp_pull(kv, ctxs, sparse_pull, is_same_rowid=False, use_slice=False):
            """Row-sparse pull key 'e' into one buffer per context and verify the rows.

            ``shape`` is taken from the enclosing scope. Rows whose id was requested
            must come back filled with 2, all other rows must stay 0 (presumably 2 is
            the value stored under 'e' by the caller -- confirm against the caller).

            Parameters
            ----------
            kv : kvstore to pull from.
            ctxs : contexts; one output buffer is created per entry.
            sparse_pull : when exactly ``True``, additionally do a plain ``pull``
                with ``ignore_sparse=False`` and expect every element to be 2.
            is_same_rowid : use one shared row-id array for every buffer.
            use_slice : build the per-buffer row ids as slices of one large array.
            """
            num_devices = len(ctxs)
            num_rows = shape[0]
            all_row_ids = np.arange(num_rows)
            vals = [mx.nd.sparse.zeros(shape=shape, ctx=ctx, stype='row_sparse') for ctx in ctxs]

            if is_same_rowid:
                shared_ids = np.random.randint(num_rows, size=num_rows)
                row_ids = [mx.nd.array(shared_ids)] * num_devices
            elif use_slice:
                pooled_ids = mx.nd.array(np.random.randint(num_rows, size=num_devices*num_rows))
                row_ids = [pooled_ids[i*num_rows : (i+1)*num_rows] for i in range(num_devices)]
            else:
                row_ids = [mx.nd.array(np.random.randint(num_rows, size=num_rows))
                           for _ in range(num_devices)]

            # A single array/buffer is passed bare instead of as a one-element list.
            pass_single_ids = len(row_ids) == 1 or is_same_rowid
            row_ids_to_pull = row_ids[0] if pass_single_ids else row_ids
            vals_to_pull = vals[0] if len(vals) == 1 else vals

            kv.row_sparse_pull('e', out=vals_to_pull, row_ids=row_ids_to_pull)
            for val, row_id in zip(vals, row_ids):
                retained = val.asnumpy()
                excluded_row_ids = np.setdiff1d(all_row_ids, row_id.asnumpy())
                for row in range(num_rows):
                    fill = 0 if row in excluded_row_ids else 2
                    expected_row = np.full_like(retained[row], fill)
                    assert_almost_equal(retained[row], expected_row)

            if sparse_pull is True:
                kv.pull('e', out=vals_to_pull, ignore_sparse=False)
                for val in vals:
                    retained = val.asnumpy()
                    assert_almost_equal(retained, np.full_like(retained, 2))
Esempio n. 6
0
    def check_compr_random(kv, threshold, nworker):
        """Push random gradients under 2-bit compression and check the stored delta.

        For three fresh keys (shapes ``shape``, ``irregular_shape`` and
        ``big_shape`` from the enclosing scope) a random gradient is pushed
        ``nrepeat`` times. After each push the change of the stored value must
        equal the simulated decompressed gradient scaled by ``nworker * rate``
        (``rate`` also comes from the enclosing scope).
        """
        # Seed both generators so that all workers generate the same data;
        # knowing this makes the expected value after pull computable.
        mx.random.seed(123)
        rnd.seed(123)
        nrepeat = 5
        keys_and_shapes = [('2121', shape),('212221',irregular_shape),('21221', big_shape)]
        # New keys are used so the residual starts at 0 for the expected-value simulation.
        for key, key_shape in keys_and_shapes:
            kv.init(key, mx.nd.zeros(key_shape))
        for key, key_shape in keys_and_shapes:
            residual = np.zeros(key_shape)
            for _ in range(nrepeat):
                before = mx.nd.zeros(key_shape)
                kv.pull(key, before)

                grad = mx.nd.array(rnd.rand(key_shape[0], key_shape[1]))
                # Keep a copy: push changes grad because of assignment.
                grad_snapshot = mx.nd.array(grad)
                kv.push(key, grad)
                after = mx.nd.zeros(key_shape)
                kv.pull(key, after)

                delta = after - before

                # Expected value comes from a simulation of the quantization operator.
                _, residual, expected = compute_expected_2bit_quantization(grad_snapshot, residual, threshold)
                expected *= nworker * rate
                assert_almost_equal(delta.asnumpy(), expected)
Esempio n. 7
0
def test_req():
    """Check that ``grad_req='add'`` accumulates gradients over two backward passes.

    A small hybridized network of nested ``HybridSequential`` blocks is run
    twice on the same input; the mean weight gradient of the first Dense layer
    after the second backward must be twice the mean after the first.
    """
    data = mx.nd.random.uniform(shape=(1,3,224,224))
    label = mx.nd.random.uniform(shape=(1))
    label[:] = 1
    loss = gluon.loss.SoftmaxCrossEntropyLoss()

    # Two sub-networks nested inside an outer container.
    net = nn.HybridSequential()
    head = nn.HybridSequential()
    head.add(nn.Dense(4))
    tail = nn.HybridSequential()
    tail.add(nn.Dense(3))
    tail.add(nn.Dense(2))
    net.add(head)
    net.add(tail)
    net.initialize()

    net.hybridize()

    for param in net.collect_params().values():
        param.grad_req = 'add'

    def mean_first_weight_grad():
        return net[0][0].weight.grad().mean().asnumpy()

    def forward_backward():
        prediction = net(data)
        loss(prediction, label).backward()

    net.collect_params().zero_grad()
    with mx.autograd.record():
        forward_backward()
        grad = mean_first_weight_grad()
        # run twice to check req = add
        forward_backward()

    grad_double = mean_first_weight_grad()
    assert_almost_equal(grad * 2, grad_double)
def test_gnmt_encoder():
    """Exercise ``GNMTEncoder`` over several cell types, depths and residual settings.

    For every configuration the hybridized encoder is run on random inputs with
    random valid lengths. Outputs beyond a sequence's valid length must be zero
    (checked only when the valid length is below ``seq_length - 1``), the first
    output must have shape ``(batch_size, seq_length, 8)`` and the second output
    must have one entry per layer.
    """
    ctx = mx.Context.default_ctx
    layer_configs = [(2, 1), (3, 0)]  # (num_layers, num_bi_layers)
    for cell_type in ["lstm", "gru", "relu_rnn", "tanh_rnn"]:
        for num_layers, num_bi_layers in layer_configs:
            for use_residual in [False, True]:
                encoder = GNMTEncoder(cell_type=cell_type, num_layers=num_layers,
                                      num_bi_layers=num_bi_layers, hidden_size=8,
                                      dropout=0.0, use_residual=use_residual,
                                      prefix='gnmt_encoder_')
                encoder.initialize(ctx=ctx)
                encoder.hybridize()
                for batch_size in [4]:
                    for seq_length in [5, 10]:
                        inputs_nd = mx.nd.random.normal(0, 1, shape=(batch_size, seq_length, 4), ctx=ctx)
                        random_lengths = np.random.randint(1, seq_length, size=(batch_size,))
                        valid_length_nd = mx.nd.array(random_lengths, ctx=ctx)
                        encoder_outputs, _ = encoder(inputs_nd, valid_length=valid_length_nd)
                        valid_length_npy = valid_length_nd.asnumpy()
                        rnn_output = encoder_outputs[0].asnumpy()
                        for sample in range(batch_size):
                            if not valid_length_npy[sample] < seq_length - 1:
                                continue
                            # Everything from the valid length onwards is padding.
                            first_pad = int(valid_length_npy[sample])
                            padded_out = rnn_output[sample, first_pad:, :]
                            assert_almost_equal(padded_out, np.zeros_like(padded_out), 1E-6, 1E-6)
                        assert(encoder_outputs[0].shape == (batch_size, seq_length, 8))
                        assert(len(encoder_outputs[1]) == num_layers)
Esempio n. 9
0
 def pull_init_test(kv):
     """Pull ``gc_init_test_key`` onto every GPU worker and expect all ones.

     Checks that compression is not applied to the init of a key. ``shapes``,
     ``nworker`` and ``gc_init_test_key`` come from the enclosing scope.
     """
     buffers = [mx.nd.zeros(shapes[0], mx.gpu(gpu_id)) for gpu_id in range(nworker)]
     kv.pull(gc_init_test_key, out=buffers)
     expected = np.ones_like(buffers[0].asnumpy())
     for pulled in buffers:
         assert_almost_equal(pulled.asnumpy(), expected)
def check_with_uniform(uf, arg_shapes, dim=None, npuf=None, rmin=-10, type_list=(np.float32,)):
    """Check function consistency with uniform random numbers.

    Parameters
    ----------
    uf : callable
        Function under test; called with MXNet NDArray arguments.
    arg_shapes : int or sequence of shapes
        Either the shapes of the arguments, or the number of arguments, in
        which case one random ``dim``-dimensional shape is shared by all of them.
    dim : int, optional
        Number of dimensions of the random shape; required (and must be truthy)
        when ``arg_shapes`` is an int.
    npuf : callable, optional
        NumPy reference implementation; ``uf`` itself is applied to the NumPy
        inputs when omitted.
    rmin : number
        Lower bound of the uniform samples (the upper bound is 10).
    type_list : iterable of dtypes
        Data types to run the comparison for.
    """
    if isinstance(arg_shapes, int):
        assert dim
        # randint's upper bound is exclusive and must be greater than the lower
        # bound; for dim >= 10 the computed bound would collapse to 1, so clamp
        # it to 2 (every axis then has length 1).
        upper = max(2, int(1000**(1.0/dim)))
        shape = tuple(np.random.randint(1, upper, size=dim))
        arg_shapes = [shape] * arg_shapes
    for dtype in type_list:
        ndarray_arg = []
        numpy_arg = []
        for s in arg_shapes:
            npy = np.random.uniform(rmin, 10, s).astype(dtype)
            narr = mx.nd.array(npy, dtype=dtype)
            ndarray_arg.append(narr)
            numpy_arg.append(npy)
        out1 = uf(*ndarray_arg)
        reference = uf if npuf is None else npuf
        out2 = reference(*numpy_arg).astype(dtype)

        assert out1.shape == out2.shape
        if isinstance(out1, mx.nd.NDArray):
            out1 = out1.asnumpy()
        if dtype == np.float16:
            # Half precision needs a looser relative tolerance.
            assert_almost_equal(out1, out2, rtol=2e-3)
        else:
            assert_almost_equal(out1, out2)
Esempio n. 11
0
    def _check_subgraph_exe1(sym, subgraph_backend, op_names):
        """Simple-bind the partitioned symbol and compare it with the original.

        The symbol is partitioned through ``MXBuildSubgraphByOpNames``; both the
        original and the partitioned symbol are simple-bound, fed identical random
        arguments and auxiliary states, and the summed absolute difference of each
        pair of outputs must be (almost) zero.
        """
        partitioned_handle = SymbolHandle()
        check_call(_LIB.MXBuildSubgraphByOpNames(sym.handle, c_str(subgraph_backend), mx_uint(len(op_names)),
                                                  c_str_array(op_names), ctypes.byref(partitioned_handle)))

        partitioned_sym = Symbol(partitioned_handle)
        # Partitioning must not change the symbol's interface.
        assert partitioned_sym.list_inputs() == sym.list_inputs()
        assert partitioned_sym.list_arguments() == sym.list_arguments()
        assert partitioned_sym.list_auxiliary_states() == sym.list_auxiliary_states()
        exe = sym.simple_bind(ctx=mx.current_context(), grad_req='null')
        partitioned_exe = partitioned_sym.simple_bind(ctx=mx.current_context(), grad_req='null')
        for name in sym.list_inputs():
            if name in exe.arg_dict:
                source, target = exe.arg_dict, partitioned_exe.arg_dict
            else:
                assert name in exe.aux_dict
                source, target = exe.aux_dict, partitioned_exe.aux_dict
            # Randomize the original input, then mirror it into the partitioned executor.
            source[name][:] = mx.nd.random.uniform(shape=source[name].shape)
            target[name][:] = source[name]
        exe.forward()
        partitioned_exe.forward()
        assert len(exe.outputs) == len(partitioned_exe.outputs)
        for original_out, partitioned_out in zip(exe.outputs, partitioned_exe.outputs):
            total_abs_diff = (original_out - partitioned_out).abs().sum().asnumpy()
            assert_almost_equal(total_abs_diff, np.zeros(shape=(1,)))
Esempio n. 12
0
    def _check_subgraph_exe4(sym, subgraph_backend, op_names):
        """Use env var MXNET_SUBGRAPH_BACKEND=default to trigger graph partitioning in bind
        and compare results of the partitioned sym and the original sym.

        The original executor is bound with random arguments; the partitioned
        executor is bound with the very same argument and auxiliary arrays. The
        summed absolute difference of each pair of outputs must be (almost) zero.
        """
        def get_executor(sym, subgraph_backend=None, op_names=None, original_exec=None):
            """Bind ``sym`` and run one forward pass.

            Without ``subgraph_backend`` random inputs are generated; otherwise
            partitioning is enabled for the duration of the bind/forward and the
            arrays of ``original_exec`` are reused.
            """
            def bind_and_forward(args, aux_states):
                exe = sym.bind(ctx=mx.current_context(), args=args, aux_states=aux_states,
                               grad_req='null')
                exe.forward()
                return exe

            if subgraph_backend is None:
                arg_shapes, _, aux_shapes = sym.infer_shape()
                arg_array = [mx.nd.random.uniform(shape=shape) for shape in arg_shapes]
                aux_array = [mx.nd.random.uniform(shape=shape) for shape in aux_shapes]
                return bind_and_forward(arg_array, aux_array)

            os.environ['MXNET_SUBGRAPH_BACKEND'] = subgraph_backend
            try:
                check_call(_LIB.MXSetSubgraphPropertyOpNames(c_str(subgraph_backend), mx_uint(len(op_names)),
                                                             c_str_array(op_names)))
                try:
                    return bind_and_forward(original_exec.arg_arrays, original_exec.aux_arrays)
                finally:
                    # Always unregister the op names, even if bind/forward raised.
                    check_call(_LIB.MXRemoveSubgraphPropertyOpNames(c_str(subgraph_backend)))
            finally:
                # Never leave the env var set: it would silently enable
                # partitioning for whatever runs next in this process.
                os.environ.pop('MXNET_SUBGRAPH_BACKEND', None)

        original_exec = get_executor(sym)
        partitioned_exec = get_executor(sym, subgraph_backend, op_names, original_exec)
        outputs1 = original_exec.outputs
        outputs2 = partitioned_exec.outputs
        assert len(outputs1) == len(outputs2)
        for out1, out2 in zip(outputs1, outputs2):
            assert_almost_equal((out1 - out2).abs().sum().asnumpy(), np.zeros(shape=(1,)))
Esempio n. 13
0
    def _check_subgraph_exe2(sym, subgraph_backend, op_names):
        """Use env var MXNET_SUBGRAPH_BACKEND=default to trigger graph partitioning in simple_bind
        and compare results of the partitioned sym and the original sym.

        The original executor is filled with random inputs; the partitioned
        executor copies its inputs from the original one. The summed absolute
        difference of each pair of outputs must be (almost) zero.
        """
        def get_executor(sym, subgraph_backend=None, op_names=None, original_exec=None):
            """Simple-bind ``sym``, fill its inputs and run one forward pass.

            Inputs are random when ``original_exec`` is None and copied from
            ``original_exec`` otherwise. When ``subgraph_backend`` is given,
            partitioning is enabled only for the duration of this call.
            """
            def bind_fill_and_forward():
                exe = sym.simple_bind(ctx=mx.current_context(), grad_req='null')
                for name in sym.list_inputs():
                    if name in exe.arg_dict:
                        exe.arg_dict[name][:] = mx.nd.random.uniform(shape=exe.arg_dict[name].shape)\
                            if original_exec is None else original_exec.arg_dict[name]
                    else:
                        assert name in exe.aux_dict
                        exe.aux_dict[name][:] = mx.nd.random.uniform(shape=exe.aux_dict[name].shape)\
                            if original_exec is None else original_exec.aux_dict[name]
                exe.forward()
                return exe

            if subgraph_backend is None:
                return bind_fill_and_forward()

            os.environ['MXNET_SUBGRAPH_BACKEND'] = subgraph_backend
            try:
                check_call(_LIB.MXSetSubgraphPropertyOpNames(c_str(subgraph_backend), mx_uint(len(op_names)),
                                                             c_str_array(op_names)))
                try:
                    return bind_fill_and_forward()
                finally:
                    # Always unregister the op names, even if bind/forward raised.
                    check_call(_LIB.MXRemoveSubgraphPropertyOpNames(c_str(subgraph_backend)))
            finally:
                # Never leave the env var set: it would silently enable
                # partitioning for whatever runs next in this process.
                os.environ.pop('MXNET_SUBGRAPH_BACKEND', None)

        original_exec = get_executor(sym)
        partitioned_exec = get_executor(sym, subgraph_backend, op_names, original_exec)
        outputs1 = original_exec.outputs
        outputs2 = partitioned_exec.outputs
        assert len(outputs1) == len(outputs2)
        for out1, out2 in zip(outputs1, outputs2):
            assert_almost_equal((out1 - out2).abs().sum().asnumpy(), np.zeros(shape=(1,)))
Esempio n. 14
0
def test_bce_loss_with_pos_weight():
    """Check ``SigmoidBinaryCrossEntropyLoss`` with a positive-class weight.

    First a network from ``get_net(5)`` is fitted symbolically with the weighted
    loss and must reach a loss below 0.01; then the imperative loss is compared
    with a NumPy re-implementation of the weighted binary cross entropy.
    """
    # Suppose it's a multi-label classification
    num_samples = np.random.randint(5, 30)
    data = mx.nd.random.uniform(-1, 1, shape=(num_samples, 20))
    label = mx.nd.array(np.random.randint(2, size=(num_samples, 5)), dtype='float32')
    pos_weight = mx.nd.random.uniform(0, 10, shape=(1, 5))
    # One copy of the per-class weights for every sample, so the iterator can batch them.
    pos_weight = mx.nd.repeat(pos_weight, repeats=num_samples, axis=0)
    data_iter = mx.io.NDArrayIter(data, {'label': label, 'pos_w': pos_weight}, batch_size=10, label_name='label')
    output = get_net(5)
    label_sym = mx.symbol.Variable('label')
    pos_w_sym = mx.symbol.Variable('pos_w')
    bce = gluon.loss.SigmoidBinaryCrossEntropyLoss()
    loss = mx.sym.make_loss(bce(output, label_sym, None, pos_w_sym))
    mod = mx.mod.Module(loss, data_names=('data',), label_names=('label', 'pos_w'))
    mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01},
            eval_metric=mx.metric.Loss(), optimizer='adam',
            initializer=mx.init.Xavier(magnitude=2))
    final_loss = mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1]
    assert final_loss < 0.01

    # Test against npy
    data = mx.nd.random.uniform(-5, 5, shape=(num_samples, 5))
    label = mx.nd.array(np.random.randint(2, size=(num_samples, 5)), dtype='float32')
    pos_weight = mx.nd.random.uniform(0, 10, shape=(1, 5))
    mx_bce_loss = bce(data, label, None, pos_weight).asnumpy()
    prob_npy = 1.0 / (1.0 + np.exp(-data.asnumpy()))
    label_npy = label.asnumpy()
    pos_weight_npy = pos_weight.asnumpy()
    # Weighted log-likelihood of the positive labels, plain one for the negatives.
    positive_term = label_npy * np.log(prob_npy)*pos_weight_npy
    negative_term = (1 - label_npy) * np.log(1 - prob_npy)
    npy_bce_loss = (-positive_term - negative_term).mean(axis=1)
    assert_almost_equal(mx_bce_loss, npy_bce_loss, rtol=1e-4, atol=1e-5)
Esempio n. 15
0
    def _check_subgraph_exe3(sym, subgraph_backend, op_names):
        """Bind the partitioned symbol and compare it with the original.

        The symbol is partitioned through ``MXBuildSubgraphByOpNames``; the
        original and the partitioned symbol are bound to the same random argument
        and auxiliary arrays, and the summed absolute difference of each pair of
        outputs must be (almost) zero.
        """
        partitioned_handle = SymbolHandle()
        check_call(_LIB.MXBuildSubgraphByOpNames(sym.handle, c_str(subgraph_backend), mx_uint(len(op_names)),
                                                  c_str_array(op_names), ctypes.byref(partitioned_handle)))

        partitioned_sym = Symbol(partitioned_handle)
        # Partitioning must not change the symbol's interface.
        assert partitioned_sym.list_inputs() == sym.list_inputs()
        assert partitioned_sym.list_arguments() == sym.list_arguments()
        assert partitioned_sym.list_auxiliary_states() == sym.list_auxiliary_states()

        arg_shapes, _, aux_shapes = sym.infer_shape()
        arg_array = [mx.nd.random.uniform(shape=arg_shape) for arg_shape in arg_shapes]
        aux_array = [mx.nd.random.uniform(shape=aux_shape) for aux_shape in aux_shapes]

        def bind(symbol):
            return symbol.bind(ctx=mx.current_context(), args=arg_array,
                               aux_states=aux_array, grad_req='null')

        exe = bind(sym)
        partitioned_exe = bind(partitioned_sym)
        exe.forward()
        partitioned_exe.forward()
        assert len(exe.outputs) == len(partitioned_exe.outputs)
        for original_out, partitioned_out in zip(exe.outputs, partitioned_exe.outputs):
            total_abs_diff = (original_out - partitioned_out).abs().sum().asnumpy()
            assert_almost_equal(total_abs_diff, np.zeros(shape=(1,)))
Esempio n. 16
0
def test_tensorrt_resnet18_feature_vect():
    """Compare ResNet-18 v2 outputs with and without the TensorRT backend.

    The pretrained Gluon model is exported and reloaded as a symbol, run once
    on the GPU without TensorRT, then through the 'TensorRT' backend symbol
    with fp16 enabled and with fp16 disabled. The fp16 result must match the
    baseline within (1e-1, 1e-2) and the fp32 result within (1e-4, 1e-4).
    The global fp16 setting is restored afterwards. ``url``,
    ``model_file_name`` and ``batch_shape`` come from module scope.
    """
    print("downloading sample input")
    input_data = get_image(url)
    gluon_resnet18 = vision.resnet18_v2(pretrained=True)
    gluon_resnet18.hybridize()
    gluon_resnet18.forward(input_data)
    gluon_resnet18.export(model_file_name)
    sym, arg_params, aux_params = mx.model.load_checkpoint(model_file_name, 0)

    # Baseline: plain symbol, no TensorRT.
    executor = sym.simple_bind(ctx=mx.gpu(), data=batch_shape,
                               grad_req='null', force_rebind=True)
    executor.copy_params_from(arg_params, aux_params)
    baseline = executor.forward(is_train=False, data=input_data)

    trt_sym = sym.get_backend_symbol('TensorRT')
    mx.contrib.tensorrt.init_tensorrt_params(trt_sym, arg_params, aux_params)
    saved_fp16_flag = mx.contrib.tensorrt.get_use_fp16()
    try:
        # TensorRT with half precision.
        mx.contrib.tensorrt.set_use_fp16(True)
        executor = trt_sym.simple_bind(ctx=mx.gpu(), data=batch_shape,
                                       grad_req='null', force_rebind=True)
        executor.copy_params_from(arg_params, aux_params)
        trt_fp16 = executor.forward(is_train=False, data=input_data)

        # TensorRT with single precision.
        mx.contrib.tensorrt.set_use_fp16(False)
        executor = trt_sym.simple_bind(ctx=mx.gpu(), data=batch_shape,
                                       grad_req='null', force_rebind=True)
        executor.copy_params_from(arg_params, aux_params)
        trt_fp32 = executor.forward(is_train=False, data=input_data)

        # First output, first batch entry of each run.
        no_trt_output = baseline[0].asnumpy()[0]
        trt_output = trt_fp16[0].asnumpy()[0]
        trt_fp32_output = trt_fp32[0].asnumpy()[0]
        assert_almost_equal(no_trt_output, trt_output, 1e-1, 1e-2)
        assert_almost_equal(no_trt_output, trt_fp32_output, 1e-4, 1e-4)
    finally:
        mx.contrib.tensorrt.set_use_fp16(saved_fp16_flag)
Esempio n. 17
0
 def softmax_forward(input_data, true_output):
     """Run softmax over axis 1 on the CPU and check one slice of the result.

     The slice ``outputs[0][0][0][0]`` of the executor is compared with
     ``true_output`` using ``rtol=1e-5`` and ``atol=1e-5``.
     """
     softmax_sym = mx.sym.Variable('data').softmax(axis=1)
     executor = softmax_sym.bind(mx.cpu(), args={'data': input_data})
     # Block until the forward result has actually been computed.
     executor.forward()[0].wait_to_read()
     observed = executor.outputs[0][0][0][0].asnumpy()
     assert_almost_equal(observed, true_output, rtol=1e-5, atol=1e-5)
Esempio n. 18
0
 def pull_before_push(kv):
     """Pull every key before anything was pushed and expect all zeros.

     The output buffers start filled with ones, so a passing check shows that
     the pull overwrote them. ``nrepeat``, ``keys``, ``shapes`` and ``nworker``
     come from the enclosing scope.
     """
     for _ in range(nrepeat):
         for idx, key in enumerate(keys):
             buffers = [mx.nd.ones(shapes[idx], mx.gpu(gpu_id)) for gpu_id in range(nworker)]
             kv.pull(key, out=buffers)
             expected = np.zeros_like(buffers[0].asnumpy())
             for pulled in buffers:
                 assert_almost_equal(pulled.asnumpy(), expected)
Esempio n. 19
0
def test_bce_equal_ce2():
    """Check that sigmoid BCE on probabilities equals two-class softmax CE on log-probabilities."""
    num_samples = 100
    bce_loss = gluon.loss.SigmoidBCELoss(from_sigmoid=True)
    ce_loss = gluon.loss.SoftmaxCELoss(from_logits=True)
    # Probability of the positive class for each sample.
    prob_pos = mx.random.uniform(0.1, 0.9, shape=(num_samples, 1))
    # Two-class log-probabilities [log(1 - p), log(p)]; 1e-8 is added before the log.
    two_class_probs = mx.nd.concat(1-prob_pos, prob_pos, dim=1)
    log_probs = mx.nd.log(two_class_probs + 1e-8)
    label = mx.nd.round(mx.random.uniform(0, 1, shape=(num_samples, 1)))
    bce_value = bce_loss(prob_pos, label).asnumpy()
    ce_value = ce_loss(log_probs, label).asnumpy()
    assert_almost_equal(bce_value, ce_value)
Esempio n. 20
0
def test_smooth_distribution():
    """Check ``_smooth_distribution`` on an all-zero input and on a Dirac delta.

    An all-zero distribution must raise ``ValueError``. For a five-bin delta
    with ``eps=1e-3`` every zero bin is expected to gain 1e-3 and the single
    non-zero bin to lose 4e-3 in total.
    """
    def smooth_all_zero():
        return mx.contrib.quant._smooth_distribution(np.zeros((2,)), eps=1e-3)

    assert_exception(smooth_all_zero, ValueError)

    dirac_delta = np.zeros((5,))
    dirac_delta[2] = 1
    # Expected result: +1e-3 everywhere, then -5e-3 on the peak.
    expected = np.full((5,), 1e-3)
    expected[2] = (1.0 + 1e-3) - 5e-3
    smoothed = mx.contrib.quant._smooth_distribution(dirac_delta, eps=1e-3)
    assert_almost_equal(smoothed, expected)
def test_normalize():
    """Check ``transforms.Normalize`` against a per-channel NumPy computation.

    A random uint8 image of shape (300, 300, 3) is passed through ``ToTensor``
    and then normalized with per-channel mean and std; the expected value for
    channel ``c`` is ``(x[c] - mean[c]) / std[c]``.
    """
    mean = (0, 1, 2)
    std = (3, 2, 1)
    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
    data_in = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    out_nd = transforms.Normalize(mean=mean, std=std)(data_in)
    data_expected = data_in.asnumpy()
    # The first axis of the tensor is the channel axis here, so index it
    # explicitly; a chained ``[:][:][c]`` would select the same data but reads
    # as if it indexed the last axis.
    for channel, (channel_mean, channel_std) in enumerate(zip(mean, std)):
        data_expected[channel, :, :] = (data_expected[channel, :, :] - channel_mean) / channel_std
    assert_almost_equal(data_expected, out_nd.asnumpy())
Esempio n. 22
0
def test_logistic_loss_equal_bce():
    """Check that ``LogisticLoss`` equals sigmoid BCE for both label formats.

    With 'binary' labels the loss is fed the 0/1 labels directly; with
    'signed' labels it is fed ``2 * label - 1``.
    """
    num_samples = 100
    loss_binary = gluon.loss.LogisticLoss(label_format='binary')
    loss_signed = gluon.loss.LogisticLoss(label_format='signed')
    loss_bce = gluon.loss.SigmoidBCELoss(from_sigmoid=False)
    data = mx.random.uniform(-10, 10, shape=(num_samples, 1))
    label = mx.nd.round(mx.random.uniform(0, 1, shape=(num_samples, 1)))

    binary_value = loss_binary(data, label).asnumpy()
    assert_almost_equal(binary_value, loss_bce(data, label).asnumpy())

    signed_label = 2 * label - 1
    signed_value = loss_signed(data, signed_label).asnumpy()
    assert_almost_equal(signed_value, loss_bce(data, label).asnumpy())
Esempio n. 23
0
def test_zero_grad():
    """Check that ``zero_grad`` clears the gradient of a sparse-grad Embedding."""
    data = mx.nd.random.uniform(shape=(3,3))
    net = nn.Embedding(3, 4, sparse_grad=True, prefix='test_zero_grad_')
    net.initialize()
    # Produce a gradient first, so there is something to reset.
    with mx.autograd.record():
        embedded = net(data)
        embedded.backward()
    net.collect_params().zero_grad()
    weight = net.collect_params()['test_zero_grad_weight']
    grad_npy = weight.grad().asnumpy()
    assert_almost_equal(grad_npy, grad_npy * 0)
Esempio n. 24
0
    def bench_dot(lhs_shape, rhs_shape, lhs_stype, rhs_stype,
                  lhs_den, rhs_den, trans_lhs, ctx, num_repeat=10, fw="mxnet", distribution="uniform"):
        """Time a sparse dot against its dense counterpart and print one result row.

        Parameters
        ----------
        lhs_shape, rhs_shape : shapes of the two operands.
        lhs_stype, rhs_stype : storage types passed to ``rand_ndarray``.
        lhs_den, rhs_den : densities passed to ``rand_ndarray``.
        trans_lhs : whether the left operand is transposed.
        ctx : context set as default and printed in the result row.
        num_repeat : repeat count handed to ``measure_cost``.
        fw : "mxnet" or "scipy" -- which implementation is benchmarked.
        distribution : distribution of the lhs data (and of the rhs when it is 'csr').

        The printed row holds: lhs density %, rhs density %, context, m, k, n,
        sparse cost * 1000, dense cost * 1000 and the dense/sparse ratio.
        """
        set_default_context(ctx)
        assert fw in ("mxnet", "scipy")
        use_mxnet = fw == "mxnet"
        # Set funcs
        dot_func_sparse = mx.nd.sparse.dot if use_mxnet else sp.spmatrix.dot
        dot_func_dense = mx.nd.dot if use_mxnet else np.dot
        # Create matrix instances
        lhs_nd = rand_ndarray(lhs_shape, lhs_stype, density=lhs_den, distribution=distribution)
        # only uniform distribution supported for rhs (unless it is csr)
        rhs_distribution = distribution if rhs_stype == 'csr' else "uniform"
        rhs_nd = rand_ndarray(rhs_shape, rhs_stype, density=rhs_den, distribution=rhs_distribution)

        if use_mxnet:
            lhs_dns = lhs_nd if lhs_stype == 'default' else lhs_nd.tostype('default')
            rhs_dns = rhs_nd if rhs_stype == 'default' else rhs_nd.tostype('default')
            # One warm up run, verify correctness
            warmup = dot_func_sparse(lhs_nd, rhs_dns, trans_lhs)
            reference = dot_func_dense(lhs_dns, rhs_dns, trans_lhs)
            assert_almost_equal(warmup.asnumpy(), reference.asnumpy(), rtol=1e-1, atol=1e-1)
            sparse_cost = measure_cost(num_repeat, False, False, dot_func_sparse, lhs_nd, rhs_nd, trans_lhs)
            dense_cost = measure_cost(num_repeat, False, False, dot_func_dense, lhs_dns, rhs_dns, trans_lhs)
        else:
            lhs_dns = lhs_nd.asnumpy()
            rhs_dns = rhs_nd.asnumpy()
            lhs_nd = sp.csr_matrix(lhs_nd.asnumpy())
            rhs_nd = rhs_nd.asnumpy()
            # One warm up run (result is discarded)
            warmup_lhs = sp.spmatrix.transpose(lhs_nd) if trans_lhs else lhs_nd
            dot_func_sparse(warmup_lhs, rhs_dns)
            sparse_cost = measure_cost(num_repeat, trans_lhs, False, dot_func_sparse, lhs_nd, rhs_nd)
            dense_cost = measure_cost(num_repeat, trans_lhs, True, dot_func_dense, lhs_dns, rhs_dns)

        speedup = dense_cost / sparse_cost
        # Print results
        m, k = lhs_shape[0], lhs_shape[1]
        n = rhs_shape[1]
        result_pattern = '{:15.1f} {:15.1f} {:>10} {:8d} {:8d} {:8d} {:13.2f} {:13.2f} {:8.2f}'
        row = (lhs_den*100, rhs_den*100, str(ctx), m, k, n,
               sparse_cost*1000, dense_cost*1000, speedup)
        print(result_pattern.format(*row))
Esempio n. 25
0
def test_mkldnn_ndarray_slice():
    """Run one Conv2D layer on an all-ones CPU batch and check an element read via a slice.

    NOTE(review): the expected value depends on the layer's initial weights, so
    presumably the test harness fixes the random seed -- confirm.
    """
    cpu_ctx = mx.cpu()
    conv_net = gluon.nn.HybridSequential()
    with conv_net.name_scope():
        conv_layer = gluon.nn.Conv2D(channels=32, kernel_size=3, activation=None)
        conv_net.add(conv_layer)
    conv_net.collect_params().initialize(ctx=cpu_ctx)

    ones_batch = np.ones([32, 3, 224, 224])
    output = conv_net(mx.nd.array(ones_batch, cpu_ctx))

    # Slice the NDArray first so the computation is triggered through the slice.
    first_sample = output[0].asnumpy()
    assert_almost_equal(first_sample[0, 0, 0], 0.3376348)
Esempio n. 26
0
def test_global_norm_clip_multi_device():
    """Clip the global norm of one GPU array and one CPU array to 1.0.

    The arrays are all ones with 9 and 16 elements, so the global norm is
    sqrt(9 + 16) == 5 and every element ends up as 1/5.
    """
    for check_isfinite in (True, False):
        gpu_arr = mx.nd.ones((3, 3), ctx=mx.gpu(0))
        cpu_arr = mx.nd.ones((4, 4), ctx=mx.cpu(0))
        norm = gluon.utils.clip_global_norm(
            [gpu_arr, cpu_arr], 1.0, check_isfinite=check_isfinite)
        # Without the finiteness check the norm is read back through asscalar().
        norm_value = norm if check_isfinite else norm.asscalar()
        assert norm_value == 5.0
        for arr, shape in ((gpu_arr, (3, 3)), (cpu_arr, (4, 4))):
            assert_almost_equal(arr.asnumpy(), np.ones(shape) / 5)
def test_inference():
    """Compare CPU and GPU inference outputs for several model-zoo networks.

    For each model, one batch is read from ``VAL_DATA``, a CPU and a GPU copy
    of the network are built, the GPU parameters are overwritten with the CPU
    ones, and both outputs (scaled by the CPU output's max magnitude) must
    agree within ``rtol=atol=1e-3`` over five repeated forward passes.
    """
    all_models = ['resnet50_v1', 'vgg19_bn', 'alexnet', #'inceptionv3',
                  'densenet201', 'squeezenet1.0', 'mobilenet0.25']

    batch_size = 10
    download_data()
    for model_name in all_models:
        eprint('testing inference on %s'%model_name)

        data_shape = (3, 224, 224) if 'inception' not in model_name else (3, 299, 299)
        dataIter = mx.io.ImageRecordIter(
            path_imgrec        = VAL_DATA,
            label_width        = 1,
            preprocess_threads = 1,
            batch_size         = batch_size,
            data_shape         = data_shape,
            label_name         = 'softmax_label',
            rand_crop          = False,
            rand_mirror        = False)
        # Only the images are needed; labels play no part in the comparison.
        data = dataIter.next().data[0]
        gpu_data = data.as_in_context(mx.gpu())

        # This is to create a model and run the model once to initialize
        # all parameters.
        cpu_model = get_model(model_name)
        cpu_model.collect_params().initialize(ctx=mx.cpu())
        cpu_model(mx.nd.array(data, ctx=mx.cpu()))
        gpu_model = get_model(model_name)
        gpu_model.collect_params().initialize(ctx=mx.gpu())
        gpu_model(mx.nd.array(data, ctx=mx.gpu()))

        # Force the two models have the same parameters.
        cpu_params = cpu_model.collect_params()
        gpu_params = gpu_model.collect_params()
        for k in cpu_params.keys():
            # Strip the CPU model's prefix so the same short name can be looked
            # up in both dicts (presumably `get` re-adds each dict's own prefix).
            k = k.replace(cpu_params.prefix, '')
            cpu_param = cpu_params.get(k)
            gpu_param = gpu_params.get(k)
            gpu_param.set_data(cpu_param.data().as_in_context(mx.gpu()))

        for _ in range(5):
            # Run inference.
            with autograd.record(train_mode=False):
                cpu_out = cpu_model(mx.nd.array(data, ctx=mx.cpu()))
                gpu_out = gpu_model(gpu_data)
            # Convert each output once and reuse the host copies below.
            cpu_out_np = cpu_out.asnumpy()
            gpu_out_np = gpu_out.asnumpy()
            max_val = np.max(np.abs(cpu_out_np))
            gpu_max_val = np.max(np.abs(gpu_out_np))
            eprint(model_name + ": CPU " + str(max_val) + ", GPU " + str(gpu_max_val))
            # Both outputs are scaled by the CPU maximum so the tolerances are relative.
            assert_almost_equal(cpu_out_np / max_val, gpu_out_np / max_val, rtol=1e-3, atol=1e-3)
Esempio n. 28
0
def test_row_sparse_pull_single_device():
    """Push a row_sparse array to a 'device' kvstore and pull the same rows back.

    The pulled array must match the dense array it was converted from.
    """
    store = mx.kv.create('device')
    dense_ref = mx.nd.random_normal(shape=(4, 4), ctx=mx.gpu(0))
    sparse_grad = dense_ref.tostype("row_sparse")

    param_key = 0
    store.init(param_key, sparse_grad)
    # Remember the row ids before pushing so the pull requests exactly those rows.
    row_ids = sparse_grad.indices
    store.push(param_key, sparse_grad)
    store.row_sparse_pull(param_key, out=sparse_grad, row_ids=row_ids)

    assert_almost_equal(sparse_grad.asnumpy(), dense_ref.asnumpy())
Esempio n. 29
0
def test_mkldnn_sum_inplace_with_cpu_layout():
    """Add a convolution output to a plain input with ``add_n`` on CPU.

    NOTE(review): the expected value 1.0 presumably relies on the bound
    convolution weights being zero, leaving only the all-ones ``y`` -- confirm.
    """
    x_shape = (32, 3, 224, 224)
    y_shape = (32, 32, 222, 222)

    conv = mx.symbol.Convolution(data=mx.sym.Variable("x"), num_filter=32, kernel=(3, 3))
    total = mx.sym.add_n(conv, mx.sym.Variable("y"))

    executor = total.simple_bind(ctx=mx.cpu(), x=x_shape, y=y_shape)
    outputs = executor.forward(is_train=False, x=np.ones(x_shape), y=np.ones(y_shape))
    first_sample = outputs[0][0].asnumpy()
    assert_almost_equal(first_sample[0, 0, 0], 1.0)
Esempio n. 30
0
def test_global_norm_clip():
    """Check the norm returned by ``clip_global_norm`` and the NaN warning.

    Two all-ones arrays with 9 and 16 elements have global norm 5; clipping to
    1.0 scales every element to 1/5.  A second call with a NaN-containing
    array must emit exactly one warning.
    """
    shapes = [(3, 3), (4, 4)]
    arrays = [mx.nd.ones(shape) for shape in shapes]
    total_norm = gluon.utils.clip_global_norm(arrays, 1.0)
    assert total_norm == 5.0
    for arr, shape in zip(arrays, shapes):
        assert_almost_equal(arr.asnumpy(), np.ones(shape) / 5)

    with_nan = mx.nd.array([1.0, 2.0, float('nan')])
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, including ones that would normally be filtered.
        warnings.simplefilter("always")
        gluon.utils.clip_global_norm([arrays[0], with_nan], 2.0)
        assert len(caught) == 1
Esempio n. 31
0
def test_quantize_float32_to_int8():
    """Quantize random float32 data to int8 and compare with a NumPy reference.

    The reference uses a symmetric range ``max(|min|, |max|)`` mapped to 127
    and allows an absolute difference of 1 in the quantized values.
    """
    fp32_data = rand_ndarray(rand_shape_nd(4), 'default', dtype='float32')
    data_min = mx.nd.min(fp32_data)
    data_max = mx.nd.max(fp32_data)
    quantized, out_min, out_max = mx.nd.contrib.quantize(
        fp32_data, data_min, data_max, out_type='int8')

    fp32_np = fp32_data.asnumpy()
    abs_bound = np.maximum(np.abs(data_min.asscalar()), np.abs(data_max.asscalar()))
    int8_limit = 127.0
    scale = int8_limit / abs_bound

    # Output dtypes.
    assert quantized.dtype == np.int8
    assert out_min.dtype == np.float32
    assert out_max.dtype == np.float32
    # The reported range must be symmetric around zero.
    assert same(out_min.asscalar(), -abs_bound)
    assert same(out_max.asscalar(), abs_bound)

    # Scale magnitudes, add 0.5 and cap at 127; the int8 cast truncates.
    magnitudes = np.minimum(np.abs(fp32_np) * scale + 0.5, int8_limit)
    expected = (np.sign(fp32_np) * magnitudes).astype(np.int8)
    assert_almost_equal(quantized.asnumpy(), expected, atol=1)
def test_random_rotation():
    """Exercise ``transforms.RandomRotation``: argument checks, shapes and identity."""
    # Probabilities outside [0, 1] must be rejected.
    for bad_proba in (1.1, -0.3):
        assertRaises(ValueError,
                     transforms.RandomRotation, [-10, 10.],
                     rotate_with_proba=bad_proba)

    # Forward pass: uint8 input raises, float32 input keeps its shape.
    rotate = transforms.RandomRotation([-10, 10.])
    uint8_image = mx.np.ones((3, 30, 60), dtype='uint8')
    assertRaises(TypeError, rotate, uint8_image)
    for shape in ((3, 30, 60), (3, 3, 30, 60)):
        image = mx.np.ones(shape, dtype='float32')
        rotated = rotate(image)
        assert same(rotated.shape, shape)

    # With rotate_with_proba = 0 the transform must return the input unchanged.
    no_rotate = transforms.RandomRotation([-100., 100.],
                                          rotate_with_proba=0.0)
    sample = mx.np.random.normal(size=(3, 30, 60))
    assert_almost_equal(sample.asnumpy(), no_rotate(sample).asnumpy())
Esempio n. 33
0
def test_bce_loss():
    """Train with ``SigmoidBinaryCrossEntropyLoss`` and compare it with NumPy.

    First a Module is fitted on random data and its final loss must drop
    below 0.01; then the loss on random logits/targets is checked against a
    direct NumPy evaluation of the binary cross-entropy formula.
    """
    num_samples = 20
    train_data = mx.random.uniform(-1, 1, shape=(num_samples, 20))
    train_label = mx.nd.array(np.random.randint(2, size=(num_samples,)), dtype='float32')
    train_iter = mx.io.NDArrayIter(train_data, train_label, batch_size=10, label_name='label')

    net_output = get_net(1)
    label_sym = mx.symbol.Variable('label')
    Loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
    loss_sym = mx.sym.make_loss(Loss(net_output, label_sym))

    module = mx.mod.Module(loss_sym, data_names=('data',), label_names=('label',))
    module.fit(train_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01},
               eval_metric=mx.gluon.metric.Loss(), optimizer='adam',
               initializer=mx.init.Xavier(magnitude=2))
    final_score = module.score(train_iter, eval_metric=mx.gluon.metric.Loss())
    assert final_score[0][1] < 0.01

    # Test against npy
    logits = mx.random.uniform(-5, 5, shape=(10,))
    targets = mx.random.uniform(0, 1, shape=(10,))
    mx_bce_loss = Loss(logits, targets).asnumpy()

    probs = 1.0 / (1.0 + np.exp(-logits.asnumpy()))
    targets_np = targets.asnumpy()
    positive_term = targets_np * np.log(probs)
    negative_term = (1 - targets_np) * np.log(1 - probs)
    npy_bce_loss = -positive_term - negative_term
    assert_almost_equal(mx_bce_loss, npy_bce_loss, rtol=1e-4, atol=1e-5)
Esempio n. 34
0
def test_cosine_loss(hybridize):
    """Compare ``gluon.loss.CosineEmbeddingLoss`` with a hand-written reference.

    Parameters
    ----------
    hybridize : bool
        Whether to hybridize the loss block before calling it.
    """
    # Random inputs and +/-1 (or 0) labels.
    lhs = mx.np.random.randn(3, 2)
    rhs = mx.np.random.randn(3, 2)
    target = mx.np.sign(mx.np.random.randn(lhs.shape[0]))

    # Loss as computed by Gluon.
    cosine_loss = gluon.loss.CosineEmbeddingLoss()
    if hybridize:
        cosine_loss.hybridize()
    gluon_loss = cosine_loss(lhs, rhs, target)

    # Reference: cosine similarity per row, then 1 - cos for label 1, relu(cos) otherwise.
    dot = mx.np.sum(lhs * rhs, keepdims=True, axis=1)
    lhs_norm = mx.np.sqrt(mx.np.sum(lhs**2, axis=1, keepdims=True))
    rhs_norm = mx.np.sqrt(mx.np.sum(rhs**2, axis=1, keepdims=True))
    cos_sim = dot / (lhs_norm * rhs_norm)
    target_col = mx.npx.reshape(target, (-1, 1))
    per_sample = mx.np.where(target_col == 1, 1 - cos_sim, mx.npx.relu(cos_sim))
    expected = mx.npx.reshape(per_sample, (-1, ))

    assert_almost_equal(gluon_loss.asnumpy(),
                        expected.asnumpy(),
                        rtol=1e-3,
                        atol=1e-5)
Esempio n. 35
0
def test_export():
    """Export a hybridized resnet18_v1 and reload it two ways.

    The exported files are loaded once as a ``Module`` and once as parameters
    into a fresh Gluon model; both must reproduce the original output.
    """
    ctx = mx.context.current_context()
    net = gluon.model_zoo.vision.resnet18_v1(prefix='resnet',
                                             ctx=ctx,
                                             pretrained=True)
    net.hybridize()
    sample = mx.nd.random.normal(shape=(1, 3, 224, 224))
    reference = net(sample)

    net.export('gluon')

    # Reload through the Module API.
    loaded = mx.mod.Module.load('gluon', 0, label_names=None, context=ctx)
    loaded.bind(data_shapes=[('data', sample.shape)])
    loaded.forward(mx.io.DataBatch([sample], None), is_train=False)
    (module_out,) = loaded.get_outputs()
    assert_almost_equal(reference.asnumpy(), module_out.asnumpy())

    # Reload only the parameters into a freshly built Gluon model.
    fresh_net = gluon.model_zoo.vision.resnet18_v1(prefix='resnet', ctx=ctx)
    fresh_net.collect_params().load('gluon-0000.params', ctx)
    gluon_out = fresh_net(sample)
    assert_almost_equal(reference.asnumpy(), gluon_out.asnumpy())
Esempio n. 36
0
def test_np_get_constant():
    """Check that a block holding a ``gluon.Constant`` adds it to its input.

    Runs the same block both imperatively and hybridized.
    """
    const_arr = _np.random.uniform(0, 100, size=(10, 10)).astype(_np.float32)

    class Foo(gluon.HybridBlock):
        """Adds the constant ``const_arr`` to whatever is passed in."""

        def __init__(self):
            super(Foo, self).__init__()
            self.weight = gluon.Constant(const_arr)

        def forward(self, x):
            # Fetch the constant on the same context as the input.
            constant = self.weight.data(x.ctx)
            return x + constant.astype(np.float32)

    x = np.random.uniform(size=const_arr.shape, dtype=const_arr.dtype)
    expected = x.asnumpy() + const_arr
    for hybridize in (False, True):
        block = Foo()
        if hybridize:
            block.hybridize()
        block.initialize()
        result = block(x)
        assert_almost_equal(result.asnumpy(), expected,
                            atol=1e-5,
                            rtol=1e-4,
                            use_broadcast=False)
Esempio n. 37
0
def test_gluon_ctc_consistency():
    """Check that CTCLoss input gradients agree between CPU and GPU."""
    ctc = mx.gluon.loss.CTCLoss()
    base = mx.nd.arange(0, 4, repeat=40, ctx=mx.gpu(0)).reshape(
        (2, 20, 4)).flip(axis=0)
    label_values = [[2, 1, -1, -1], [3, 2, 2, -1]]
    cpu_label = mx.nd.array(label_values, ctx=mx.cpu(0))
    gpu_label = mx.nd.array(label_values, ctx=mx.gpu(0))

    def run_backward(inputs, labels):
        """Attach a gradient buffer to `inputs` and backpropagate the CTC loss."""
        inputs.attach_grad()
        with mx.autograd.record():
            loss_value = ctc(inputs, labels)
            loss_value.backward()

    cpu_data = base.copy().as_in_context(mx.cpu(0))
    run_backward(cpu_data, cpu_label)

    gpu_data = base.copyto(mx.gpu(0))
    run_backward(gpu_data, gpu_label)

    assert_almost_equal(cpu_data.grad.asnumpy(),
                        gpu_data.grad.asnumpy(),
                        atol=1e-3,
                        rtol=1e-3)
Esempio n. 38
0
def test_global_norm_clip_multi_device():
    """Clip the global norm of four all-ones arrays spread over GPU and CPU.

    The arrays hold 9 + 16 + 28 + 28 = 81 ones, so the global norm is 9 and
    clipping to 1.0 scales every element to 1/9.
    """
    for check_isfinite in (True, False):
        layout = [((3, 3), mx.gpu(0)), ((4, 4), mx.cpu(0)),
                  ((7, 4), mx.gpu(0)), ((7, 4), mx.cpu(0))]
        arrays = [mx.nd.ones(shape, ctx=ctx) for shape, ctx in layout]
        norm = gluon.utils.clip_global_norm(
            arrays, 1.0, check_isfinite=check_isfinite)
        # Without the finiteness check the norm is read back through asscalar().
        norm_value = norm if check_isfinite else norm.asscalar()
        assert norm_value == 9.0
        for arr, (shape, _) in zip(arrays, layout):
            assert_almost_equal(arr, np.ones(shape) / 9)
Esempio n. 39
0
def test_get_optimal_thresholds():
    """Check ``_get_optimal_thresholds`` on uniformly distributed data.

    Given an ndarray with elements following a uniform distribution, the
    optimal threshold for quantizing it should be either abs(min(nd)) or
    abs(max(nd)).
    """
    def max_abs_bound(nd):
        """Return max(|min(nd)|, |max(nd)|) as a NumPy array."""
        lower = mx.nd.abs(mx.nd.min(nd))
        upper = mx.nd.abs(mx.nd.max(nd))
        return mx.nd.maximum(lower, upper).asnumpy()

    layer = 'layer1'
    for dtype in ('uint8', 'int8', 'auto'):
        samples = mx.nd.uniform(low=-10.532,
                                high=11.3432,
                                shape=(8, 3, 23, 23),
                                dtype=np.float64)
        nd_dict = {layer: samples}
        expected_threshold = max_abs_bound(samples)
        th_dict = mx.contrib.quant._get_optimal_thresholds(nd_dict, dtype)
        assert layer in th_dict
        # Element 1 of the returned entry is compared with the expected bound.
        found = np.array([th_dict[layer][1]])
        assert_almost_equal(found,
                            expected_threshold,
                            rtol=1e-2,
                            atol=1e-4)
Esempio n. 40
0
def compare_optimizer(opt1,
                      opt2,
                      shape,
                      dtype,
                      w_stype='default',
                      g_stype='default',
                      rtol=1e-4,
                      atol=1e-5,
                      compare_states=True):
    """Compare opt1 and opt2.

    Both optimizers perform one ``update_multi_precision`` step on the same
    random weight and gradient; the updated weights (and optionally the
    optimizer states before and after the step) must match.

    Parameters
    ----------
    opt1, opt2 : optimizers to compare; ``opt1`` always receives dense arrays.
    shape : shape of the weight and gradient.
    dtype : dtype of the weight and gradient.
    w_stype, g_stype : storage type given to ``opt2`` for weight / gradient,
        one of 'default', 'row_sparse' or 'csr'.
    rtol, atol : tolerances for the post-update comparisons.
    compare_states : whether optimizer states are compared as well.

    Raises
    ------
    Exception
        If a storage type is not one of the supported values.
    """
    def make_pair(stype, **sparse_kwargs):
        """Return (dense array for opt1, array with storage `stype` for opt2)."""
        if stype == 'default':
            second = mx.random.uniform(shape=shape, ctx=default_context(), dtype=dtype)
            return second.copyto(default_context()), second
        if stype in ('row_sparse', 'csr'):
            second = rand_ndarray(shape, stype, dtype=dtype, **sparse_kwargs)
            return second.copyto(default_context()).tostype('default'), second
        raise Exception("type not supported yet")

    # Sparse weights are generated fully dense (density=1); gradients use the default density.
    w1, w2 = make_pair(w_stype, density=1)
    g1, g2 = make_pair(g_stype)

    state1 = opt1.create_state_multi_precision(0, w1)
    state2 = opt2.create_state_multi_precision(0, w2)
    if compare_states:
        compare_ndarray_tuple(state1, state2)

    opt1.update_multi_precision(0, w1, g1, state1)
    opt2.update_multi_precision(0, w2, g2, state2)
    if compare_states:
        compare_ndarray_tuple(state1, state2, rtol=rtol, atol=atol)
    assert_almost_equal(w1.asnumpy(), w2.asnumpy(), rtol=rtol, atol=atol)
Esempio n. 41
0
def test_np_broadcast_to():
    """Check forward and backward of ``F.np.broadcast_to`` against NumPy.

    Covers scalar, zero-size and higher-rank destination shapes, both
    imperatively and hybridized.
    """
    class TestBroadcastTo(HybridBlock):
        """Broadcasts its input to a destination shape fixed at construction."""

        def __init__(self, dst_shape):
            super(TestBroadcastTo, self).__init__()
            self._dst_shape = dst_shape

        def hybrid_forward(self, F, x):
            return F.np.broadcast_to(x, self._dst_shape)

    # (source shape, destination shape) pairs.
    cases = [((), (1, 2, 4, 5)), ((1, ), (4, 5, 6)), ((1, 0), (2, 4, 0)),
             ((1, 1), (2, 4, 0)), ((4, 1), (1, 2, 3, 4, 5)),
             ((4, 1), (1, 0, 3, 4, 5))]
    tolerances = dict(rtol=1e-5, atol=1e-6, use_broadcast=False)

    for src_shape, dst_shape in cases:
        for hybridize in (True, False):
            block = TestBroadcastTo(dst_shape)
            if hybridize:
                block.hybridize()

            src = _np.random.uniform(size=src_shape).astype(np.float32)
            expected = _np.broadcast_to(src, dst_shape)
            src_mx = np.array(src, dtype=src.dtype)
            src_mx.attach_grad()
            with mx.autograd.record():
                result = block(src_mx)
            assert_almost_equal(result.asnumpy(), expected, **tolerances)

            # The gradient of broadcast_to is the output gradient summed back to the source shape.
            result.backward()
            expected_grad = collapse_sum_like(_np.ones_like(expected),
                                              src_shape)
            assert_almost_equal(src_mx.grad.asnumpy(), expected_grad, **tolerances)
Esempio n. 42
0
def test_subgraph_exe2(sym, subgraph_backend, op_names):
    """Set env var MXNET_SUBGRAPH_BACKEND to `subgraph_backend` to trigger graph
    partitioning in _simple_bind and compare results of the partitioned sym and
    the original sym.

    Parameters
    ----------
    sym : 3-tuple whose first element is the symbol under test.
    subgraph_backend : name of the subgraph backend to activate.
    op_names : operator names registered for that backend during the test.
    """
    def get_executor(sym, original_exec=None):
        """Bind `sym`, fill its inputs and run one forward pass.

        Inputs are random when `original_exec` is None; otherwise they are
        copied from `original_exec` so both executors see identical data.
        """
        exe = sym._simple_bind(ctx=mx.current_context(), grad_req='null')
        for name in sym.list_inputs():
            if name in exe.arg_dict:
                dst = exe.arg_dict
                src = None if original_exec is None else original_exec.arg_dict
            else:
                assert name in exe.aux_dict
                dst = exe.aux_dict
                src = None if original_exec is None else original_exec.aux_dict
            dst[name][:] = mx.nd.random.uniform(shape=dst[name].shape)\
                if src is None else src[name]
        exe.forward()
        return exe

    sym, _, _ = sym
    original_exec = get_executor(sym)
    with environment('MXNET_SUBGRAPH_BACKEND', subgraph_backend):
        check_call(
            _LIB.MXSetSubgraphPropertyOpNames(c_str(subgraph_backend),
                                              mx_uint(len(op_names)),
                                              c_str_array(op_names)))
        try:
            partitioned_exec = get_executor(sym, original_exec)
        finally:
            # Always unregister the op names so a failure here cannot leak
            # the backend configuration into later tests.
            check_call(
                _LIB.MXRemoveSubgraphPropertyOpNames(c_str(subgraph_backend)))
    outputs1 = original_exec.outputs
    outputs2 = partitioned_exec.outputs
    assert len(outputs1) == len(outputs2)
    for out1, out2 in zip(outputs1, outputs2):
        # The summed absolute difference must be (almost) zero.
        assert_almost_equal((out1 - out2).abs().sum().asnumpy(),
                            onp.zeros(shape=(1, )))
Esempio n. 43
0
    def verify(batch_size):
        """Compare folded batch outputs with per-sample outputs of `net`.

        The first 100 sentence/tree pairs are encoded through a `Fold`; every
        `batch_size` samples (and after the last sample) the folded result is
        compared with the concatenated direct outputs of `net`.  On a mismatch
        debugging details are printed before the assertion fails.
        """
        print('verifying batch size: ', batch_size)
        fold = Fold()
        num_samples = 100
        pending_inputs = []
        pending_preds = []
        for idx in range(num_samples):
            # get next batch
            l_sent = l_sentences[idx]
            r_sent = r_sentences[idx]
            l_tree = l_trees[idx]
            r_tree = r_trees[idx]

            pending_inputs.append((l_sent, r_sent, l_tree, r_tree))
            pending_preds.append(
                net.fold_encode(fold, l_sent, r_sent, l_tree, r_tree))

            # Only evaluate once a batch is complete or the data is exhausted.
            seen = idx + 1
            if seen % batch_size != 0 and seen != num_samples:
                continue

            fold_outs = fold([pending_preds])[0]
            direct_outs = [net(*sample) for sample in pending_inputs]
            outs = mx.nd.concat(*direct_outs, dim=0)
            if not almost_equal(fold_outs.asnumpy(), outs.asnumpy()):
                # Dump the current and previous sample to help debugging.
                print(pending_preds)
                print('l_sents: ', l_sent, l_sentences[idx - 1])
                print('r_sents: ', r_sent, r_sentences[idx - 1])
                report = (str(l_tree), str_tree(l_tree), str(r_tree),
                          str_tree(r_tree), str(l_trees[idx - 1]),
                          str_tree(l_trees[idx - 1]), str(r_trees[idx - 1]),
                          str_tree(r_trees[idx - 1]), str(fold))
                print('\n'.join(report))
                assert_almost_equal(fold_outs.asnumpy(), outs.asnumpy())
            pending_preds = []
            pending_inputs = []
            fold.reset()
def test_instance_norm():
    """Compare ``mx.symbol.InstanceNorm`` with a NumPy reference on a large input.

    The input has shape ``(LARGE_X, 1, SMALL_Y)`` and is normalized along its
    last axis; outputs must agree within ``forward_check_eps``.
    """
    dtype = np.float32
    forward_check_eps = 1E-3
    axis = -1
    eps = 1E-5
    in_shape = (LARGE_X, 1, SMALL_Y)
    ctx = mx.cpu()

    # Implementation of instance normalization using numpy
    def npy_instance_norm(data, gamma, beta, axis, eps=1E-5):
        """Normalize `data` along `axis`, then scale by `gamma` and shift by `beta`."""
        mean = data.mean(axis=axis, keepdims=True).astype(dtype)
        var = data.var(axis=axis, keepdims=True).astype(dtype)
        std = np.sqrt(var + dtype(eps)).astype(dtype)
        return gamma * (data - mean) / std + beta

    data = np.random.normal(0, 1, in_shape).astype(dtype)
    gamma = np.random.normal(0, 1, (1,)).astype(dtype)
    beta = np.random.normal(0, 1, (1,)).astype(dtype)
    data_s = mx.symbol.Variable('data')
    gamma_s = mx.symbol.Variable('gamma')
    beta_s = mx.symbol.Variable('beta')
    out_s = mx.symbol.InstanceNorm(data=data_s, gamma=gamma_s, beta=beta_s,
                                   eps=eps)
    exe = out_s.simple_bind(ctx, data=in_shape)
    exe.arg_dict['data'][:] = data
    exe.arg_dict['gamma'][:] = gamma
    exe.arg_dict['beta'][:] = beta
    out_nd = exe.forward()[0]
    # Calls implementation of instance norm in numpy and compares the output
    out = npy_instance_norm(data, gamma, beta, axis, eps)
    assert_almost_equal(out, out_nd.asnumpy(), forward_check_eps,
                        forward_check_eps)
Esempio n. 45
0
def test_subgraph_exe1(sym, subgraph_backend, op_names):
    """Use the partitioned sym to _simple_bind an executor and compare the outputs
    with those of the original executor"""
    sym, _, _ = sym
    partitioned_handle = SymbolHandle()
    check_call(
        _LIB.MXBuildSubgraphByOpNames(sym.handle, c_str(subgraph_backend),
                                      mx_uint(len(op_names)),
                                      c_str_array(op_names),
                                      ctypes.byref(partitioned_handle)))

    partitioned_sym = Symbol(partitioned_handle)
    # Partitioning must not change the symbol's external interface.
    assert partitioned_sym.list_inputs() == sym.list_inputs()
    assert partitioned_sym.list_arguments() == sym.list_arguments()
    assert partitioned_sym.list_auxiliary_states(
    ) == sym.list_auxiliary_states()

    exe = sym._simple_bind(ctx=mx.current_context(), grad_req='null')
    partitioned_exe = partitioned_sym._simple_bind(ctx=mx.current_context(),
                                                   grad_req='null')

    # Fill every input of the original executor with random data and mirror
    # it into the partitioned executor.
    for name in sym.list_inputs():
        if name in exe.arg_dict:
            src, dst = exe.arg_dict, partitioned_exe.arg_dict
        else:
            assert name in exe.aux_dict
            src, dst = exe.aux_dict, partitioned_exe.aux_dict
        src[name][:] = mx.nd.random.uniform(shape=src[name].shape)
        dst[name][:] = src[name]

    exe.forward()
    partitioned_exe.forward()
    assert len(exe.outputs) == len(partitioned_exe.outputs)
    for original_out, partitioned_out in zip(exe.outputs,
                                             partitioned_exe.outputs):
        total_diff = (original_out - partitioned_out).abs().sum().asnumpy()
        assert_almost_equal(total_diff, onp.zeros(shape=(1, )))
def test_predictor():
    """Export a small dense network and check ``Predictor`` reproduces its outputs.

    The predictor is first run with a batch of one, then reshaped to a batch
    of three; both results must match the Gluon block's own outputs.
    """
    prefix = 'test_predictor_simple_dense'
    symbol_file = "%s-symbol.json" % prefix
    param_file = "%s-0000.params" % prefix

    # two inputs with different batch sizes
    input1 = np.random.uniform(size=(1, 3))
    input2 = np.random.uniform(size=(3, 3))

    # define a simple model
    block = gluon.nn.HybridSequential()
    block.add(gluon.nn.Dense(7))
    block.add(gluon.nn.Dense(3))
    block.hybridize()
    block.initialize()
    out1 = block.forward(nd.array(input1))
    out2 = block.forward(nd.array(input2))
    block.export(prefix)

    # create a predictor; the context manager closes both files once read
    with open(symbol_file, "r") as symbol_fh, open(param_file, "rb") as param_fh:
        predictor = Predictor(symbol_fh.read(), param_fh.read(),
                              {'data': input1.shape})

    # forward and get output
    predictor.forward(data=input1)
    predictor_out1 = predictor.get_output(0)
    assert_almost_equal(out1.asnumpy(), predictor_out1, rtol=1e-5, atol=1e-6)

    # reshape
    predictor.reshape({'data': input2.shape})
    predictor.forward(data=input2)
    predictor_out2 = predictor.get_output(0)
    assert_almost_equal(out2.asnumpy(), predictor_out2, rtol=1e-5, atol=1e-6)

    # destroy the predictor
    del predictor
Esempio n. 47
0
def test_transformer_encoder():
    """Check output shapes and padding masks of ``TransformerEncoder``.

    Every combination of layer count, attention output and residual setting
    is run on random inputs with random valid lengths.
    """
    ctx = mx.current_context()
    units = 16
    num_heads = 8

    def check_outputs(encoder, num_layers, output_attention, batch_size, seq_length):
        """Run `encoder` once and validate its outputs for one input size."""
        inputs_nd = mx.nd.random.normal(0, 1, shape=(batch_size, seq_length, units), ctx=ctx)
        lengths_nd = mx.nd.array(np.random.randint(1, seq_length,
                                                   size=(batch_size,)), ctx=ctx)
        outputs_nd, extras = encoder(inputs_nd, valid_length=lengths_nd)
        lengths = lengths_nd.asnumpy()
        outputs = outputs_nd.asnumpy()

        # Positions past the valid length must be zeroed out.
        for row in range(batch_size):
            if lengths[row] < seq_length - 1:
                tail = outputs[row, int(lengths[row]):, :]
                assert_almost_equal(tail, np.zeros_like(tail), 1E-6, 1E-6)
        assert(outputs.shape == (batch_size, seq_length, units))

        if not output_attention:
            assert(len(extras) == 0)
            return

        assert(len(extras) == num_layers)
        attention = extras[0][0].asnumpy()
        assert(attention.shape == (batch_size, num_heads, seq_length, seq_length))
        for row in range(batch_size):
            valid = int(lengths[row])
            if valid < seq_length - 1:
                # No attention weight may fall on padded positions.
                assert((attention[row, :, :, valid:] == 0).all())
            if valid > 0:
                # Attention weights over the keys sum to one.
                assert_almost_equal(attention[row, :, :, :].sum(axis=-1),
                                    np.ones(attention.shape[1:3]))

    for num_layers in range(1, 3):
        for output_attention in (True, False):
            for use_residual in (False, True):
                encoder = TransformerEncoder(num_layers=num_layers, max_length=10,
                                             units=units, hidden_size=32, num_heads=num_heads,
                                             dropout=0.0, use_residual=use_residual,
                                             output_attention=output_attention, prefix='transformer_encoder_')
                encoder.initialize(ctx=ctx)
                encoder.hybridize()
                for batch_size in (4,):
                    for seq_length in (5, 10):
                        check_outputs(encoder, num_layers, output_attention,
                                      batch_size, seq_length)
Esempio n. 48
0
def check_fusion(sym, data_shape, attrs_op):
    """Verify that `sym` is fused by the MKLDNN backend and still computes the same result.

    Parameters
    ----------
    sym : symbol to partition and execute.
    data_shape : input shape forwarded to ``check_quantize``.
    attrs_op : attribute names that must equal ``'true'`` on every fused
        ``sg_mkldnn_conv`` node.
    """
    sym_sg = sym.get_backend_symbol("MKLDNN")
    assert ''.join(
        sym_sg.get_internals().list_outputs()).find('sg_mkldnn_conv') != -1
    for k, v in sym_sg.attr_dict().items():
        if k.find('sg_mkldnn_conv') != -1:
            for attr_op in attrs_op:
                assert v[attr_op] == 'true'

    arg_shapes, _, aux_shapes = sym.infer_shape()
    arg_array = [
        mx.nd.random.uniform(-1, 1, shape=shape) for shape in arg_shapes
    ]
    aux_array = [mx.nd.random.uniform(shape=shape) for shape in aux_shapes]
    exe = sym.bind(ctx=mx.current_context(),
                   args=arg_array,
                   aux_states=aux_array,
                   grad_req='null')
    exe.forward()

    # Enable the subgraph backend only for the second bind/forward, and always
    # put the environment back (restoring any earlier value) so a failure
    # cannot leak the setting into other tests.
    env_key = 'MXNET_SUBGRAPH_BACKEND'
    previous_backend = os.environ.get(env_key)
    os.environ[env_key] = 'MKLDNN'
    try:
        exe_sg = sym.bind(ctx=mx.current_context(),
                          args=arg_array,
                          aux_states=aux_array,
                          grad_req='null')
        exe_sg.forward()
    finally:
        if previous_backend is None:
            os.environ.pop(env_key, None)
        else:
            os.environ[env_key] = previous_backend

    for plain_out, fused_out in zip(exe.outputs, exe_sg.outputs):
        assert_almost_equal(plain_out.asnumpy(),
                            fused_out.asnumpy(),
                            rtol=1e-3,
                            atol=1e-3)

    # fp32 to int8
    for out_type in ('uint8', 'int8', 'auto'):
        check_quantize(sym, data_shape, out_type)
        check_quantize(sym, data_shape, out_type, gluon_forward=True)
Esempio n. 49
0
def test_elemwise_add():
    """Check ``elemwise_add`` forward against NumPy and its numeric gradient.

    The gradient check runs for row_sparse and default storage at several
    densities.
    """
    # Forward: compare with np.add on random dense inputs.
    lhs_sym = mx.sym.Variable("a")
    rhs_sym = mx.sym.Variable("b")
    shape = tuple(rand_shape_nd(4))
    add_sym = mx.sym.elemwise_add(lhs_sym, rhs_sym)
    lhs = np.random.uniform(-1, 1, shape)
    rhs = np.random.uniform(-1, 1, shape)
    executor = add_sym._simple_bind(ctx=mx.cpu(), a=shape, b=shape)
    outputs = executor.forward(is_train=False, a=lhs, b=rhs)
    expected = np.add(lhs, rhs)
    assert_almost_equal(outputs[0].asnumpy(), expected, rtol=1e-6, atol=1e-6)

    # Training: numeric gradient check per storage type and density.
    for stype in ('row_sparse', 'default'):
        data_shape = rand_shape_nd(4)
        for density in (1.0, 0.5, 0.0):
            train_sym = mx.sym.elemwise_add(mx.sym.Variable('a'),
                                            mx.sym.Variable('b'))
            first = rand_ndarray(shape=data_shape, stype=stype, density=density)
            second = rand_ndarray(shape=data_shape, stype=stype, density=density)
            check_numeric_gradient(train_sym,
                                   [first, second],
                                   numeric_eps=1e-3,
                                   rtol=1e-3,
                                   atol=5e-3)
Esempio n. 50
0
def test_subgraph_backend_gluon(sym, subgraph_backend, op_names, tmp_path):
    """Call optimize_for() to partition the graph, and then compare results of the
    partitioned sym and the original sym. Here do an inference before partitioning
    with the subgraph_backend which means we'll pass shapes/types.

    Parameters
    ----------
    sym : 3-tuple of (symbol, input names, input shapes).
    subgraph_backend : name of the subgraph backend to partition with.
    op_names : operator names registered for that backend during the test.
    tmp_path : directory (path object) where the exported model is written.
    """
    # create Gluon block for given symbol
    inputs = [mx.sym.var(i, dtype=mx_real_t) for i in sym[1]]
    sym_block = nn.SymbolBlock(sym[0], inputs)
    sym_block.initialize(ctx=mx.current_context())
    x = [
        mx.nd.random.uniform(shape=s, ctx=mx.current_context()) for s in sym[2]
    ]
    # hybridize and export to get baseline
    sym_block.hybridize()
    outputs1 = sym_block(*x)

    sym_filename, params_filename = sym_block.export(
        str(tmp_path / 'sym-block'))

    # load model and partition
    sym_block = nn.SymbolBlock.imports(sym_filename,
                                       sym[1],
                                       params_filename,
                                       ctx=mx.current_context())
    check_call(
        _LIB.MXSetSubgraphPropertyOpNamesV2(c_str(subgraph_backend),
                                            mx_uint(len(op_names)),
                                            c_str_array(op_names)))
    try:
        sym_block.optimize_for(*x, backend=subgraph_backend)
        outputs2 = sym_block(*x)
    finally:
        # Always unregister the op names so a failure here cannot leak the
        # backend configuration into later tests.
        check_call(
            _LIB.MXRemoveSubgraphPropertyOpNamesV2(c_str(subgraph_backend)))

    # compare outputs
    assert len(outputs1) == len(outputs2)
    for out1, out2 in zip(outputs1, outputs2):
        # The summed absolute difference must be (almost) zero.
        assert_almost_equal((out1 - out2).abs().sum().asnumpy(),
                            onp.zeros(shape=(1, )))
Esempio n. 51
0
def test_subgraph_exe3(sym, subgraph_backend, op_names):
    """Bind the partitioned symbol and compare its outputs with the original's.

    The graph is partitioned through ``MXBuildSubgraphByOpNames``; the
    partitioned symbol must expose the same inputs, arguments and auxiliary
    states as the original.  Both symbols are then bound to the same randomly
    generated arrays and their forward outputs compared.
    """
    # ``sym`` arrives as a 3-tuple; only its first entry (the symbol) is used.
    graph, _, _ = sym

    handle = SymbolHandle()
    build_status = _LIB.MXBuildSubgraphByOpNames(
        graph.handle,
        c_str(subgraph_backend),
        mx_uint(len(op_names)),
        c_str_array(op_names),
        ctypes.byref(handle))
    check_call(build_status)
    partitioned = Symbol(handle)

    # Partitioning must not alter the symbol's external interface.
    for getter in ('list_inputs', 'list_arguments', 'list_auxiliary_states'):
        assert getattr(partitioned, getter)() == getattr(graph, getter)()

    arg_shapes, _, aux_shapes = graph.infer_shape()
    arg_array = [mx.nd.random.uniform(shape=s) for s in arg_shapes]
    aux_array = [mx.nd.random.uniform(shape=s) for s in aux_shapes]

    # Both executors are bound with identical settings and share the arrays.
    bind_kwargs = dict(ctx=mx.current_context(),
                       args=arg_array,
                       aux_states=aux_array,
                       grad_req='null')
    exe = graph._bind(**bind_kwargs)
    partitioned_exe = partitioned._bind(**bind_kwargs)
    exe.forward()
    partitioned_exe.forward()

    reference, candidate = exe.outputs, partitioned_exe.outputs
    assert len(exe.outputs) == len(partitioned_exe.outputs)
    for idx in range(len(reference)):
        abs_diff_total = (reference[idx] - candidate[idx]).abs().sum()
        assert_almost_equal(abs_diff_total.asnumpy(), onp.zeros(shape=(1, )))
Esempio n. 52
0
def test_mkldnn_engine_threading():
    """Evaluate one convolution directly and again inside a DataLoader loop.

    The network is first run on the calling thread.  It is then run once more
    from within the first iteration over a ``DataLoader`` created with
    ``num_workers=1``, which is meant to exercise the engine from a different
    thread of execution.  The first element of that second output is compared
    with a fixed reference value.
    """
    input_shape = (32, 3, 32, 32)

    class Dummy(gluon.data.Dataset):
        """Two-item dataset; only used to drive the DataLoader."""

        def __len__(self):
            return 2

        def __getitem__(self, key):
            return key, np.ones((3, 224, 224)), np.ones((10, ))

    net = gluon.nn.HybridSequential()
    with net.name_scope():
        net.add(gluon.nn.Conv2D(channels=32, kernel_size=3, activation=None))
    net.collect_params().initialize(ctx=mx.cpu())

    def forward_ones():
        # Feed an all-ones batch and copy the result back to NumPy.
        return net(mx.nd.array(np.ones(input_shape))).asnumpy()

    loader = gluon.data.DataLoader(Dummy(), batch_size=2, num_workers=1)

    # First run: triggers mkldnn execution on the calling thread.
    forward_ones()

    # Second run: inside the loader loop; one iteration is sufficient.
    for _ in loader:
        result = forward_ones()
        # Reference value 0.016711406 (described upstream as the
        # non-mkldnn mode output).
        assert_almost_equal(result[0, 0, 0, 0], 0.016711406)
        break
Esempio n. 53
0
def test_mkldnn_engine_threading():
    """Exercise the mkldnn engine from more than one thread of execution.

    A small convolutional model is evaluated once up front.  A Gluon
    ``DataLoader`` with a single worker is then iterated to obtain a different
    thread context, and the same model is evaluated again inside that loop.
    The first output element of the second evaluation is checked against a
    fixed reference value.
    """

    import mxnet as mx
    from mxnet import gluon, nd

    batch_shape = (32, 3, 32, 32)

    class Dummy(gluon.data.Dataset):
        """Two-item dataset; only used to drive the DataLoader."""

        def __len__(self):
            return 2

        def __getitem__(self, key):
            return key, np.ones((3, 224, 224)), np.ones((10, ))

    net = gluon.nn.HybridSequential()
    with net.name_scope():
        net.add(gluon.nn.Conv2D(channels=32, kernel_size=3, activation=None))
    net.collect_params().initialize(ctx=mx.cpu())

    def evaluate():
        # All-ones input, result copied back to NumPy.
        return net(nd.array(np.ones(batch_shape))).asnumpy()

    loader = gluon.data.DataLoader(Dummy(), batch_size=2, num_workers=1)

    # Kick off mkldnn execution on the calling thread.
    evaluate()

    # Iterating the loader is what brings in the other thread context;
    # a single pass through the loop body is enough.
    for _ in loader:
        output = evaluate()
        # Reference value for the first output element.
        assert_almost_equal(output[0, 0, 0, 0], 0.3376348)
        break
Esempio n. 54
0
def test_seg_broadcast_binary():
    """Compare the segment-broadcast add/mul operators with NumPy references.

    For every device, operator and problem size, random operands and a random
    ``indptr`` are generated, the reference result is computed with the NumPy
    helper, and the ``nd.contrib`` operator is run on float32/int32 copies of
    the same data.  Results must agree within ``rtol=atol=1e-4``.

    NOTE(review): the device list includes ``mx.gpu()``, so this presumably
    needs a GPU-capable environment -- confirm.
    """
    devices = [mx.cpu(), mx.gpu()]
    operators = (
        (npy_seg_broadcast_add, nd.contrib.seg_broadcast_add, 'add'),
        (npy_seg_broadcast_mul, nd.contrib.seg_broadcast_mul, 'mul'),
    )
    # (batch_size, seg_num, nnz)
    problem_sizes = ((1, 5, 10), (10, 50, 100), (4, 1000, 10000))

    for ctx in devices:
        for np_func, nd_func, name in operators:
            for batch_size, seg_num, nnz in problem_sizes:
                lhs_npy = np.random.normal(0, 1, (batch_size, nnz))
                rhs_npy = np.random.normal(0, 1, (batch_size, seg_num))
                indptr_npy = rand_indptr(seg_num, nnz)

                # Announce which operator is being exercised.
                print('broadcast_' + name)
                expected = np_func(lhs_npy, rhs_npy, indptr_npy)

                # Run the mx.nd operator on device-side copies of the data.
                device_inputs = dict(
                    lhs=nd.array(lhs_npy, dtype=np.float32, ctx=ctx),
                    rhs=nd.array(rhs_npy, dtype=np.float32, ctx=ctx),
                    indptr=nd.array(indptr_npy, dtype=np.int32, ctx=ctx),
                )
                actual = nd_func(**device_inputs)
                assert_almost_equal(actual.asnumpy(), expected,
                                    rtol=1E-4, atol=1E-4)
Esempio n. 55
0
def test_subgraph_backend_gluon_ext2(tmpdir):
    """Check that hybridizing with the 'default' subgraph backend keeps outputs.

    A three-layer dense network is run imperatively and its parameters are
    saved under ``tmpdir``.  A second instance loads those parameters, is
    hybridized with the ``'default'`` backend restricted to
    ``FullyConnected`` ops, and is run on the same input.  The two results
    must match.
    """
    class Net(gluon.HybridBlock):
        """Dense(256) -> relu -> Dense(128) -> relu -> Dense(2)."""

        def __init__(self, **kwargs):
            super(Net, self).__init__(**kwargs)
            with self.name_scope():
                self.fc1 = nn.Dense(256)
                self.fc2 = nn.Dense(128)
                self.fc3 = nn.Dense(2)

        def hybrid_forward(self, F, x):
            hidden = F.relu(self.fc1(x))
            hidden = F.relu(self.fc2(hidden))
            return self.fc3(hidden)

    # Reference run without any partitioning.
    x = nd.random.normal(shape=(1, 512), ctx=mx.current_context())
    reference_net = Net()
    reference_net.collect_params().initialize(ctx=mx.current_context())
    reference_out = reference_net(x)
    param_path = os.path.join(str(tmpdir), 'test_subgraph_backend_gluon_ext2.params')
    reference_net.save_parameters(param_path)

    # Fresh instance with identical weights, partitioned on hybridize.
    partitioned_net = Net()
    partitioned_net.load_parameters(param_path, ctx=mx.current_context())
    subgraph_backend = 'default'
    op_names = ['FullyConnected']
    set_status = _LIB.MXSetSubgraphPropertyOpNamesV2(
        c_str(subgraph_backend), mx_uint(len(op_names)), c_str_array(op_names))
    check_call(set_status)
    partitioned_net.hybridize(backend=subgraph_backend)
    partitioned_out = partitioned_net(x)
    check_call(_LIB.MXRemoveSubgraphPropertyOpNamesV2(c_str(subgraph_backend)))

    # The summed absolute difference of every output pair must be zero.
    assert len(reference_out) == len(partitioned_out)
    for idx in range(len(reference_out)):
        abs_diff_total = (reference_out[idx] - partitioned_out[idx]).abs().sum()
        assert_almost_equal(abs_diff_total.asnumpy(), np.zeros(shape=(1,)))
Esempio n. 56
0
def test_np_loss_ndarray():
    """Check L1, L2 and softmax cross-entropy losses on small fixed inputs.

    Ported from test_loss.test_loss_ndarray.

    NOTE(review): ``np`` is presumably MXNet's NumPy-compatible module and
    ``_np`` the original NumPy -- confirm against the file's imports.
    """
    output = np.array([1, 2, 3, 4])
    label = np.array([1, 3, 5, 7])
    weighting = np.array([0.5, 1, 0.5, 1])

    # (loss class, constructor kwargs, extra call args, expected summed loss)
    regression_cases = (
        (gluon.loss.L1Loss, {}, (), 6.),
        (gluon.loss.L1Loss, {'weight': 0.5}, (), 3.),
        (gluon.loss.L1Loss, {}, (weighting,), 5.),
        (gluon.loss.L2Loss, {}, (), 7.),
        (gluon.loss.L2Loss, {'weight': 0.25}, (), 1.75),
        (gluon.loss.L2Loss, {}, (weighting,), 6),
    )
    for loss_cls, ctor_kwargs, extra_args, expected_total in regression_cases:
        loss = loss_cls(**ctor_kwargs)
        total = np.sum(loss(output, label, *extra_args))
        assert float(total) == expected_total

    # Softmax cross-entropy: two samples, two classes.
    output = np.array([[0, 2], [1, 4]])
    label = np.array([0, 1])
    weighting = np.array([[0.5], [1.0]])
    loss = gluon.loss.SoftmaxCrossEntropyLoss()

    unweighted = loss(output, label).asnumpy()
    assert_almost_equal(unweighted,
                        _np.array([2.12692809, 0.04858733]),
                        use_broadcast=False,
                        rtol=1e-3)

    # With per-sample weights the first sample's loss is halved.
    weighted = loss(output, label, weighting).asnumpy()
    assert_almost_equal(weighted,
                        _np.array([1.06346405, 0.04858733]),
                        use_broadcast=False,
                        rtol=1e-3)
Esempio n. 57
0
def test_softmax_cross_entropy():
    """Compare ``mx.nd.softmax_cross_entropy`` with a NumPy reference.

    The input is all ones, so the softmax over the default axis is uniform
    with value ``1 / num_labels``.  Every label is class 0, and the reference
    is the summed negative log-probability of that class.  The MXNet side is
    set to float64 explicitly; NumPy already computes in double precision.
    """
    batch_size, num_labels = SMALL_Y, LARGE_X
    data_shape = (batch_size, num_labels)

    input_data = mx.nd.ones(data_shape, dtype="float64")
    input_label = mx.nd.zeros((batch_size, ), dtype="float64")

    # Uniform softmax: 1/num_labels because the operator works along axis 1
    # by default (it would be 1/batch_size for axis 0).
    uniform_prob = np.full(data_shape, (1 / num_labels))

    # One-hot encoding of label 0 for every sample.
    one_hot = np.zeros(data_shape)
    one_hot[:, 0] = 1

    neg_log_prob = -np.log(uniform_prob)
    expected = np.sum(neg_log_prob * one_hot)

    actual = mx.nd.softmax_cross_entropy(input_data,
                                         input_label,
                                         dtype="float64")
    assert_almost_equal(actual.asnumpy(), expected, rtol=1e-3, atol=1e-5)
Esempio n. 58
0
def check_amp_fuse(net, data_example, expected_sym=None, quantized_nodes=None, rtol=0.05):
  """Convert `net` to low precision with AMP and compare it with the original.

  Steps:
    1. Hybridize `net` and record its outputs on `data_example` as reference.
    2. Partition `net` with `SG_PASS_NAME`, convert it with
       `amp.convert_hybrid_block` (target dtype `AMP_DTYPE`), then partition
       the converted block with `AMP_SG_PASS_NAME`.
    3. Compare the converted block's outputs with the reference.
    4. If `expected_sym` is given, compare the exported graph against it.
    5. Run `check_amp_with_quantization` on the original block.

  Args:
    net: block to convert; it is hybridized and optimized in place.
    data_example: sequence of inputs, unpacked when calling the block.
    expected_sym: optional symbol the converted graph is compared with.
    quantized_nodes: names passed as `excluded_sym_names` to the AMP
      conversion and forwarded to `check_amp_with_quantization`.
      Defaults to an empty list.
    rtol: relative tolerance for the output comparison.
  """
  # Build the default per call instead of sharing one mutable list object
  # between every invocation (and with the helpers it is forwarded to).
  if quantized_nodes is None:
    quantized_nodes = []

  net.hybridize()
  out_ref = net(*data_example)

  net.optimize_for(data_example, backend=SG_PASS_NAME)  # amp pass works only on oneDNN nodes
  lp_net = amp.convert_hybrid_block(net, data_example, target_dtype=AMP_DTYPE,
                                    excluded_sym_names=quantized_nodes, cast_params_offline=True,
                                    device=mx.current_context())
  lp_net.optimize_for(data_example, backend=AMP_SG_PASS_NAME)
  out_lp_net = lp_net(*data_example)

  # check outputs
  # Each result is normalised based on its OWN type.  Testing `out_ref` for
  # the second one (as before) was always true after the first line, so a
  # single low-precision output was never wrapped and zip() iterated over
  # its first axis instead of pairing it with the reference.
  out_ref = out_ref if isinstance(out_ref, list) else [out_ref]
  out_lp_net = out_lp_net if isinstance(out_lp_net, list) else [out_lp_net]
  # zip() stops at the shorter sequence, so check the counts explicitly.
  assert len(out_ref) == len(out_lp_net)
  for ref_out, lp_out in zip(out_ref, out_lp_net):
    assert_almost_equal(ref_out, lp_out, rtol=rtol, atol=1.0)

  # check graph
  if expected_sym is not None:
    lp_symnet = lp_net.export(None, remove_amp_cast=False)[0]
    # NOTE(review): the return value is not checked here; presumably
    # same_graph_structure asserts internally -- confirm.
    same_graph_structure(lp_symnet, expected_sym, True)

  # check amp with quantization
  check_amp_with_quantization(net, data_example, quantized_nodes)
Esempio n. 59
0
    def _check_subgraph_exe8(sym, subgraph_backend, op_names):
        """Partition via ``optimize_for`` and compare against the original symbol.

        The original symbol is bound to random arrays and run first.  The
        symbol is then partitioned with ``optimize_for``, which receives the
        argument arrays so shapes and types can be inferred before
        partitioning, and the result is bound to the same arrays and run.
        Both sets of outputs must agree.
        """
        arg_shapes, _, aux_shapes = sym.infer_shape()
        arg_array = [mx.nd.random.uniform(shape=s) for s in arg_shapes]
        aux_array = [mx.nd.random.uniform(shape=s) for s in aux_shapes]

        # Identical binding settings for both executors.
        bind_kwargs = dict(ctx=mx.current_context(),
                           args=arg_array,
                           aux_states=aux_array,
                           grad_req='null')

        # Reference executor on the unpartitioned symbol.
        reference_exe = sym.bind(**bind_kwargs)
        reference_exe.forward()

        # Restrict the backend to op_names only while partitioning.
        set_status = _LIB.MXSetSubgraphPropertyOpNamesV2(
            c_str(subgraph_backend),
            mx_uint(len(op_names)),
            c_str_array(op_names))
        check_call(set_status)
        part_sym = sym.optimize_for(subgraph_backend, arg_array)
        remove_status = _LIB.MXRemoveSubgraphPropertyOpNamesV2(
            c_str(subgraph_backend))
        check_call(remove_status)

        partitioned_exe = part_sym.bind(**bind_kwargs)
        partitioned_exe.forward()

        # The summed absolute difference of every output pair must be zero.
        reference_out = reference_exe.outputs
        partitioned_out = partitioned_exe.outputs
        assert len(reference_out) == len(partitioned_out)
        for idx in range(len(reference_out)):
            abs_diff_total = (reference_out[idx] - partitioned_out[idx]).abs().sum()
            assert_almost_equal(abs_diff_total.asnumpy(), np.zeros(shape=(1, )))
Esempio n. 60
0
def test_sparse_aggregator():
    """Aggregate sparse ndarrays placed on multiple devices.

    Pushes row_sparse arrays living on several CPU contexts into a kvstore,
    pulls them back into the same arrays, and checks the summed pulled values
    against the sum of the pushed values scaled by the number of devices.
    First for a single key, then for a list of keys.

    Relies on module-level ``shape``, ``keys``, ``init_kv`` and
    ``rand_ndarray`` defined elsewhere in the file.
    """

    stype = 'row_sparse'
    kv = init_kv(stype)

    # devices: four CPU contexts with distinct device ids
    num_devs = 4
    devs = [mx.Context('cpu', i) for i in range(num_devs)]

    # single key: one random row_sparse array per device
    vals = [rand_ndarray(shape, stype).copyto(devs[i]) for i in range(num_devs)]
    # Dense sum of everything that is about to be pushed.
    expected_sum = np.zeros(shape)
    for v in vals:
        expected_sum += v.asnumpy()

    kv.push(3, vals)
    # Pull back into the same arrays, overwriting the pushed values.
    kv.pull(3, out = vals)
    result_sum = np.zeros(shape)
    for v in vals:
        result_sum += v.asnumpy()
    # Presumably every device receives the full aggregate, hence the
    # num_devs factor -- TODO confirm against the kvstore semantics.
    assert_almost_equal(result_sum, expected_sum * num_devs)

    # list of keys: `* len(keys)` repeats the SAME inner list object, so
    # every key is pushed from (and pulled into) the same per-device arrays.
    vals = [[rand_ndarray(shape, stype).copyto(devs[i]) for i in range(num_devs)]] * len(keys)
    expected_sum = np.zeros(shape)
    for v in vals[0]:
        expected_sum += v.asnumpy()

    kv.push(keys, vals)
    kv.pull(keys, out = vals)
    # Check the pulled values key by key.
    for vv in vals:
        result_sum = np.zeros(shape)
        for v in vv:
            result_sum += v.asnumpy()
        assert_almost_equal(result_sum, expected_sum * num_devs)