Example #1
    def check_vardrop(drop_inputs, drop_states, drop_outputs):
        # Assumes the test module's imports: import mxnet as mx;
        # from mxnet.gluon import contrib; from mxnet.test_utils import almost_equal
        cell = contrib.rnn.VariationalDropoutCell(mx.gluon.rnn.RNNCell(100, prefix='rnn_'),
                                                  drop_outputs=drop_outputs,
                                                  drop_states=drop_states,
                                                  drop_inputs=drop_inputs)
        cell.collect_params().initialize(init='xavier')
        input_data = mx.nd.random_uniform(shape=(10, 3, 50), ctx=mx.context.current_context())
        # Each unroll resamples the dropout masks, so two unrolls of the
        # same input must produce different outputs.
        with mx.autograd.record():
            outputs1, _ = cell.unroll(3, input_data, merge_outputs=True)
            mx.nd.waitall()
            outputs2, _ = cell.unroll(3, input_data, merge_outputs=True)
        assert not almost_equal(outputs1.asnumpy(), outputs2.asnumpy())

        # Symbolic unroll: verify shape inference on the same cell.
        inputs = [mx.sym.Variable('rnn_t%d_data'%i) for i in range(3)]
        outputs, _ = cell.unroll(3, inputs, merge_outputs=False)
        outputs = mx.sym.Group(outputs)

        args, outs, auxs = outputs.infer_shape(rnn_t0_data=(10,50), rnn_t1_data=(10,50), rnn_t2_data=(10,50))
        assert outs == [(10, 100), (10, 100), (10, 100)]

        # Reset, hybridize, and repeat the randomness checks.
        cell.reset()
        cell.hybridize()
        with mx.autograd.record():
            outputs3, _ = cell.unroll(3, input_data, merge_outputs=True)
            mx.nd.waitall()
            outputs4, _ = cell.unroll(3, input_data, merge_outputs=True)
        assert not almost_equal(outputs3.asnumpy(), outputs4.asnumpy())
        assert not almost_equal(outputs1.asnumpy(), outputs3.asnumpy())
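A minimal driver for the helper above (presumably nested inside a test such as MXNet's test_vardrop); the three arguments are dropout rates. The exact values below are an assumption, not part of this page:

    check_vardrop(0.5, 0.5, 0.5)  # drop inputs, states and outputs
    check_vardrop(0.5, 0, 0.5)    # state dropout disabled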
Example #2
    def check_vardrop(drop_inputs, drop_states, drop_outputs):
        # Same as Example #1, but additionally asserts that the output
        # dropout mask itself is resampled between the two unrolls.
        cell = contrib.rnn.VariationalDropoutCell(mx.gluon.rnn.RNNCell(100, prefix='rnn_'),
                                                  drop_outputs=drop_outputs,
                                                  drop_states=drop_states,
                                                  drop_inputs=drop_inputs)
        cell.collect_params().initialize(init='xavier')
        input_data = mx.nd.random_uniform(shape=(10, 3, 50), ctx=mx.context.current_context())
        with mx.autograd.record():
            outputs1, _ = cell.unroll(3, input_data, merge_outputs=True)
            mask1 = cell.drop_outputs_mask.asnumpy()
            mx.nd.waitall()
            outputs2, _ = cell.unroll(3, input_data, merge_outputs=True)
            mask2 = cell.drop_outputs_mask.asnumpy()
        assert not almost_equal(mask1, mask2)
        assert not almost_equal(outputs1.asnumpy(), outputs2.asnumpy())

        inputs = [mx.sym.Variable('rnn_t%d_data'%i) for i in range(3)]
        outputs, _ = cell.unroll(3, inputs, merge_outputs=False)
        outputs = mx.sym.Group(outputs)

        args, outs, auxs = outputs.infer_shape(rnn_t0_data=(10,50), rnn_t1_data=(10,50), rnn_t2_data=(10,50))
        assert outs == [(10, 100), (10, 100), (10, 100)]

        cell.reset()
        cell.hybridize()
        with mx.autograd.record():
            outputs3, _ = cell.unroll(3, input_data, merge_outputs=True)
            mx.nd.waitall()
            outputs4, _ = cell.unroll(3, input_data, merge_outputs=True)
        assert not almost_equal(outputs3.asnumpy(), outputs4.asnumpy())
        assert not almost_equal(outputs1.asnumpy(), outputs3.asnumpy())
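Beyond checking that masks change between unrolls, the defining property of variational dropout is that a single mask is shared across all time steps within one unroll. A standalone sketch of that behavior (MXNet 1.x Gluon API, same modules as Example #1; written as an illustration, not taken from this page):

    import mxnet as mx
    from mxnet.gluon import contrib

    cell = contrib.rnn.VariationalDropoutCell(mx.gluon.rnn.RNNCell(100),
                                              drop_outputs=0.5)
    cell.collect_params().initialize()
    x = mx.nd.random_uniform(shape=(10, 3, 50))
    with mx.autograd.record():  # dropout is only active in training mode
        out, _ = cell.unroll(3, x, merge_outputs=True)
    # Sampled once for the whole unroll and reused at every time step;
    # cell.reset() clears it so the next unroll draws a fresh one.
    mask = cell.drop_outputs_mask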
Example #3
    def check_vardrop(drop_inputs, drop_states, drop_outputs):
        # MXNet 2.x variant of Example #1: mx.np arrays, explicit infer_shape,
        # and cell.initialize() instead of collect_params().initialize().
        # Assumes: import mxnet as mx; from mxnet import gluon;
        # from mxnet.test_utils import almost_equal
        cell = gluon.rnn.VariationalDropoutCell(mx.gluon.rnn.RNNCell(100),
                                                drop_outputs=drop_outputs,
                                                drop_states=drop_states,
                                                drop_inputs=drop_inputs)

        input_data = mx.np.random.uniform(size=(10, 3, 50), ctx=mx.context.current_context())
        cell.infer_shape(0, input_data, False)
        cell.initialize(init='xavier')
        with mx.autograd.record():
            outputs1, _ = cell.unroll(3, input_data, merge_outputs=True)
            mx.npx.waitall()
            outputs2, _ = cell.unroll(3, input_data, merge_outputs=True)
        assert not almost_equal(outputs1.asnumpy(), outputs2.asnumpy())

        inputs = [mx.np.ones(shape=(10,50)) for i in range(3)]
        cell.infer_shape(0, inputs[0], False)
        cell.initialize()
        outputs, _ = cell.unroll(3, inputs, merge_outputs=False)

        outs = [o.shape for o in outputs]
        assert outs == [(10, 100), (10, 100), (10, 100)]

        cell.reset()
        cell.hybridize()
        with mx.autograd.record():
            outputs3, _ = cell.unroll(3, input_data, merge_outputs=True)
            mx.npx.waitall()
            outputs4, _ = cell.unroll(3, input_data, merge_outputs=True)
        assert not almost_equal(outputs3.asnumpy(), outputs4.asnumpy())
        assert not almost_equal(outputs1.asnumpy(), outputs3.asnumpy())
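Example #3 targets the MXNet 2.x NumPy-style API. A hypothetical driver would enable numpy semantics before creating any arrays (set_np is the real switch; the argument values are an assumption):

    mx.npx.set_np()  # switch MXNet to NumPy-compatible array semantics
    check_vardrop(0.5, 0.5, 0.5)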
Example #4
    def check_fluent_regular(func, kwargs, shape=(5, 17, 1), equal_nan=False):
        # Assumes: import mxnet as mx; from mxnet.test_utils import
        # almost_equal, default_context
        with mx.name.NameManager():
            data = mx.nd.random_uniform(shape=shape, ctx=default_context())
            # Call the operator as a free function and as a fluent method on
            # the array; the two paths must agree elementwise.
            regular = getattr(mx.ndarray, func)(data, **kwargs)
            fluent = getattr(data, func)(**kwargs)
            if isinstance(regular, list):
                for r, f in zip(regular, fluent):
                    assert almost_equal(r.asnumpy(), f.asnumpy(), equal_nan=equal_nan)
            else:
                assert almost_equal(regular.asnumpy(), fluent.asnumpy(), equal_nan=equal_nan)
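A few hypothetical invocations in the spirit of MXNet's test_fluent; any operator that exists both as mx.ndarray.<func> and as an NDArray method will do:

    check_fluent_regular('flatten', {})
    check_fluent_regular('clip', {'a_min': -0.25, 'a_max': 0.75})
    check_fluent_regular('repeat', {'repeats': 3})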
Example #6
    def test_horovod_allreduce_postscale(self):
        """Test that the allreduce correctly sums 1D, 2D, 3D tensors with postscaling."""
        # Assumes the test module's imports: import itertools, os;
        # import mxnet as mx; import numpy as np; import horovod.mxnet as hvd;
        # from mxnet.test_utils import almost_equal
        hvd.init()
        size = hvd.size()
        dtypes = self.filter_supported_types(
            ['int32', 'int64', 'float16', 'float32', 'float64'])
        int_types = ['int32', 'int64']
        dims = [1, 2, 3]
        ctx = self._current_context()
        count = 1
        shapes = [(), (17,), (17, 17), (17, 17, 17)]
        for dtype, dim in itertools.product(dtypes, dims):
            mx.random.seed(1234, ctx=ctx)
            np.random.seed(1234)
            tensor = mx.nd.random.uniform(-100,
                                          100,
                                          shape=shapes[dim],
                                          ctx=ctx)
            tensor = tensor.astype(dtype)
            factor = np.random.uniform()
            scaled = hvd.allreduce(tensor,
                                   average=False,
                                   name=str(count),
                                   postscale_factor=factor)

            factor = mx.nd.array([factor], dtype='float64', ctx=ctx)
            if ctx != mx.cpu() and not int(
                    os.environ.get('HOROVOD_MIXED_INSTALL', 0)):
                # For integer types, scaling is done in FP64.
                factor = factor.astype('float64' if dtype in int_types else dtype)
                tensor = tensor.astype('float64' if dtype in int_types else dtype)
            else:
                # For integer types, scaling is done in FP64; FP32 math is
                # used for FP16 on CPU.
                factor = factor.astype('float32' if dtype == 'float16'
                                       else 'float64' if dtype in int_types
                                       else dtype)
                tensor = tensor.astype('float32' if dtype == 'float16'
                                       else 'float64' if dtype in int_types
                                       else dtype)

            expected = tensor * size
            expected *= factor
            expected = expected.astype(dtype)
            count += 1

            # Threshold for floating point equality depends on number of
            # ranks, since we're comparing against precise multiplication.
            if size <= 3 or dtype in int_types:
                threshold = 0
            elif size < 10:
                threshold = 1e-4
            elif size < 15:
                threshold = 5e-4
            else:
                break  # tolerances above are not calibrated for >= 15 ranks

            assert almost_equal(expected.asnumpy(), scaled.asnumpy(), atol=threshold), \
                f'hvd.allreduce produces incorrect results for pre/post scaling: {hvd.rank()} {count} {dtype} {dim}'
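For orientation, a self-contained sketch of the primitive this test exercises. It assumes a working horovod.mxnet install and a multi-process launcher such as horovodrun; the tensor values are purely illustrative:

    import mxnet as mx
    import horovod.mxnet as hvd

    hvd.init()
    x = mx.nd.ones((2, 2)) * hvd.rank()
    # Summing across N workers puts 0 + 1 + ... + (N - 1) in every element.
    summed = hvd.allreduce(x, average=False, name='demo')
    print(hvd.rank(), summed.asnumpy())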
Example #7
    def test_horovod_allreduce_inplace(self):
        """Test that the allreduce correctly sums 1D, 2D, 3D tensors."""
        hvd.init()
        size = hvd.size()
        dtypes = self.filter_supported_types(
            ['int32', 'int64', 'float32', 'float64'])
        dims = [1, 2, 3]
        ctx = self._current_context()
        count = 0
        shapes = [(), (17,), (17, 17), (17, 17, 17)]
        for dtype, dim in itertools.product(dtypes, dims):
            mx.random.seed(1234, ctx=ctx)
            tensor = mx.nd.random.uniform(-100,
                                          100,
                                          shape=shapes[dim],
                                          ctx=ctx)
            tensor = tensor.astype(dtype)
            multiplied = tensor * size  # expected cross-rank sum
            hvd.allreduce_(tensor, average=False, name=str(count))  # in-place
            count += 1

            # Threshold for floating point equality depends on number of
            # ranks, since we're comparing against precise multiplication.
            if size <= 3 or dtype in ['int32', 'int64']:
                threshold = 0
            elif size < 10:
                threshold = 1e-4
            elif size < 15:
                threshold = 5e-4
            else:
                break  # tolerances above are not calibrated for >= 15 ranks

            assert almost_equal(tensor.asnumpy(), multiplied.asnumpy(), atol=threshold), \
                f'hvd.allreduce produces incorrect results for self: {hvd.rank()} {count} {dtype} {dim}'
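The trailing underscore marks the in-place variant: hvd.allreduce returns a new array, while hvd.allreduce_ overwrites its argument, which is what lets the test compare the mutated tensor against the precomputed product. A minimal sketch under the same assumptions as above:

    import mxnet as mx
    import horovod.mxnet as hvd

    hvd.init()
    x = mx.nd.ones((3,))
    hvd.allreduce_(x, average=False)  # x itself now holds the cross-rank sum
    # With N workers, x.asnumpy() == [N, N, N].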
Example #8
    def test_horovod_grouped_allreduce_average(self):
        """Test that the grouped allreduce correctly averages 1D, 2D, 3D tensors."""
        hvd.init()
        size = hvd.size()
        dtypes = self.filter_supported_types(
            ['int32', 'int64', 'float32', 'float64'])
        dims = [1, 2, 3]
        ctx = self._current_context()
        count = 1
        shapes = [(), (17,), (17, 17), (17, 17, 17)]
        for dtype, dim in itertools.product(dtypes, dims):
            mx.random.seed(1234, ctx=ctx)

            tensors = [
                mx.nd.random.uniform(-100, 100, shape=shapes[dim], ctx=ctx)
                for _ in range(5)
            ]

            tensors = [tensor.astype(dtype) for tensor in tensors]
            # Multiply then divide by size so the reference values pick up the
            # same floating-point rounding that the averaged allreduce incurs.
            tensors = [tensor * size for tensor in tensors]
            tensors = [tensor / size for tensor in tensors]

            averaged = hvd.grouped_allreduce(tensors,
                                             average=True,
                                             name=str(count))

            count += 1

            # Threshold for floating point equality depends on number of
            # ranks, since we're comparing against precise multiplication.
            if size <= 3 or dtype in ['int32', 'int64']:
                threshold = 0
            elif size < 10:
                threshold = 1e-4
            elif size < 15:
                threshold = 5e-4
            else:
                break  # tolerances above are not calibrated for >= 15 ranks

            assert all([almost_equal(t1.asnumpy(), t2.asnumpy(), atol=threshold)
                        for t1, t2 in zip(averaged, tensors)]), \
                f'hvd.grouped_allreduce produces incorrect results for average: {hvd.rank()} {count} {dtype} {dim}'
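grouped_allreduce fuses several reductions into a single Horovod operation, cutting per-tensor negotiation and launch overhead; results come back in the same order as the inputs. A sketch under the same assumptions as the previous examples:

    import mxnet as mx
    import horovod.mxnet as hvd

    hvd.init()
    grads = [mx.nd.ones((4,)) * i for i in range(3)]
    # One fused call instead of three separate allreduces.
    averaged = hvd.grouped_allreduce(grads, average=True, name='demo')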
Example #9
    def verify(batch_size):
        # Closure over outer test fixtures: Fold, net, l_sentences,
        # r_sentences, l_trees, r_trees, str_tree, almost_equal and
        # assert_almost_equal are all defined in the enclosing test.
        print('verifying batch size:', batch_size)
        fold = Fold()
        num_samples = 100
        inputs = []
        fold_preds = []
        for i in range(num_samples):
            # get next batch
            l_sent = l_sentences[i]
            r_sent = r_sentences[i]
            sent = mx.nd.concat(l_sent, r_sent, dim=0)
            l_len = len(l_sent)
            l_tree = l_trees[i]
            r_tree = r_trees[i]

            inputs.append((sent, l_len, l_tree, r_tree))
            z_fold = net.fold_encode(fold, sent, l_len, l_tree, r_tree)
            fold_preds.append(z_fold)

            if (i + 1) % batch_size == 0 or (i + 1) == num_samples:
                # Evaluate the accumulated fold batch in one shot, then
                # compare against running the network eagerly on each sample.
                fold_outs = fold([fold_preds])[0]
                outs = mx.nd.concat(*[net(sent, l_len, l_tree, r_tree)
                                      for sent, l_len, l_tree, r_tree in inputs],
                                    dim=0)
                if not almost_equal(fold_outs.asnumpy(), outs.asnumpy()):
                    print(fold_preds)
                    print('l_sents: ', l_sent, l_sentences[i - 1])
                    print('r_sents: ', r_sent, r_sentences[i - 1])
                    print('\n'.join(
                        (str(l_tree), str_tree(l_tree), str(r_tree),
                         str_tree(r_tree), str(l_trees[i - 1]),
                         str_tree(l_trees[i - 1]), str(r_trees[i - 1]),
                         str_tree(r_trees[i - 1]), str(fold))))
                    assert_almost_equal(fold_outs.asnumpy(), outs.asnumpy())
                fold_preds = []
                inputs = []
                fold.reset()
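A hypothetical driver for the helper above; batch sizes that do not divide num_samples evenly also exercise the final partial batch handled by the (i + 1) == num_samples branch:

    for bs in (1, 7, 100):
        verify(bs)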