import numpy as np

import nnvm.symbol as sym
# check_function is NNVM's forward/backward testing helper; the import
# path below is the one used by the NNVM testing utilities (assumed here).
from nnvm.testing.check_computation import check_function


def test_broadcast():
    a = sym.Variable("a")
    b = sym.Variable("b")
    shape = {'a': (3, 4, 5), 'b': (1, 5)}

    def _collapse(g):
        # Reduce a gradient of a's shape (3, 4, 5) back to b's shape
        # (1, 5) by summing over the broadcast axes.
        return g.reshape(-1, shape['b'][-1]).sum(0, keepdims=True)

    y = sym.broadcast_add(a, b)
    def _backward_add(head_grads, a, b):
        da = head_grads
        db = _collapse(head_grads)
        return da, db
    check_function(y, lambda a, b: a + b, _backward_add, shape=shape)

    y = sym.broadcast_sub(a, b)
    def _backward_sub(head_grads, a, b):
        da = head_grads
        db = -_collapse(head_grads)
        return da, db
    check_function(y, lambda a, b: a - b, _backward_sub, shape=shape)

    y = sym.broadcast_mul(a, b)
    def _backward_mul(head_grads, a, b):
        da = head_grads * b
        db = _collapse(head_grads * a)
        return da, db
    check_function(y, lambda a, b: a * b, _backward_mul, shape=shape)

    y = sym.broadcast_div(a, b)
    def _backward_div(head_grads, a, b):
        da = head_grads / b
        db = _collapse(-head_grads * a / b**2)
        return da, db
    # We avoid computing numerical derivatives too close to zero here:
    # first check only the analytic gradients, then rerun the numerical
    # check with b kept away from zero so it is well conditioned.
    check_function(y, lambda a, b: a / b, _backward_div, shape=shape,
                   numerical_grads=False)
    check_function(y, lambda a, b: a / b, _backward_div, shape=shape,
                   in_range={'b': (0.1, 20)})

    y = sym.broadcast_mod(a, b)
    check_function(y, lambda a, b: np.mod(a, b),
                   in_range={'a': (0.001, 100), 'b': (1, 100)},
                   dtype='int32', shape=shape)

    y = sym.broadcast_max(a, b)
    check_function(y, lambda a, b: np.maximum(a, b), shape=shape)

    y = sym.broadcast_min(a, b)
    check_function(y, lambda a, b: np.minimum(a, b), shape=shape)

    y = sym.broadcast_pow(a, b)
    check_function(y, lambda a, b: np.power(a, b),
                   in_range={'a': (0.001, 100), 'b': (0.001, 2)},
                   shape=shape)

    y = sym.broadcast_left_shift(a, b)
    check_function(y, lambda a, b: a << b, dtype='int32', shape=shape)

    y = sym.broadcast_right_shift(a, b)
    check_function(y, lambda a, b: a >> b, dtype='int32', shape=shape)

    y = sym.broadcast_greater(a, b)
    check_function(y, lambda a, b: np.greater(a, b), shape=shape)

    y = sym.broadcast_less(a, b)
    check_function(y, lambda a, b: np.less(a, b), shape=shape)

    y = sym.broadcast_equal(a, b)
    check_function(y, lambda a, b: np.equal(a, b),
                   in_range={'a': (-2, 2), 'b': (-2, 2)},
                   dtype='int32', shape=shape)

    y = sym.broadcast_not_equal(a, b)
    check_function(y, lambda a, b: np.not_equal(a, b),
                   in_range={'a': (-2, 2), 'b': (-2, 2)},
                   dtype='int32', shape=shape)

    y = sym.broadcast_greater_equal(a, b)
    check_function(y, lambda a, b: np.greater_equal(a, b),
                   in_range={'a': (-3, 3), 'b': (-3, 3)},
                   dtype='int32', shape=shape)

    y = sym.broadcast_less_equal(a, b)
    check_function(y, lambda a, b: np.less_equal(a, b),
                   in_range={'a': (-3, 3), 'b': (-3, 3)},
                   dtype='int32', shape=shape)
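
# Why _collapse sums over the broadcast axes: when b of shape (1, 5) is
# broadcast against a of shape (3, 4, 5), each element of b feeds 3 * 4
# output elements, so its gradient is the sum of the matching head
# gradients. A minimal self-contained NumPy sketch verifying this against
# a finite difference (demo only, not part of the original suite; the
# function name is ours):
def _demo_collapse_gradient():
    rng = np.random.RandomState(0)
    a = rng.randn(3, 4, 5)
    b = rng.randn(1, 5)
    head_grads = rng.randn(3, 4, 5)
    # Analytic gradient of L = sum(head_grads * (a + b)) w.r.t. b,
    # reduced to b's shape exactly as _collapse does.
    db = head_grads.reshape(-1, b.shape[-1]).sum(0, keepdims=True)
    # Finite-difference check on a single element of b.
    eps = 1e-6
    loss = lambda bb: (head_grads * (a + bb)).sum()
    b_pert = b.copy()
    b_pert[0, 2] += eps
    numeric = (loss(b_pert) - loss(b)) / eps
    assert abs(numeric - db[0, 2]) < 1e-4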
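
# Why the first broadcast_div check passes numerical_grads=False: a
# central difference for a / b degrades badly when b approaches zero,
# because the function blows up and the secant no longer tracks the
# tangent. A small illustration (demo only, the function name is ours):
def _demo_div_numerical_grad_instability():
    a = 1.0
    eps = 1e-3
    for b in (1.0, 1e-2, 1e-4):
        analytic = -a / b**2                                # d(a/b)/db
        numeric = (a / (b + eps) - a / (b - eps)) / (2 * eps)
        rel_err = abs(numeric - analytic) / abs(analytic)
        # The relative error grows as b approaches the step size eps,
        # which is why the second check constrains b to (0.1, 20).
        print("b=%g  rel_err=%.3g" % (b, rel_err))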
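
# Allow running the file directly, as is conventional for these test
# files (guard added here; not in the original excerpt):
if __name__ == "__main__":
    test_broadcast()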