def test_nobroadcast_fp16():
    """Check elementwise binary GPU ops on equal-shaped float16 inputs against NumPy.

    Covers Minimum, Maximum, Greater, Less, Pow, RealDiv, Mul, Sub,
    DivNoNan (including a zero divisor), Mod and FloorMod.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
    np.random.seed(42)
    x1_np = np.random.rand(10, 20).astype(np.float16)
    x2_np = np.random.rand(10, 20).astype(np.float16)

    def verify(op, expected):
        # Run the MindSpore op on fresh Tensors and compare with the NumPy result.
        assert np.allclose(op(Tensor(x1_np), Tensor(x2_np)).asnumpy(), expected)

    verify(P.Minimum(), np.minimum(x1_np, x2_np))
    verify(P.Maximum(), np.maximum(x1_np, x2_np))
    verify(P.Greater(), x1_np > x2_np)
    verify(P.Less(), x1_np < x2_np)
    verify(P.Pow(), np.power(x1_np, x2_np))
    verify(P.RealDiv(), x1_np / x2_np)
    verify(P.Mul(), x1_np * x2_np)
    verify(P.Sub(), x1_np - x2_np)
    verify(P.DivNoNan(), x1_np / x2_np)

    # DivNoNan must yield zeros (not NaN/Inf) wherever the divisor is zero.
    x2_np_zero = np.zeros_like(x2_np)
    output_ms = P.DivNoNan()(Tensor(x1_np), Tensor(x2_np_zero))
    assert np.allclose(output_ms.asnumpy(), x2_np_zero)

    # Mod follows C-style fmod; FloorMod follows Python/NumPy mod semantics.
    verify(P.Mod(), np.fmod(x1_np, x2_np))
    verify(P.FloorMod(), np.mod(x1_np, x2_np))
def test_broadcast_diff_dims():
    """Check that GPU binary ops broadcast shapes (2,) vs (2, 1) like NumPy.

    Float32 inputs exercise arithmetic ops; int32 inputs additionally
    exercise the comparison ops Greater and Less.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
    np.random.seed(42)
    x1_np = np.random.rand(2).astype(np.float32)
    x2_np = np.random.rand(2, 1).astype(np.float32)
    x1_np_int32 = np.random.randint(0, 100, (2)).astype(np.int32)
    x2_np_int32 = np.random.randint(0, 100, (2, 1)).astype(np.int32)

    def verify(op, lhs, rhs, expected):
        # Run the MindSpore op on the given operands and compare with NumPy.
        assert np.allclose(op(Tensor(lhs), Tensor(rhs)).asnumpy(), expected)

    verify(P.Minimum(), x1_np, x2_np, np.minimum(x1_np, x2_np))
    verify(P.Maximum(), x1_np, x2_np, np.maximum(x1_np, x2_np))
    verify(P.Greater(), x1_np_int32, x2_np_int32, x1_np_int32 > x2_np_int32)
    verify(P.Greater(), x1_np, x2_np, x1_np > x2_np)
    verify(P.Less(), x1_np, x2_np, x1_np < x2_np)
    verify(P.Less(), x1_np_int32, x2_np_int32, x1_np_int32 < x2_np_int32)
    verify(P.Pow(), x1_np, x2_np, np.power(x1_np, x2_np))
    verify(P.RealDiv(), x1_np, x2_np, x1_np / x2_np)
    verify(P.Mul(), x1_np, x2_np, x1_np * x2_np)
    verify(P.Sub(), x1_np, x2_np, x1_np - x2_np)
    verify(P.DivNoNan(), x1_np, x2_np, x1_np / x2_np)

    # DivNoNan must yield zeros (not NaN/Inf) wherever the divisor is zero.
    x2_np_zero = np.zeros_like(x2_np)
    verify(P.DivNoNan(), x1_np, x2_np_zero, x2_np_zero)
def test_divnonan_uint8():
    """Check DivNoNan on uint8 inputs on GPU.

    For integer inputs DivNoNan performs integer division; where the
    divisor is zero the result must be 0 rather than a runtime error.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
    np.random.seed(42)
    x1_np_uint8 = np.random.randint(1, 100, (10, 20)).astype(np.uint8)
    x2_np_uint8 = np.random.randint(1, 100, (10, 20)).astype(np.uint8)
    output_ms = P.DivNoNan()(Tensor(x1_np_uint8), Tensor(x2_np_uint8))
    # For non-negative integers, floor division matches the op's integer division.
    output_np = x1_np_uint8 // x2_np_uint8
    assert np.allclose(output_ms.asnumpy(), output_np)

    # Zero divisor: every element of the result must be 0.
    x2_np_zero = np.zeros_like(x2_np_uint8)
    output_ms = P.DivNoNan()(Tensor(x1_np_uint8), Tensor(x2_np_zero))
    assert np.allclose(output_ms.asnumpy(), x2_np_zero)
def __init__(self, vocab_size, embedding_size, field_size, param_init='normal',
             target='CPU', slice_mode='batch_slice', feature_num_list=None,
             max_norm=None, sparse=True, operator='SUM'):
    """Initialize MultiFieldEmbeddingLookup.

    Args:
        vocab_size: size of the embedding vocabulary (passed to the base lookup).
        embedding_size: dimensionality of each embedding vector.
        field_size (int): number of fields; validated to be a positive int.
        param_init: parameter initialization method for the embedding table.
        target: device the lookup runs on ('CPU' here by default).
        slice_mode: parallel slicing strategy; under auto/semi-auto parallel
            only 'table_row_slice', 'batch_slice' and 'table_column_slice'
            are accepted (see the ValueError below).
        feature_num_list: forwarded to the base class.
        max_norm: forwarded to the base class.
        sparse (bool): whether the lookup uses sparse gather.
        operator (str): field merge mode, one of 'SUM', 'MAX', 'MEAN'.

    Raises:
        ValueError: if `operator` is not one of 'SUM'/'MAX'/'MEAN', or if
            `slice_mode` is unsupported under auto-parallel.
    """
    super(MultiFieldEmbeddingLookup, self).__init__(vocab_size, embedding_size,
                                                    param_init, target, slice_mode,
                                                    feature_num_list, max_norm, sparse)
    self.field_size = validator.check_positive_int(field_size, 'field_size')
    self.operator = operator

    # Primitive ops used by construct(); several Mul/Add instances are kept
    # separate so each can carry its own parallel shard strategy below.
    self.mul = P.Mul()
    self.inf_mask_mul = P.Mul()
    self.bias_add = P.Add()
    self.inf_add = P.Add()
    self.merge_op = None
    self.count_op = P.UnsortedSegmentSum()
    self.abs = P.Abs()
    self.equal = P.Equal()
    self.add = P.Add()
    self.cast = P.Cast()
    self.div_no_nan = P.DivNoNan()
    self.expand = P.ExpandDims()
    self.max_mask_mul = P.Mul()
    self.max_no_equal = P.NotEqual()

    # Select the per-field reduction. MEAN also uses UnsortedSegmentSum:
    # presumably the sum is later divided by a per-segment count
    # (div_no_nan/count_op) in construct() — confirm against the caller.
    if operator == MultiFieldEmbeddingLookup.OPERATOR_SUM:
        self.merge_op = P.UnsortedSegmentSum()
    elif operator == MultiFieldEmbeddingLookup.OPERATOR_MAX:
        self.merge_op = P.UnsortedSegmentMax()
    elif operator == MultiFieldEmbeddingLookup.OPERATOR_MEAN:
        self.merge_op = P.UnsortedSegmentSum()
    else:
        raise ValueError(
            "The operator supports ['SUM', 'MAX', 'MEAN'], but found: "
            + str(operator))

    parallel_mode = _get_parallel_mode()
    is_auto_parallel = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL,
                                         ParallelMode.AUTO_PARALLEL)
    if slice_mode in ["table_row_slice", "batch_slice"] and is_auto_parallel:
        # Row/batch slicing: shard the leading (batch/row) axis across devices.
        self.merge_op.shard(
            ((get_group_size(), 1, 1), (get_group_size(), 1)))
        self.expand.shard(((get_group_size(), ), ))
        self.bias_add.shard(((1, 1), (1, 1)))
        self.mul.shard(
            ((get_group_size(), 1, 1), (get_group_size(), 1, 1)))
        self.count_op.shard(((get_group_size(), 1), (get_group_size(), 1)))
        self.add.shard(((get_group_size(), ), (get_group_size(), )))
        self.div_no_nan.shard(
            ((get_group_size(), 1), (get_group_size(), 1)))
        self.max_mask_mul.shard(
            ((get_group_size(), 1), (get_group_size(), 1)))
        self.max_no_equal.shard(((1, ), ()))
        if operator == MultiFieldEmbeddingLookup.OPERATOR_MAX:
            # MAX needs different input ranks for these ops, so re-shard them.
            self.equal.shard(((get_group_size(), 1, 1), ()))
            self.inf_mask_mul.shard(((get_group_size(), 1, 1), ()))
            self.merge_op.shard(
                ((get_group_size(), 1), (get_group_size(), )))
            self.count_op.shard(
                ((get_group_size(), ), (get_group_size(), )))
            self.inf_add.shard(
                ((get_group_size(), 1, 1), (get_group_size(), 1, 1)))
    elif slice_mode == "table_column_slice" and is_auto_parallel:
        # Column slicing: shard the embedding (last) axis across devices.
        self.merge_op.shard(((1, 1, get_group_size()), (1, 1)))
        self.div_no_nan.shard(((1, get_group_size()), (1, 1)))
        self.bias_add.shard(((1, 1), (1, 1)))
        self.mul.shard(((1, 1, 1), (1, 1, get_group_size())))
        self.count_op.shard(((1, 1), (1, 1)))
        self.add.shard(((1, ), (1, )))
        self.max_mask_mul.shard(((1, get_group_size()), (1, 1)))
        self.expand.shard(((1, ), ))
        self.max_no_equal.shard(((1, ), ()))
        if operator == MultiFieldEmbeddingLookup.OPERATOR_MAX:
            self.equal.shard(((1, 1, 1), ()))
            self.inf_mask_mul.shard(((1, 1, 1), ()))
            self.merge_op.shard(((1, get_group_size()), (1, )))
            self.count_op.shard(((1, ), (1, )))
            self.inf_add.shard(((1, 1, get_group_size()), (1, 1, 1)))
    else:
        # Any other slice_mode is only an error under auto/semi-auto parallel;
        # in stand-alone mode no shard strategies are applied at all.
        if is_auto_parallel:
            raise ValueError(
                "slice_mode should be ['table_row_slice', 'batch_slice' and \
                'table_column_slice'], but get " + str(slice_mode))

    # Min value for fp32
    self.negative_inf_value = -3.402823466E+38
def __init__(self, strategy1, strategy2):
    """Build a two-op net whose MatMul and DivNoNan carry shard strategies.

    Args:
        strategy1: parallel shard strategy applied to the MatMul primitive.
        strategy2: parallel shard strategy applied to the DivNoNan primitive.
    """
    super().__init__()
    matmul_op = P.MatMul()
    self.matmul = matmul_op.shard(strategy1)
    div_op = P.DivNoNan()
    self.divNoNan = div_op.shard(strategy2)