def forward(self, x):
    sbp_1ds = [
        flow.sbp.broadcast,
        flow.sbp.partial_sum,
        flow.sbp.split(0),
        flow.sbp.split(1),
        flow.sbp.split(2),
        flow.sbp.split(3),
    ]
    for sbp1 in sbp_1ds:
        for sbp2 in sbp_1ds:
            for sbp3 in sbp_1ds:
                # (2, 2) -> 3
                # 4 is not divisible by 3
                x = x.to_global(
                    placement=flow.placement(type="cuda", ranks=np.array(range(3))),
                    sbp=[sbp1],
                )
                # 3 -> (2, 2)
                x = x.to_global(
                    placement=flow.placement(
                        type="cuda", ranks=np.array(range(4)).reshape(2, 2)
                    ),
                    sbp=[sbp2, sbp3],
                )
    return x
def forward(self, x):
    sbp_1ds = [
        flow.sbp.broadcast,
        flow.sbp.partial_sum,
        flow.sbp.split(0),
        flow.sbp.split(1),
    ]
    for sbp1 in sbp_1ds:
        for sbp2 in sbp_1ds:
            for sbp3 in sbp_1ds:
                for sbp4 in sbp_1ds:
                    # (3, 2) -> (2, 2)
                    x = x.to_global(
                        placement=flow.placement(
                            type="cuda", ranks=np.array(range(4)).reshape(2, 2)
                        ),
                        sbp=[sbp1, sbp2],
                    )
                    # (2, 2) -> (3, 2)
                    x = x.to_global(
                        placement=flow.placement(
                            type="cuda", ranks=np.array(range(6)).reshape(3, 2)
                        ),
                        sbp=[sbp3, sbp4],
                    )
    return x
def test_rand_graph(test_case):
    arg_dict = OrderedDict()
    arg_dict["shape"] = [(8,), (8, 8), (8, 8, 8)]
    arg_dict["placement"] = [
        # 1d
        flow.placement("cpu", ranks=[0, 1]),
        flow.placement("cuda", ranks=[0, 1]),
        # 2d
        flow.placement("cpu", ranks=[[0, 1]]),
        flow.placement("cuda", ranks=[[0, 1]]),
    ]
    for args in GenArgDict(arg_dict):
        shape = args["shape"]
        placement = args["placement"]
        for sbp in all_sbp(placement, max_dim=len(shape), except_partial_sum=True):
            _test_graph_rand(test_case, shape, placement, sbp)
def test_randperm_graph(test_case):
    arg_dict = OrderedDict()
    arg_dict["N"] = [i for i in range(10, 50, 10)]
    arg_dict["placement"] = [
        # 1d
        flow.placement("cpu", ranks=[0, 1]),
        flow.placement("cuda", ranks=[0, 1]),
        # 2d
        flow.placement("cpu", ranks=[[0, 1]]),
        flow.placement("cuda", ranks=[[0, 1]]),
    ]
    arg_dict["dtype"] = [
        flow.uint8,
        flow.int8,
        flow.int32,
        flow.int64,
        flow.float32,
        flow.float64,
    ]
    for args in GenArgDict(arg_dict):
        N = args["N"]
        placement = args["placement"]
        dtype = args["dtype"]
        for sbp in all_sbp(placement, max_dim=1, except_partial_sum=True):
            _test_graph_randperm(test_case, N, placement, sbp, dtype)
def test_constant_graph(test_case):
    arg_dict = OrderedDict()
    arg_dict["func"] = ["ones", "zeros", "new_zeros"]
    arg_dict["shape"] = [(8,), (8, 8), (8, 8, 8)]
    arg_dict["placement"] = [
        # 1d
        flow.placement("cpu", ranks=[0, 1]),
        flow.placement("cuda", ranks=[0, 1]),
        # 2d
        flow.placement("cpu", ranks=[[0, 1]]),
        flow.placement("cuda", ranks=[[0, 1]]),
    ]
    for args in GenArgDict(arg_dict):
        func = args["func"]
        shape = args["shape"]
        placement = args["placement"]
        for sbp in all_sbp(placement, max_dim=len(shape), except_partial_sum=True):
            _test_graph_constant(test_case, func, shape, placement, sbp)
def test_to_placement(test_case):
    rank = flow.env.get_rank()
    # pid = os.getpid()
    # print(f"[{pid}][{rank}] ToConsistentGraphTestCase.test_to_placement")
    if rank == 0:
        x = flow.ones((2, 3), dtype=flow.float32)
    elif rank == 1:
        x = flow.empty(tuple())
    else:
        raise ValueError
    c_x = x.to_consistent(
        placement=flow.placement("cpu", {0: [0]}), sbp=flow.sbp.broadcast
    )
    # print(f"c_x shape: {c_x.shape}, placement: {c_x.placement}, sbp: {c_x.sbp}")

    p1 = flow.placement("cpu", {0: [0, 1]})
    m1 = ToPlacementModule(p1)
    g1 = MyGraph(m1)
    y1 = g1(c_x)
    # print(f"y1 shape: {y1.shape}, placement: {y1.placement}, sbp: {y1.sbp}")
    test_case.assertTrue(y1.placement == p1)
    test_case.assertTrue(y1.sbp[0] == flow.sbp.broadcast)
    test_case.assertTrue(y1.to_local().numpy().mean() == 1.0)

    p2 = flow.placement("cuda", {0: [0, 1]})
    m2 = ToPlacementModule(p2)
    g2 = MyGraph(m2)
    y2 = g2(y1)
    # print(f"y2 shape: {y2.shape}, placement: {y2.placement}, sbp: {y2.sbp}")
    test_case.assertTrue(y2.placement == p2)
    test_case.assertTrue(y2.sbp[0] == flow.sbp.broadcast)
    test_case.assertTrue(y2.to_local().numpy().mean() == 1.0)
def test_creating_consistent_tensor(test_case):
    shape = (2, 3)
    x = flow.Tensor(*shape, placement=flow.placement("gpu", ["0:0"], None))
    x.set_placement(flow.placement("cpu", ["0:0"], None))
    x.set_is_consistent(True)
    test_case.assertTrue(not x.is_cuda)
    x.determine()
def test_graph_inplace_cpu(test_case):
    x = flow.randn(
        10, 10, placement=flow.placement("cpu", ranks=[0, 1]), sbp=flow.sbp.split(1)
    )
    y = flow.ones(
        10, placement=flow.placement("cpu", ranks=[0, 1]), sbp=flow.sbp.broadcast
    )
    _test_graph_lazy_inplace(test_case, x, y)
def test_graph_inplace_gpu(test_case):
    x = flow.randn(
        10, 10, placement=flow.placement("cuda", {0: [0, 1]}), sbp=flow.sbp.split(1)
    )
    y = flow.ones(
        10, placement=flow.placement("cuda", {0: [0, 1]}), sbp=flow.sbp.broadcast
    )
    _test_graph_lazy_inplace(test_case, x, y)
def _test_graph_buffer_limit(test_case):
    class StageLayerModule(flow.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear1 = flow.nn.Linear(10, 8, False)
            self.linear2 = flow.nn.Linear(8, 10, False)
            flow.nn.init.constant_(self.linear1.weight, 0.023)
            flow.nn.init.constant_(self.linear2.weight, 1.23)

        def forward(self, x):
            out0 = self.linear1(x)
            out0 = out0 + 1.0
            out0 = out0 * 2.0
            out1 = self.linear2(out0)
            return out1

    P0 = flow.placement("cuda", {0: [0]})
    P1 = flow.placement("cuda", {0: [1]})
    PT = flow.placement("cuda", {0: [0, 1]})
    B = flow.sbp.broadcast

    class PipelineModule(flow.nn.Module):
        def __init__(self):
            super().__init__()
            self.layer_0 = StageLayerModule()
            self.layer_1 = StageLayerModule()
            self.layer_0.to_consistent(P0, B)
            self.layer_1.to_consistent(P1, B)

        def forward(self, x):
            # stage 0
            in0 = x.to_consistent(P0, B)
            out0 = self.layer_0(in0)
            # stage 1
            in1 = out0.to_consistent(P1, B)
            out1 = self.layer_1(in1)
            return out1

    pp_m = PipelineModule()
    pp_m.eval()

    class PipelineGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.pp_m = pp_m

        def build(self, x):
            return self.pp_m(x)

    pp_g = PipelineGraph()
    for i in range(500):
        x = flow.randn(16, 10)
        x = x.to_consistent(P0, B)
        out = pp_g(x)
def test_save_and_load_consistent_from_nested_dict(test_case):
    class CustomModule(flow.nn.Module):
        def __init__(self):
            super().__init__()
            self.param = flow.nn.Parameter(flow.randn(3, 32, 3, 3))

        def forward(self):
            return self.param

    m1 = CustomModule()
    m1 = m1.to_consistent(flow.placement("cuda", {0: range(2)}), flow.sbp.broadcast)
    m2 = CustomModule()
    m2 = m2.to_consistent(flow.placement("cuda", {0: range(2)}), flow.sbp.broadcast)
    res1 = m1() + m2()
    state_dict1 = m1.state_dict()
    state_dict2 = m2.state_dict()
    state_dict = {"m1": state_dict1, "m2": state_dict2}

    with tempfile.TemporaryDirectory() as f:
        with test_case.assertRaises(Exception):
            flow.save(state_dict, f)
        consistent_src_dst_rank = 0
        flow.save(state_dict, f, consistent_dst_rank=consistent_src_dst_rank)
        rank = flow.env.get_rank()
        if rank != consistent_src_dst_rank:
            test_case.assertEqual(len(os.listdir(f)), 0)

        m1 = CustomModule()
        m1 = m1.to_consistent(
            flow.placement("cuda", {0: range(2)}), flow.sbp.broadcast
        )
        m2 = CustomModule()
        m2 = m2.to_consistent(
            flow.placement("cuda", {0: range(2)}), flow.sbp.broadcast
        )
        with test_case.assertRaises(Exception):
            loaded_state_dict = flow.load(f)
            m1.load_state_dict(loaded_state_dict["m1"])

        loaded_state_dict = flow.load(f, consistent_src_rank=consistent_src_dst_rank)
        test_case.assertEqual(len(loaded_state_dict), 2)
        m1.load_state_dict(loaded_state_dict["m1"])
        m2.load_state_dict(loaded_state_dict["m2"])
        res2 = m1() + m2()

        test_case.assertTrue(
            np.array_equal(
                res1.to_consistent(sbp=flow.sbp.broadcast).to_local().numpy(),
                res2.to_consistent(sbp=flow.sbp.broadcast).to_local().numpy(),
            )
        )
def test_multi_input_with_diff_placement(test_case):
    x = flow.tensor(
        [1, 2, 3, 4], placement=flow.placement("cuda", [0]), sbp=flow.sbp.broadcast
    )
    y = flow.tensor(
        [2, 4, 6, 8], placement=flow.placement("cuda", [1]), sbp=flow.sbp.broadcast
    )
    with test_case.assertRaises(RuntimeError) as ctx:
        z = flow.add(x, y)
    test_case.assertTrue(
        "Expected all tensors to be on the same placement, but found at least two placements"
        in str(ctx.exception)
    )
def test_copy(test_case):
    x = flow.zeros(2, 3)
    y = flow.ones(2, 3)
    x.copy_(y)
    test_case.assertTrue(np.array_equal(x.numpy(), y.numpy()))

    x = flow.zeros(4, 6, placement=flow.placement("cuda", [0, 1]), sbp=flow.sbp.broadcast)
    y = flow.ones(4, 6, placement=flow.placement("cpu", [0]), sbp=flow.sbp.broadcast)
    x.copy_(y)
    test_case.assertTrue(np.array_equal(x.numpy(), y.numpy()))

    x = flow.zeros(4, 6, placement=flow.placement("cuda", [0, 1]), sbp=flow.sbp.broadcast)
    y = flow.ones(4, 6, placement=flow.placement("cuda", [0]), sbp=flow.sbp.broadcast)
    x.copy_(y)
    test_case.assertTrue(np.array_equal(x.numpy(), y.numpy()))

    x = flow.zeros(4, 6, placement=flow.placement("cuda", [0, 1]), sbp=flow.sbp.split(0))
    y = flow.ones(4, 6, placement=flow.placement("cuda", [0, 1]), sbp=flow.sbp.broadcast)
    x.copy_(y)
    test_case.assertTrue(np.array_equal(x.numpy(), y.numpy()))

    x = flow.zeros(4, 6, placement=flow.placement("cuda", [0, 1]), sbp=flow.sbp.broadcast)
    y = flow.ones(4, 6, placement=flow.placement("cuda", [0, 1]), sbp=flow.sbp.broadcast)
    x.copy_(y)
    test_case.assertTrue(np.array_equal(x.numpy(), y.numpy()))

    x = flow.zeros(4, 6, placement=flow.placement("cuda", [0, 1]), sbp=flow.sbp.broadcast)
    y = np.ones((4, 6), dtype=np.float32)
    x.copy_(y)
    test_case.assertTrue(np.array_equal(x.numpy(), y))
def test_consistent_set_data(test_case):
    x_placement = flow.placement("cpu", {0: 0})
    x_sbp = flow.sbp.broadcast
    x = flow.ones(2, 3, placement=x_placement, sbp=x_sbp)
    y_placement = flow.placement("cuda", {0: 0})
    y_sbp = flow.sbp.split(0)
    y = flow.ones(4, 5, placement=y_placement, sbp=y_sbp)
    old_id = id(x)
    x.data = y
    test_case.assertEqual(old_id, id(x))
    test_case.assertTrue(x.shape == (4, 5))
    test_case.assertTrue(x.placement == y_placement)
    test_case.assertTrue(x.sbp[0] == y_sbp)
def test_lazy_1d_to_2d_sbp(test_case):
    P_1d = flow.placement(device_type="cuda", device_ids={0: range(4)}, hierarchy=(4,))
    P_2d = flow.placement(device_type="cuda", device_ids={0: range(4)}, hierarchy=(2, 2))
    B = flow.sbp.broadcast

    class Test1dTo2dModule(flow.nn.Module):
        def forward(self, x):
            return x.to_global(placement=P_2d, sbp=[B, B])

    class Test1dTo2dGraph(flow.nn.Graph):
        def __init__(self, model):
            super().__init__()
            self.model = model

        def build(self, x):
            return self.model(x)

    class Test2dTo1dModule(flow.nn.Module):
        def forward(self, x):
            return x.to_global(placement=P_1d, sbp=[B])

    class Test2dTo1dGraph(flow.nn.Graph):
        def __init__(self, model):
            super().__init__()
            self.model = model

        def build(self, x):
            return self.model(x)

    model_1d_to_2d = Test1dTo2dModule()
    graph_1d_to_2d = Test1dTo2dGraph(model_1d_to_2d)
    x = flow.zeros(4, 4, 4, 4, sbp=[B, B], placement=P_2d)
    x = x.to_global(placement=P_1d, sbp=[B])
    test_case.assertTrue(x.sbp == (B,))
    test_case.assertTrue(x.placement == P_1d)
    y = graph_1d_to_2d(x)
    test_case.assertTrue(y.sbp == (B, B))
    test_case.assertTrue(y.placement == P_2d)

    model_2d_to_1d = Test2dTo1dModule()
    graph_2d_to_1d = Test2dTo1dGraph(model_2d_to_1d)
    z = graph_2d_to_1d(y)
    test_case.assertTrue(z.sbp == x.sbp)
    test_case.assertTrue(z.placement == x.placement)
def test_rand_consistent(test_case):
    arg_dict = OrderedDict()
    arg_dict["test_fun"] = [_test_consistent_rand, _test_consistent_rand_graph]
    arg_dict["low"] = [i for i in range(2)]
    arg_dict["high"] = [1000 + np.random.randint(1, 10) for i in range(2)]
    arg_dict["shape"] = [(2, 3, 4), (2, 5, 2)]
    arg_dict["placement"] = [
        flow.placement("cpu", {0: [0, 1]}),
        flow.placement("cuda", {0: [0, 1]}),
    ]
    arg_dict["sbp"] = [(flow.sbp.broadcast,), (flow.sbp.split(0),)]
    for arg in GenArgList(arg_dict):
        arg[0](test_case, *arg[1:])
def test_fwd_P2B(test_case):
    """compare eager fwd and lazy fwd"""
    rank = flow.env.get_rank()
    # pid = os.getpid()
    # print(f"[{pid}][{rank}] ToGlobalGraphTestCase.test_fwd_P2B")
    local_x = flow.tensor(x, dtype=flow.float32, device=flow.device(f"cuda:{rank}"))
    local_y = flow.tensor(y, dtype=flow.float32, device=flow.device(f"cuda:{rank}"))
    z = flow._C.matmul(
        flow.cat([local_x, local_x], dim=1),
        flow.cat([local_y, local_y], dim=1),
        transpose_b=True,
    )
    z = flow._C.relu(z)
    # print(f"z shape: {z.shape}, device: {z.device}")
    # print(z.numpy())

    placement = flow.placement("cuda", ranks=[0, 1])
    sbp = flow.sbp.split(1)
    c_x = local_x.to_global(placement=placement, sbp=sbp)
    c_y = local_y.to_global(placement=placement, sbp=sbp)
    # print(f"c_x shape: {c_x.shape}, placement: {c_x.placement}, sbp: {c_x.sbp}")
    # print(f"c_y shape: {c_y.shape}, placement: {c_y.placement}, sbp: {c_y.sbp}")

    m = MyModule1(c_y)
    g = MyGraph(m)
    g_z = g(c_x)
    # print(f"g_z shape: {g_z.shape}, placement: {g_z.placement}, sbp: {g_z.sbp}")
    # print(g_z.to_local().numpy())
    test_case.assertTrue(np.allclose(z.numpy(), g_z.to_local().numpy()))
def test_consistent_tensor_2d_sbp_init(test_case):
    V = 10
    H = 4
    S = 6
    P = flow.placement("cuda", {0: [0, 1, 2, 3]}, (2, 2))
    wte = flow.nn.Parameter(
        flow.empty(
            (V, H),
            dtype=flow.float32,
            placement=P,
            sbp=[flow.sbp.broadcast, flow.sbp.split(0)],
        )
    )
    wpe = flow.nn.Parameter(
        flow.empty(
            (S, H),
            dtype=flow.float32,
            placement=P,
            sbp=[flow.sbp.broadcast, flow.sbp.broadcast],
        )
    )
    flow.nn.init.normal_(wte, std=0.02)
    flow.nn.init.normal_(wpe, std=0.02)
def test_case1(test_case):
    rank = flow.env.get_rank()
    # print(
    #     f"GPTDataLoaderDistributedTestCase.test_case1 on rank {rank} {os.getpid()}"
    # )
    eager_gpt_loader = GPTDataLoader(batch_size=4, device=flow.device("cpu", rank))
    consistent_gpt_loader = GPTDataLoader(
        batch_size=8,
        placement=flow.placement("cpu", {0: [0, 1]}),
        sbp=[flow.sbp.split(0)],
    )
    gpt_loader_graph = DataLoaderGraph(consistent_gpt_loader)

    iteration = 2
    for i in range(iteration):
        tokens = eager_gpt_loader()
        # print(
        #     f"rank {rank} tokens: {tokens.shape}, {tokens.dtype}, device: {tokens.device}"
        #     f"\n{tokens.numpy()}"
        # )
        g_tokens = gpt_loader_graph()
        # print(
        #     f"rank {rank} graph output tokens: {g_tokens.shape}, {g_tokens.dtype}"
        #     f", placement: {g_tokens.placement}"
        #     f"\n{g_tokens.to_local().numpy()}"
        # )
        # print(f"{'-' * 20} rank {rank} iter {i} complete {'-' * 20}")
        test_case.assertTrue(np.allclose(tokens.numpy(), g_tokens.to_local().numpy()))
def _test_expand_same_dim_split(test_case, device):
    input_shape = (4, 1, 2, 1)
    expand_dim = [4, 1, 2, 4]
    input_nd = np.random.random(size=input_shape).astype(np.float32)

    torch_in = torch.tensor(input_nd, requires_grad=True)
    torch_out = torch_in.expand(*expand_dim)
    torch_out.sum().backward()

    of_input = flow.tensor(input_nd, dtype=flow.float32, requires_grad=True)
    of_input = of_input.to_consistent(
        placement=flow.placement(device, {0: [0, 1]}), sbp=flow.sbp.broadcast,
    )
    of_input = of_input.to_consistent(sbp=flow.sbp.split(0))
    of_out = of_input.expand(*expand_dim)
    loss = of_out.sum()
    loss.backward()

    if flow.env.get_rank() == 0:
        test_case.assertTrue(
            np.array_equal(
                of_out.to_local().numpy(),
                torch_out.detach().cpu().numpy()[0:2, :, :, :],
            )
        )
        test_case.assertTrue(
            np.array_equal(
                of_input.grad.to_local().numpy(),
                torch_in.grad.cpu().numpy()[0:2, :, :, :],
            )
        )
def _test_consistent_tensor_str(test_case, device):
    placement = flow.placement(device, {0: range(1)})

    # split consistent tensor
    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.split(0)])
    tensor_str = str(x)
    test_case.assertTrue("1." in tensor_str)

    # broadcast consistent tensor
    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.broadcast])
    tensor_str = str(x)
    test_case.assertTrue("1." in tensor_str)

    # partial_sum consistent tensor
    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.partial_sum])
    tensor_str = str(x)
    test_case.assertTrue("1." in tensor_str)

    # summarized consistent tensor
    x = flow.ones((100, 100), placement=placement, sbp=[flow.sbp.split(0)])
    tensor_str = str(x)
    test_case.assertTrue("1." in tensor_str)
    test_case.assertTrue("..." in tensor_str)

    # empty consistent tensor
    x = flow.ones((0, 10), placement=placement, sbp=[flow.sbp.split(0)])
    tensor_str = str(x)
    test_case.assertTrue("[]" in tensor_str)
def _test_expand_same_dim_negative_broadcast(test_case, device):
    input_shape = (2, 1, 4, 1)
    expand_dim = [2, -1, 4, 4]
    input_nd = np.random.random(size=input_shape).astype(np.float32)

    torch_in = torch.tensor(input_nd, requires_grad=True)
    torch_out = torch_in.expand(*expand_dim)
    torch_out.sum().backward()

    of_input = flow.tensor(input_nd, dtype=flow.float32, requires_grad=True)
    global_of_input = of_input.to_global(
        placement=flow.placement(device, ranks=[0, 1]), sbp=flow.sbp.broadcast,
    )
    of_out = global_of_input.expand(*expand_dim)
    loss = of_out.sum()
    loss.backward()

    if flow.env.get_rank() == 0:
        test_case.assertTrue(
            np.array_equal(of_out.to_local().numpy(), torch_out.detach().cpu().numpy())
        )
        test_case.assertTrue(
            np.array_equal(of_input.grad.numpy(), torch_in.grad.cpu().numpy())
        )
def get_layer_placement(layer_idx, device_type="cuda"): dist_util = get_dist_util() return flow.placement( device_type, dist_util.get_layer_devices(layer_idx), dist_util.parallel_hierarchy, )
def test_module_to_consistent(test_case):
    rank = flow.env.get_rank()
    P = flow.placement("cuda", {0: [0, 1]})
    B = flow.sbp.broadcast

    class ReuseVarModule(flow.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear1 = flow.nn.Linear(3, 4)
            self.linear2 = flow.nn.Linear(3, 4)
            self.linear2.weight = self.linear1.weight

    reuse_var_m = ReuseVarModule()
    test_case.assertTrue(reuse_var_m.linear1.weight is reuse_var_m.linear2.weight)
    test_case.assertEqual(reuse_var_m.linear1.weight.device, flow.device("cpu", rank))
    test_case.assertTrue(reuse_var_m.linear1.bias is not reuse_var_m.linear2.bias)
    test_case.assertEqual(reuse_var_m.linear1.bias.device, flow.device("cpu", rank))

    reuse_var_m.to_consistent(placement=P, sbp=B)
    test_case.assertTrue(reuse_var_m.linear1.weight is reuse_var_m.linear2.weight)
    test_case.assertEqual(reuse_var_m.linear1.weight.placement, P)
    test_case.assertEqual(reuse_var_m.linear1.weight.sbp[0], B)
    test_case.assertTrue(reuse_var_m.linear1.bias is not reuse_var_m.linear2.bias)
    test_case.assertEqual(reuse_var_m.linear1.bias.placement, P)
    test_case.assertEqual(reuse_var_m.linear1.bias.sbp[0], B)
def test_global_eager_tensor_to(test_case):
    rank = flow.env.get_rank()
    placement = flow.placement("cpu", ranks=[0, 1])
    t_l = flow.tensor([1.0, 2.0], dtype=flow.float32)
    t = t_l.to_global(placement=placement, sbp=flow.sbp.broadcast)

    class ConsistentEagerTensorToModule(flow.nn.Module):
        def __init__(self):
            super().__init__()

        def forward(self):
            # test free eager tensor to
            nonlocal t
            t = t.to("cuda")
            return t

    e_m = ConsistentEagerTensorToModule()

    class ConsistentEagerTensorToGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.e_m = e_m

        def build(self):
            return self.e_m()

    e_g = ConsistentEagerTensorToGraph()
    graph_out = e_g().to_local()
    print("g ", graph_out.numpy())
    test_case.assertTrue(
        np.allclose(graph_out.numpy(), t_l.numpy(), atol=1e-4, rtol=1e-4)
    )
def test_to_dtype(test_case):
    x = flow.ones((2, 3), dtype=flow.int32, device="cpu")
    placement = flow.placement("cpu", ranks=[0, 1])
    c_x = flow.ones(
        (2, 3), dtype=flow.int32, placement=placement, sbp=flow.sbp.broadcast
    )

    class CastModule(flow.nn.Module):
        def __init__(self, dtype):
            super().__init__()
            self.dtype = dtype

        def forward(self, x):
            return x.to(dtype=self.dtype)

    m = CastModule(flow.float32)
    g = MyGraph(m)

    e_x = m(x)
    e_c_x = m(c_x)
    # NOTE(chengcheng):
    #   There are two BUGs in this test script:
    #   1. the first call and the second call input tensor meta is NOT the same
    #   2. nn.Graph does NOT support local input with multi-rank yet.
    # g_x = g(x)
    g_c_x = g(c_x)

    test_case.assertTrue(e_x.dtype == flow.float32)
    # test_case.assertTrue(g_x.dtype == flow.float32)
    test_case.assertTrue(e_c_x.dtype == flow.float32)
    test_case.assertTrue(g_c_x.dtype == flow.float32)
def test_stateful_local_kernel_in_global_mode(test_case):
    rank = int(os.getenv("RANK"))
    x = flow.tensor(np.array([1, 2]) * (rank + 1)).to("cuda")
    x = x.to_global(flow.placement("cuda", range(2)), flow.sbp.split(0))
    y = flow.tensor([3, 4, 5]).to("cuda")
    y = y.to_global(flow.placement("cuda", range(2)), flow.sbp.broadcast)
    # logical slice assign op needs sbp and logical shape from stateful local opkernel
    x[:3] = y
    x = x.to_global(sbp=flow.sbp.broadcast)
    test_case.assertTrue(np.array_equal(x.to_local().numpy(), np.array([3, 4, 5, 4])))
def __init__(self, data, requires_grad=True):
    # TODO: uncomment this line when autograd is ready
    # data.requires_grad = True
    data.set_is_consistent(True)
    # TODO: set a proper placement
    data.set_placement(flow.placement("gpu", ["0:0"], None))
    self._data = data
def _test_global_tensor_str(test_case, device):
    placement = flow.placement(device, range(1))

    # split global tensor
    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.split(0)])
    tensor_str = str(x)
    test_case.assertTrue("1." in tensor_str)

    # broadcast global tensor
    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.broadcast])
    tensor_str = str(x)
    test_case.assertTrue("1." in tensor_str)

    # partial_sum global tensor
    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.partial_sum])
    tensor_str = str(x)
    test_case.assertTrue("1." in tensor_str)

    # summarized global tensor
    x = flow.ones((100, 100), placement=placement, sbp=[flow.sbp.split(0)])
    tensor_str = str(x)
    test_case.assertTrue("1." in tensor_str)
    test_case.assertTrue("..." in tensor_str)

    # empty global tensor
    x = flow.ones((0, 10), placement=placement, sbp=[flow.sbp.split(0)])
    tensor_str = str(x)
    test_case.assertTrue("[]" in tensor_str)
def test_meshgrid_tensors_placement_runtime_error(test_case):
    with test_case.assertRaises(Exception) as context:
        x1 = flow.tensor(
            [0.0, 1.0],
            dtype=flow.float32,
            placement=flow.placement("cpu", ranks=[0]),
            sbp=[flow.sbp.broadcast],
        )
        x2 = flow.tensor(
            [0.0, 1.0],
            dtype=flow.float32,
            placement=flow.placement("cpu", ranks=[0]),
            sbp=[flow.sbp.broadcast],
        ).to_local()
        y = flow.meshgrid(x1, x2)
    test_case.assertTrue(
        "meshgrid expects all tensors are global tensor" in str(context.exception)
    )