    def forward(self, x):
        sbp_1ds = [
            flow.sbp.broadcast,
            flow.sbp.partial_sum,
            flow.sbp.split(0),
            flow.sbp.split(1),
            flow.sbp.split(2),
            flow.sbp.split(3),
        ]

        for sbp1 in sbp_1ds:

            for sbp2 in sbp_1ds:
                for sbp3 in sbp_1ds:
                    # (2, 2) -> 3
                    # 4 is not divisible by 3
                    x = x.to_global(
                        placement=flow.placement(type="cuda", ranks=np.array(range(3))),
                        sbp=[sbp1],
                    )
                    # 3 -> (2, 2)
                    x = x.to_global(
                        placement=flow.placement(
                            type="cuda", ranks=np.array(range(4)).reshape(2, 2)
                        ),
                        sbp=[sbp2, sbp3],
                    )

        return x
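The loop above exhaustively re-places x through every pair of 1-D SBP signatures while switching between a 3-rank 1-D placement and a 4-rank (2, 2) hierarchy. A minimal, standalone sketch of one such transfer is shown below; it assumes a four-process launch (e.g. via oneflow.distributed.launch --nproc_per_node 4) with at least four CUDA devices, and the concrete shape and SBP choices are illustrative only.

import numpy as np
import oneflow as flow

# Build a global tensor directly on a 1-D placement over three GPUs, split along dim 0.
x = flow.ones(
    (12, 4),
    placement=flow.placement(type="cuda", ranks=np.array(range(3))),
    sbp=[flow.sbp.split(0)],
)

# Re-place it onto a 2-D (2, 2) device hierarchy; a 2-D hierarchy needs a 2-D SBP signature.
x = x.to_global(
    placement=flow.placement(type="cuda", ranks=np.array(range(4)).reshape(2, 2)),
    sbp=[flow.sbp.broadcast, flow.sbp.split(1)],
)

print(x.placement, x.sbp)  # (2, 2) hierarchy, sbp (broadcast, split(1))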
Example #2
    def forward(self, x):
        sbp_1ds = [
            flow.sbp.broadcast,
            flow.sbp.partial_sum,
            flow.sbp.split(0),
            flow.sbp.split(1),
        ]

        for sbp1 in sbp_1ds:
            for sbp2 in sbp_1ds:

                for sbp3 in sbp_1ds:
                    for sbp4 in sbp_1ds:
                        # (3, 2) -> (2, 2)
                        x = x.to_global(
                            placement=flow.placement(
                                type="cuda",
                                ranks=np.array(range(4)).reshape(2, 2)),
                            sbp=[sbp1, sbp2],
                        )
                        # (2, 2) -> (3, 2)
                        x = x.to_global(
                            placement=flow.placement(
                                type="cuda",
                                ranks=np.array(range(6)).reshape(3, 2)),
                            sbp=[sbp3, sbp4],
                        )

        return x
 def test_rand_graph(test_case):
     arg_dict = OrderedDict()
     arg_dict["shape"] = [(8, ), (
         8,
         8,
     ), (8, 8, 8)]
     arg_dict["placement"] = [
         # 1d
         flow.placement("cpu", ranks=[0, 1]),
         flow.placement("cuda", ranks=[0, 1]),
         # 2d
         flow.placement("cpu", ranks=[
             [0, 1],
         ]),
         flow.placement("cuda", ranks=[
             [0, 1],
         ]),
     ]
     for args in GenArgDict(arg_dict):
         shape = args["shape"]
         placement = args["placement"]
         for sbp in all_sbp(placement,
                            max_dim=len(shape),
                            except_partial_sum=True):
             _test_graph_rand(test_case, shape, placement, sbp)
 def test_randperm_graph(test_case):
     arg_dict = OrderedDict()
     arg_dict["N"] = [i for i in range(10, 50, 10)]
     arg_dict["placement"] = [
         # 1d
         flow.placement("cpu", ranks=[0, 1]),
         flow.placement("cuda", ranks=[0, 1]),
         # 2d
         flow.placement("cpu", ranks=[
             [0, 1],
         ]),
         flow.placement("cuda", ranks=[
             [0, 1],
         ]),
     ]
     arg_dict["dtype"] = [
         flow.uint8,
         flow.int8,
         flow.int32,
         flow.int64,
         flow.float32,
         flow.float64,
     ]
     for args in GenArgDict(arg_dict):
         N = args["N"]
         placement = args["placement"]
         dtype = args["dtype"]
         for sbp in all_sbp(placement, max_dim=1, except_partial_sum=True):
             _test_graph_randperm(test_case, N, placement, sbp, dtype)
 def test_constant_graph(test_case):
     arg_dict = OrderedDict()
     arg_dict["func"] = ["ones", "zeros", "new_zeros"]
     arg_dict["shape"] = [(8, ), (
         8,
         8,
     ), (8, 8, 8)]
     arg_dict["placement"] = [
         # 1d
         flow.placement("cpu", ranks=[0, 1]),
         flow.placement("cuda", ranks=[0, 1]),
         # 2d
         flow.placement("cpu", ranks=[
             [0, 1],
         ]),
         flow.placement("cuda", ranks=[
             [0, 1],
         ]),
     ]
     for args in GenArgDict(arg_dict):
         func = args["func"]
         shape = args["shape"]
         placement = args["placement"]
         for sbp in all_sbp(placement,
                            max_dim=len(shape),
                            except_partial_sum=True):
             _test_graph_constant(test_case, func, shape, placement, sbp)
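GenArgDict and all_sbp in the tests above are OneFlow test utilities that are not shown in this listing. Assuming GenArgDict simply yields one argument dict per combination of the value lists (their Cartesian product), a rough, hypothetical stand-in can be written with itertools.product:

import itertools
from collections import OrderedDict


def gen_arg_dict(arg_dict):
    # Hypothetical equivalent of GenArgDict: yield one dict per combination
    # of the value lists (Cartesian product), preserving key order.
    keys = list(arg_dict.keys())
    for values in itertools.product(*arg_dict.values()):
        yield dict(zip(keys, values))


# Usage mirroring the tests above:
arg_dict = OrderedDict()
arg_dict["func"] = ["ones", "zeros"]
arg_dict["shape"] = [(8,), (8, 8)]
for args in gen_arg_dict(arg_dict):
    print(args["func"], args["shape"])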
Example #6
    def test_to_placement(test_case):
        rank = flow.env.get_rank()
        # pid = os.getpid()
        # print(f"[{pid}][{rank}] ToConsistentGraphTestCase.test_to_placement")

        if rank == 0:
            x = flow.ones((2, 3), dtype=flow.float32)
        elif rank == 1:
            x = flow.empty(tuple())
        else:
            raise ValueError

        c_x = x.to_consistent(placement=flow.placement("cpu", {0: [0]}),
                              sbp=flow.sbp.broadcast)
        # print(f"c_x shape: {c_x.shape}, placment: {c_x.placement}, sbp: {c_x.sbp}")

        p1 = flow.placement("cpu", {0: [0, 1]})
        m1 = ToPlacementModule(p1)
        g1 = MyGraph(m1)
        y1 = g1(c_x)

        # print(f"y1 shape: {y1.shape}, placment: {y1.placement}, sbp: {y1.sbp}")
        test_case.assertTrue(y1.placement == p1)
        test_case.assertTrue(y1.sbp[0] == flow.sbp.broadcast)
        test_case.assertTrue(y1.to_local().numpy().mean() == 1.0)

        p2 = flow.placement("cuda", {0: [0, 1]})
        m2 = ToPlacementModule(p2)
        g2 = MyGraph(m2)
        y2 = g2(y1)

        # print(f"y2 shape: {y2.shape}, placment: {y2.placement}, sbp: {y2.sbp}")
        test_case.assertTrue(y2.placement == p2)
        test_case.assertTrue(y2.sbp[0] == flow.sbp.broadcast)
        test_case.assertTrue(y2.to_local().numpy().mean() == 1.0)
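ToPlacementModule and MyGraph are helpers defined elsewhere in the test file. A plausible minimal version, assuming the module only moves its input to a stored placement and the graph merely wraps a module, is sketched below; these are illustrative definitions, not the originals.

import oneflow as flow


class ToPlacementModule(flow.nn.Module):
    def __init__(self, placement):
        super().__init__()
        self.placement = placement

    def forward(self, x):
        # Move the input to the stored placement; the test above only exercises broadcast.
        return x.to_consistent(placement=self.placement, sbp=flow.sbp.broadcast)


class MyGraph(flow.nn.Graph):
    def __init__(self, module):
        super().__init__()
        self.module = module

    def build(self, x):
        return self.module(x)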
Example #7
 def test_creating_consistent_tensor(test_case):
     shape = (2, 3)
     x = flow.Tensor(*shape, placement=flow.placement("gpu", ["0:0"], None))
     x.set_placement(flow.placement("cpu", ["0:0"], None))
     x.set_is_consistent(True)
     test_case.assertTrue(not x.is_cuda)
     x.determine()
 def test_graph_inplace_cpu(test_case):
     x = flow.randn(10,
                    10,
                    placement=flow.placement("cpu", ranks=[0, 1]),
                    sbp=flow.sbp.split(1))
     y = flow.ones(10,
                   placement=flow.placement("cpu", ranks=[0, 1]),
                   sbp=flow.sbp.broadcast)
     _test_graph_lazy_inplace(test_case, x, y)
 def test_graph_inplace_gpu(test_case):
     x = flow.randn(10,
                    10,
                    placement=flow.placement("cuda", {0: [0, 1]}),
                    sbp=flow.sbp.split(1))
     y = flow.ones(10,
                   placement=flow.placement("cuda", {0: [0, 1]}),
                   sbp=flow.sbp.broadcast)
     _test_graph_lazy_inplace(test_case, x, y)
def _test_graph_buffer_limit(test_case):
    class StageLayerModule(flow.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear1 = flow.nn.Linear(10, 8, False)
            self.linear2 = flow.nn.Linear(8, 10, False)
            flow.nn.init.constant_(self.linear1.weight, 0.023)
            flow.nn.init.constant_(self.linear2.weight, 1.23)

        def forward(self, x):
            out0 = self.linear1(x)
            out0 = out0 + 1.0
            out0 = out0 * 2.0
            out1 = self.linear2(out0)
            return out1

    P0 = flow.placement("cuda", {0: [0]})
    P1 = flow.placement("cuda", {0: [1]})
    PT = flow.placement("cuda", {0: [0, 1]})
    B = flow.sbp.broadcast

    class PipelineModule(flow.nn.Module):
        def __init__(self):
            super().__init__()
            self.layer_0 = StageLayerModule()
            self.layer_1 = StageLayerModule()
            self.layer_0.to_consistent(P0, B)
            self.layer_1.to_consistent(P1, B)

        def forward(self, x):
            # stage 0
            in0 = x.to_consistent(P0, B)
            out0 = self.layer_0(in0)
            # stage 1
            in1 = out0.to_consistent(P1, B)
            out1 = self.layer_1(in1)
            return out1

    pp_m = PipelineModule()
    pp_m.eval()

    class PipelineGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.pp_m = pp_m

        def build(self, x):
            return self.pp_m(x)

    pp_g = PipelineGraph()

    for i in range(500):
        x = flow.randn(16, 10)
        x = x.to_consistent(P0, B)
        out = pp_g(x)
Example #11
    def test_save_and_load_consistent_from_nested_dict(test_case):
        class CustomModule(flow.nn.Module):
            def __init__(self):
                super().__init__()
                self.param = flow.nn.Parameter(flow.randn(3, 32, 3, 3))

            def forward(self):
                return self.param

        m1 = CustomModule()
        m1 = m1.to_consistent(flow.placement("cuda", {0: range(2)}),
                              flow.sbp.broadcast)
        m2 = CustomModule()
        m2 = m2.to_consistent(flow.placement("cuda", {0: range(2)}),
                              flow.sbp.broadcast)
        res1 = m1() + m2()
        state_dict1 = m1.state_dict()
        state_dict2 = m2.state_dict()
        state_dict = {"m1": state_dict1, "m2": state_dict2}

        with tempfile.TemporaryDirectory() as f:
            with test_case.assertRaises(Exception):
                flow.save(state_dict, f)

            consistent_src_dst_rank = 0
            flow.save(state_dict,
                      f,
                      consistent_dst_rank=consistent_src_dst_rank)
            rank = flow.env.get_rank()
            if rank != consistent_src_dst_rank:
                test_case.assertEqual(len(os.listdir(f)), 0)

            m1 = CustomModule()
            m1 = m1.to_consistent(flow.placement("cuda", {0: range(2)}),
                                  flow.sbp.broadcast)
            m2 = CustomModule()
            m2 = m2.to_consistent(flow.placement("cuda", {0: range(2)}),
                                  flow.sbp.broadcast)

            with test_case.assertRaises(Exception):
                loaded_state_dict = flow.load(f)
                m1.load_state_dict(loaded_state_dict["m1"])

            loaded_state_dict = flow.load(
                f, consistent_src_rank=consistent_src_dst_rank)
            test_case.assertEqual(len(loaded_state_dict), 2)
            m1.load_state_dict(loaded_state_dict["m1"])
            m2.load_state_dict(loaded_state_dict["m2"])
            res2 = m1() + m2()

        test_case.assertTrue(
            np.array_equal(
                res1.to_consistent(sbp=flow.sbp.broadcast).to_local().numpy(),
                res2.to_consistent(sbp=flow.sbp.broadcast).to_local().numpy(),
            ))
Example #12
 def test_multi_input_with_diff_placement(test_case):
     x = flow.tensor([1, 2, 3, 4],
                     placement=flow.placement("cuda", [0]),
                     sbp=flow.sbp.broadcast)
     y = flow.tensor([2, 4, 6, 8],
                     placement=flow.placement("cuda", [1]),
                     sbp=flow.sbp.broadcast)
     with test_case.assertRaises(RuntimeError) as ctx:
         z = flow.add(x, y)
     test_case.assertTrue(
         "Expected all tensors to be on the same placement, but found at least two placements"
         in str(ctx.exception))
    def test_copy(test_case):
        x = flow.zeros(2, 3)
        y = flow.ones(2, 3)
        x.copy_(y)
        test_case.assertTrue(np.array_equal(x.numpy(), y.numpy()))

        x = flow.zeros(4,
                       6,
                       placement=flow.placement("cuda", [0, 1]),
                       sbp=flow.sbp.broadcast)
        y = flow.ones(4,
                      6,
                      placement=flow.placement("cpu", [0]),
                      sbp=flow.sbp.broadcast)
        x.copy_(y)
        test_case.assertTrue(np.array_equal(x.numpy(), y.numpy()))

        x = flow.zeros(4,
                       6,
                       placement=flow.placement("cuda", [0, 1]),
                       sbp=flow.sbp.broadcast)
        y = flow.ones(4,
                      6,
                      placement=flow.placement("cuda", [0]),
                      sbp=flow.sbp.broadcast)
        x.copy_(y)
        test_case.assertTrue(np.array_equal(x.numpy(), y.numpy()))

        x = flow.zeros(4,
                       6,
                       placement=flow.placement("cuda", [0, 1]),
                       sbp=flow.sbp.split(0))
        y = flow.ones(4,
                      6,
                      placement=flow.placement("cuda", [0, 1]),
                      sbp=flow.sbp.broadcast)
        x.copy_(y)
        test_case.assertTrue(np.array_equal(x.numpy(), y.numpy()))

        x = flow.zeros(4,
                       6,
                       placement=flow.placement("cuda", [0, 1]),
                       sbp=flow.sbp.broadcast)
        y = flow.ones(4,
                      6,
                      placement=flow.placement("cuda", [0, 1]),
                      sbp=flow.sbp.broadcast)
        x.copy_(y)
        test_case.assertTrue(np.array_equal(x.numpy(), y.numpy()))

        x = flow.zeros(4,
                       6,
                       placement=flow.placement("cuda", [0, 1]),
                       sbp=flow.sbp.broadcast)
        y = np.ones((4, 6), dtype=np.float32)
        x.copy_(y)
        test_case.assertTrue(np.array_equal(x.numpy(), y))
Example #14
 def test_consistent_set_data(test_case):
     x_placement = flow.placement("cpu", {0: 0})
     x_sbp = flow.sbp.broadcast
     x = flow.ones(2, 3, placement=x_placement, sbp=x_sbp)
     y_placement = flow.placement("cuda", {0: 0})
     y_sbp = flow.sbp.split(0)
     y = flow.ones(4, 5, placement=y_placement, sbp=y_sbp)
     old_id = id(x)
     x.data = y
     test_case.assertEqual(old_id, id(x))
     test_case.assertTrue(x.shape == (4, 5))
     test_case.assertTrue(x.placement == y_placement)
     test_case.assertTrue(x.sbp[0] == y_sbp)
Example #15
    def test_lazy_1d_to_2d_sbp(test_case):
        P_1d = flow.placement(
            device_type="cuda", device_ids={0: range(4)}, hierarchy=(4,)
        )
        P_2d = flow.placement(
            device_type="cuda", device_ids={0: range(4)}, hierarchy=(2, 2)
        )
        B = flow.sbp.broadcast

        class Test1dTo2dModule(flow.nn.Module):
            def forward(self, x):
                return x.to_global(placement=P_2d, sbp=[B, B])

        class Test1dTo2dGraph(flow.nn.Graph):
            def __init__(self, model):
                super().__init__()
                self.model = model

            def build(self, x):
                return self.model(x)

        class Test2dTo1dModule(flow.nn.Module):
            def forward(self, x):
                return x.to_global(placement=P_1d, sbp=[B])

        class Test2dTo1dGraph(flow.nn.Graph):
            def __init__(self, model):
                super().__init__()
                self.model = model

            def build(self, x):
                return self.model(x)

        model_1d_to_2d = Test1dTo2dModule()
        graph_1d_to_2d = Test1dTo2dGraph(model_1d_to_2d)

        x = flow.zeros(4, 4, 4, 4, sbp=[B, B], placement=P_2d)
        x = x.to_global(placement=P_1d, sbp=[B])
        test_case.assertTrue(x.sbp == (B,))
        test_case.assertTrue(x.placement == P_1d)
        y = graph_1d_to_2d(x)
        test_case.assertTrue(y.sbp == (B, B))
        test_case.assertTrue(y.placement == P_2d)

        model_2d_to_1d = Test2dTo1dModule()
        graph_2d_to_1d = Test2dTo1dGraph(model_2d_to_1d)
        z = graph_2d_to_1d(y)
        test_case.assertTrue(z.sbp == x.sbp)
        test_case.assertTrue(z.placement == x.placement)
Example #16
 def test_rand_consistent(test_case):
     arg_dict = OrderedDict()
     arg_dict["test_fun"] = [
         _test_consistent_rand, _test_consistent_rand_graph
     ]
     arg_dict["low"] = [i for i in range(2)]
     arg_dict["high"] = [1000 + np.random.randint(1, 10) for i in range(2)]
     arg_dict["shape"] = [(2, 3, 4), (2, 5, 2)]
     arg_dict["placement"] = [
         flow.placement("cpu", {0: [0, 1]}),
         flow.placement("cuda", {0: [0, 1]}),
     ]
     arg_dict["sbp"] = [(flow.sbp.broadcast, ), (flow.sbp.split(0), )]
     for arg in GenArgList(arg_dict):
         arg[0](test_case, *arg[1:])
Example #17
    def test_fwd_P2B(test_case):
        """ compare eager fwd and lazy bwd
        """
        rank = flow.env.get_rank()
        # pid = os.getpid()
        # print(f"[{pid}][{rank}] ToGlobalGraphTestCase.test_fwd_P2B")

        local_x = flow.tensor(x, dtype=flow.float32, device=flow.device(f"cuda:{rank}"))
        local_y = flow.tensor(y, dtype=flow.float32, device=flow.device(f"cuda:{rank}"))

        z = flow._C.matmul(
            flow.cat([local_x, local_x], dim=1),
            flow.cat([local_y, local_y], dim=1),
            transpose_b=True,
        )
        z = flow._C.relu(z)
        # print(f"z shape: {z.shape}, device: {z.device}")
        # print(z.numpy())

        placement = flow.placement("cuda", ranks=[0, 1])
        sbp = flow.sbp.split(1)
        c_x = local_x.to_global(placement=placement, sbp=sbp)
        c_y = local_y.to_global(placement=placement, sbp=sbp)

        # print(f"c_x shape: {c_x.shape}, placement: {c_x.placement}, sbp: {c_x.sbp}")
        # print(f"c_y shape: {c_y.shape}, placement: {c_y.placement}, sbp: {c_y.sbp}")

        m = MyModule1(c_y)
        g = MyGraph(m)

        g_z = g(c_x)
        # print(f"g_z shape: {g_z.shape}, placement: {g_z.placement}, sbp: {g_z.sbp}")
        # print(g_z.to_local().numpy())
        test_case.assertTrue(np.allclose(z.numpy(), g_z.to_local().numpy()))
Example #18
    def test_consistent_tensor_2d_sbp_init(test_case):
        V = 10
        H = 4
        S = 6

        P = flow.placement("cuda", {0: [0, 1, 2, 3]}, (2, 2))

        wte = flow.nn.Parameter(
            flow.empty(
                (V, H),
                dtype=flow.float32,
                placement=P,
                sbp=[flow.sbp.broadcast, flow.sbp.split(0)],
            )
        )

        wpe = flow.nn.Parameter(
            flow.empty(
                (S, H),
                dtype=flow.float32,
                placement=P,
                sbp=[flow.sbp.broadcast, flow.sbp.broadcast],
            )
        )

        flow.nn.init.normal_(wte, std=0.02)
        flow.nn.init.normal_(wpe, std=0.02)
Example #19
    def test_case1(test_case):
        rank = flow.env.get_rank()
        # print(
        #     f"GPTDataLoaderDistributedTestCase.test_case1 on rank {rank} {os.getpid()}"
        # )
        eager_gpt_loader = GPTDataLoader(batch_size=4, device=flow.device("cpu", rank))

        consistent_gpt_loader = GPTDataLoader(
            batch_size=8,
            placement=flow.placement("cpu", {0: [0, 1]}),
            sbp=[flow.sbp.split(0)],
        )
        gpt_loader_graph = DataLoaderGraph(consistent_gpt_loader)

        iteration = 2
        for i in range(iteration):
            tokens = eager_gpt_loader()
            # print(
            #     f"rank {rank} tokens: {tokens.shape}, {tokens.dtype}, device: {tokens.device}"
            #     f"\n{tokens.numpy()}"
            # )

            g_tokens = gpt_loader_graph()
            # print(
            #     f"rank {rank} graph output tokens: {g_tokens.shape}, {g_tokens.dtype}"
            #     f", placement: {g_tokens.placement}"
            #     f"\n{g_tokens.to_local().numpy()}"
            # )

            # print(f"{'-' * 20} rank {rank} iter {i} complete {'-' * 20}")
            test_case.assertTrue(
                np.allclose(tokens.numpy(), g_tokens.to_local().numpy())
            )
def _test_expand_same_dim_split(test_case, device):
    input_shape = (4, 1, 2, 1)
    expand_dim = [4, 1, 2, 4]

    input_nd = np.random.random(size=input_shape).astype(np.float32)
    torch_in = torch.tensor(input_nd, requires_grad=True)
    torch_out = torch_in.expand(*expand_dim)
    torch_out.sum().backward()

    of_input = flow.tensor(input_nd, dtype=flow.float32, requires_grad=True)
    of_input = of_input.to_consistent(
        placement=flow.placement(device, {0: [0, 1]}),
        sbp=flow.sbp.broadcast,
    )
    of_input = of_input.to_consistent(sbp=flow.sbp.split(0))

    of_out = of_input.expand(*expand_dim)
    loss = of_out.sum()
    loss.backward()

    if flow.env.get_rank() == 0:
        test_case.assertTrue(
            np.array_equal(
                of_out.to_local().numpy(),
                torch_out.detach().cpu().numpy()[0:2, :, :, :],
            ))
        test_case.assertTrue(
            np.array_equal(
                of_input.grad.to_local().numpy(),
                torch_in.grad.cpu().numpy()[0:2, :, :, :],
            ))
Example #21
def _test_consistent_tensor_str(test_case, device):
    placement = flow.placement(device, {0: range(1)})
    # split consistent tensor
    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.split(0)])
    tensor_str = str(x)
    test_case.assertTrue("1." in tensor_str)

    # broadcast consistent tensor
    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.broadcast])
    tensor_str = str(x)
    test_case.assertTrue("1." in tensor_str)

    # partial_sum consistent tensor
    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.partial_sum])
    tensor_str = str(x)
    test_case.assertTrue("1." in tensor_str)

    # summarized consistent tensor
    x = flow.ones((100, 100), placement=placement, sbp=[flow.sbp.split(0)])
    tensor_str = str(x)
    test_case.assertTrue("1." in tensor_str)
    test_case.assertTrue("..." in tensor_str)

    # empty consistent tensor
    x = flow.ones((0, 10), placement=placement, sbp=[flow.sbp.split(0)])
    tensor_str = str(x)
    test_case.assertTrue("[]" in tensor_str)
def _test_expand_same_dim_negative_broadcast(test_case, device):
    input_shape = (2, 1, 4, 1)
    expand_dim = [2, -1, 4, 4]

    input_nd = np.random.random(size=input_shape).astype(np.float32)
    torch_in = torch.tensor(input_nd, requires_grad=True)
    torch_out = torch_in.expand(*expand_dim)
    torch_out.sum().backward()

    of_input = flow.tensor(input_nd, dtype=flow.float32, requires_grad=True)
    global_of_input = of_input.to_global(
        placement=flow.placement(device, ranks=[0, 1]), sbp=flow.sbp.broadcast,
    )

    of_out = global_of_input.expand(*expand_dim)
    loss = of_out.sum()
    loss.backward()

    if flow.env.get_rank() == 0:
        test_case.assertTrue(
            np.array_equal(of_out.to_local().numpy(), torch_out.detach().cpu().numpy())
        )
        test_case.assertTrue(
            np.array_equal(of_input.grad.numpy(), torch_in.grad.cpu().numpy())
        )
Example #23
def get_layer_placement(layer_idx, device_type="cuda"):
    dist_util = get_dist_util()
    return flow.placement(
        device_type,
        dist_util.get_layer_devices(layer_idx),
        dist_util.parallel_hierarchy,
    )
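get_dist_util(), get_layer_devices, and parallel_hierarchy belong to a distributed-utility module that is not part of this listing. A simplified, hypothetical stand-in that maps a layer index to a pipeline stage and returns a placement over that stage's ranks might look like this (STAGE_RANKS and NUM_LAYERS are assumed values):

import oneflow as flow

STAGE_RANKS = [[0], [1]]  # assumed: two pipeline stages, one GPU each
NUM_LAYERS = 24           # assumed total number of layers


def get_layer_placement(layer_idx, device_type="cuda"):
    # Assign layers to stages in contiguous blocks and build a 1-D placement
    # over the chosen stage's ranks.
    layers_per_stage = NUM_LAYERS // len(STAGE_RANKS)
    stage = min(layer_idx // layers_per_stage, len(STAGE_RANKS) - 1)
    return flow.placement(device_type, ranks=STAGE_RANKS[stage])


# e.g. get_layer_placement(0) uses ranks [0]; get_layer_placement(23) uses ranks [1]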
    def test_module_to_consistent(test_case):
        rank = flow.env.get_rank()
        P = flow.placement("cuda", {0: [0, 1]})
        B = flow.sbp.broadcast

        class ReuseVarModule(flow.nn.Module):
            def __init__(self):
                super().__init__()
                self.linear1 = flow.nn.Linear(3, 4)
                self.linear2 = flow.nn.Linear(3, 4)
                self.linear2.weight = self.linear1.weight

        reuse_var_m = ReuseVarModule()

        test_case.assertTrue(
            reuse_var_m.linear1.weight is reuse_var_m.linear2.weight)
        test_case.assertEqual(reuse_var_m.linear1.weight.device,
                              flow.device("cpu", rank))

        test_case.assertTrue(
            reuse_var_m.linear1.bias is not reuse_var_m.linear2.bias)
        test_case.assertEqual(reuse_var_m.linear1.bias.device,
                              flow.device("cpu", rank))

        reuse_var_m.to_consistent(placement=P, sbp=B)

        test_case.assertTrue(
            reuse_var_m.linear1.weight is reuse_var_m.linear2.weight)
        test_case.assertEqual(reuse_var_m.linear1.weight.placement, P)
        test_case.assertEqual(reuse_var_m.linear1.weight.sbp[0], B)

        test_case.assertTrue(
            reuse_var_m.linear1.bias is not reuse_var_m.linear2.bias)
        test_case.assertEqual(reuse_var_m.linear1.bias.placement, P)
        test_case.assertEqual(reuse_var_m.linear1.bias.sbp[0], B)
Example #25
    def test_global_eager_tensor_to(test_case):
        rank = flow.env.get_rank()
        placement = flow.placement("cpu", ranks=[0, 1])
        t_l = flow.tensor([1.0, 2.0], dtype=flow.float32)
        t = t_l.to_global(placement=placement, sbp=flow.sbp.broadcast)

        class ConsistentEagerTensorToModule(flow.nn.Module):
            def __init__(self):
                super().__init__()

            def forward(self):
                # test free eager tensor to
                nonlocal t
                t = t.to("cuda")
                return t

        e_m = ConsistentEagerTensorToModule()

        class ConsistentEagerTensorToGraph(flow.nn.Graph):
            def __init__(self):
                super().__init__()
                self.e_m = e_m

            def build(self):
                return self.e_m()

        e_g = ConsistentEagerTensorToGraph()
        graph_out = e_g().to_local()
        print("g ", graph_out.numpy())
        test_case.assertTrue(
            np.allclose(graph_out.numpy(), t_l.numpy(), atol=1e-4, rtol=1e-4))
Example #26
    def test_to_dtype(test_case):
        x = flow.ones((2, 3), dtype=flow.int32, device="cpu")

        placement = flow.placement("cpu", ranks=[0, 1])
        c_x = flow.ones(
            (2, 3), dtype=flow.int32, placement=placement, sbp=flow.sbp.broadcast
        )

        class CastModule(flow.nn.Module):
            def __init__(self, dtype):
                super().__init__()
                self.dtype = dtype

            def forward(self, x):
                return x.to(dtype=self.dtype)

        m = CastModule(flow.float32)
        g = MyGraph(m)

        e_x = m(x)
        e_c_x = m(c_x)
        # NOTE(chengcheng):
        #   There are two BUG in this test script:
        #   1. first call and second call input tensor meta is NOT same
        #   2. nn.Graph NOT support local input with multi-rank yet.
        # g_x = g(x)
        g_c_x = g(c_x)

        test_case.assertTrue(e_x.dtype == flow.float32)
        # test_case.assertTrue(g_x.dtype == flow.float32)
        test_case.assertTrue(e_c_x.dtype == flow.float32)
        test_case.assertTrue(g_c_x.dtype == flow.float32)
    def test_stateful_local_kernel_in_global_mode(test_case):
        rank = int(os.getenv("RANK"))

        x = flow.tensor(np.array([1, 2]) * (rank + 1)).to("cuda")
        x = x.to_global(flow.placement("cuda", range(2)), flow.sbp.split(0))

        y = flow.tensor([3, 4, 5]).to("cuda")
        y = y.to_global(flow.placement("cuda", range(2)), flow.sbp.broadcast)

        # logical slice assign op needs sbp and logical shape from stateful local opkernel
        x[:3] = y

        x = x.to_global(sbp=flow.sbp.broadcast)

        test_case.assertTrue(
            np.array_equal(x.to_local().numpy(), np.array([3, 4, 5, 4])))
Example #28
 def __init__(self, data, requires_grad=True):
     # TODO: uncomment this line when autograd is ready
     # data.requires_grad = True
     data.set_is_consistent(True)
     # TODO: set a proper placement
     data.set_placement(flow.placement("gpu", ["0:0"], None))
     self._data = data
Example #29
def _test_global_tensor_str(test_case, device):
    placement = flow.placement(device, range(1))
    # split global tensor
    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.split(0)])
    tensor_str = str(x)
    test_case.assertTrue("1." in tensor_str)

    # broadcast global tensor
    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.broadcast])
    tensor_str = str(x)
    test_case.assertTrue("1." in tensor_str)

    # partial_sum global tensor
    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.partial_sum])
    tensor_str = str(x)
    test_case.assertTrue("1." in tensor_str)

    # summarized global tensor
    x = flow.ones((100, 100), placement=placement, sbp=[flow.sbp.split(0)])
    tensor_str = str(x)
    test_case.assertTrue("1." in tensor_str)
    test_case.assertTrue("..." in tensor_str)

    # empty global tensor
    x = flow.ones((0, 10), placement=placement, sbp=[flow.sbp.split(0)])
    tensor_str = str(x)
    test_case.assertTrue("[]" in tensor_str)
 def test_meshgrid_tensors_placement_runtime_error(test_case):
     with test_case.assertRaises(Exception) as context:
         x1 = flow.tensor(
             [0.0, 1.0],
             dtype=flow.float32,
             placement=flow.placement("cpu", ranks=[0]),
             sbp=[flow.sbp.broadcast],
         )
         x2 = flow.tensor(
             [0.0, 1.0],
             dtype=flow.float32,
             placement=flow.placement("cpu", ranks=[0]),
             sbp=[flow.sbp.broadcast],
         ).to_local()
         y = flow.meshgrid(x1, x2)
     test_case.assertTrue("meshgrid expects all tensors are global tensor"
                          in str(context.exception))
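The error above is raised because x2 is converted back to a local tensor while x1 stays global. A working variant, sketched under the assumption that both inputs share the same placement and SBP, simply keeps both tensors global:

import oneflow as flow

x1 = flow.tensor([0.0, 1.0], dtype=flow.float32,
                 placement=flow.placement("cpu", ranks=[0]),
                 sbp=[flow.sbp.broadcast])
x2 = flow.tensor([2.0, 3.0], dtype=flow.float32,
                 placement=flow.placement("cpu", ranks=[0]),
                 sbp=[flow.sbp.broadcast])
# Both inputs are global with matching placement/SBP, so meshgrid succeeds.
y = flow.meshgrid(x1, x2)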