def FlowJob(x: flow.typing.Numpy.Placeholder((4, 3, 2, 3),
                                              dtype=flow.float)):
    """Add a sharded variable to ``x`` and reshape the sum like a second one.

    Inside a "gpu" placement scope on "0:0-3" with a (2, 2) hierarchy, two
    zero-initialised trainable variables are created with the
    ["S(0)", "S(2)"] distribution. ``x`` is cast to that same distribution,
    ``v1`` is added to it, and the result is reshaped to match ``v2``.
    Outside the scope the result is cast to the 1-D ["S(0)"] distribution
    and returned.
    """

    def _zero_variable(var_name, var_shape):
        # Both variables differ only in their name and shape.
        return flow.get_variable(
            var_name,
            shape=var_shape,
            dtype=flow.float,
            initializer=flow.constant_initializer(0),
            trainable=True,
            parallel_distribution=["S(0)", "S(2)"],
        )

    with flow.scope.placement("gpu", "0:0-3", (2, 2)):
        v1 = _zero_variable("v1", (4, 3, 2, 3))
        v2 = _zero_variable("v2", (4, 3, 6))

        # Bring the input onto the same layout as v1 before adding.
        x = flow.hierarchical_parallel_cast(
            x,
            parallel_distribution=["S(0)", "S(2)"],
        )
        x += v1
        reshaped = flow.reshape_like(x, v2)

    # Back on the outer placement: collapse to a single S(0) dimension.
    return flow.hierarchical_parallel_cast(
        reshaped,
        parallel_distribution=["S(0)"],
    )
# Example no. 2
# 0
    def gpt_loader_fn() -> flow.typing.Numpy:
        """Load GPT tokens on CPU and return them cast to the ["B"] layout.

        All loader settings (file prefix, sequence length, sample count,
        batch size, dtype, shuffling, seed, splits, distribution and
        resume flag) as well as the placement arguments are read from the
        enclosing scope.
        """
        with flow.scope.placement("cpu", device_strs, parallel_hierachy):
            loader_options = dict(
                data_file_prefix=data_file_prefix,
                seq_length=seq_length,
                num_samples=num_samples,
                batch_size=batch_size,
                dtype=dtype,
                shuffle=shuffle,
                random_seed=random_seed,
                split_sizes=split_sizes,
                split_index=split_index,
                parallel_distribution=parallel_distribution,
                start_from_saved_progress=start_from_saved_progress,
                name="GPTDataLoader",
            )
            loaded = flow.data.megatron_gpt_mmap_data_loader(**loader_options)

            # A list with more than one entry gets an extra cast to
            # ["B", "B"] while still inside the placement scope.
            has_multi_dim_layout = (
                isinstance(parallel_distribution, list)
                and len(parallel_distribution) > 1
            )
            if has_multi_dim_layout:
                loaded = flow.hierarchical_parallel_cast(
                    loaded,
                    parallel_distribution=["B", "B"],
                )

        # Final cast happens outside the loader's placement scope.
        return flow.hierarchical_parallel_cast(
            loaded,
            parallel_distribution=["B"],
        )
    def FlowJob(x: flow.typing.Numpy.Placeholder((4, 6), dtype=flow.float)):
        """Add a sharded variable to ``x``, reshape to (4, 2, 3) and minimise.

        Inside a "gpu" placement scope on "0:0-3" with a (2, 2) hierarchy,
        ``x`` is cast to ["S(0)", "S(1)"], a zero-initialised trainable
        variable with the same layout is added, and the sum is reshaped to
        (4, 2, 3). Outside the scope the result is cast to ["S(0)"], handed
        to an SGD optimizer (constant learning rate 1e-4, momentum 0) and
        returned.
        """
        with flow.scope.placement("gpu", "0:0-3", (2, 2)):
            weight = flow.get_variable(
                "x",
                shape=(4, 6),
                dtype=flow.float,
                initializer=flow.constant_initializer(0),
                trainable=True,
                parallel_distribution=["S(0)", "S(1)"],
            )
            x = flow.hierarchical_parallel_cast(
                x,
                parallel_distribution=["S(0)", "S(1)"],
            )
            x += weight
            reshaped = flow.reshape(x, (4, 2, 3))

        loss = flow.hierarchical_parallel_cast(
            reshaped,
            parallel_distribution=["S(0)"],
        )

        # Constant learning rate: no boundaries, a single value.
        lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [1e-4])
        sgd = flow.optimizer.SGD(lr_scheduler, momentum=0)
        sgd.minimize(loss)
        return loss
 def test_fn(
         a: flow.typing.Numpy.Placeholder(a_shape),
         b: flow.typing.Numpy.Placeholder(b_shape),
         c: flow.typing.Numpy.Placeholder(c_shape),
 ) -> flow.typing.Numpy:
     """Compute ``bias_add(matmul(var_a * a, b), c)`` with explicit casts.

     ``a`` is cast to ["S(1)"] and scaled element-wise by a ones-initialised
     variable split along axis 1; the matmul result and ``c`` are both cast
     to ["B"] before the bias add. The output is minimised with SGD
     (constant learning rate 0.001, momentum 0) and returned. The shapes
     ``a_shape``, ``b_shape`` and ``c_shape`` come from the enclosing scope.
     """

     def _recast(blob, layout):
         # All casts in this job use a single-entry distribution.
         return flow.hierarchical_parallel_cast(
             blob, parallel_distribution=[layout])

     weight = flow.get_variable(
         name="var_a",
         shape=a_shape,
         dtype=flow.float32,
         initializer=flow.ones_initializer(),
         distribute=flow.distribute.split(1),
     )

     # S0 -> S1
     a = weight * _recast(a, "S(1)")
     product = flow.matmul(a, b)

     # P -> B
     product = _recast(product, "B")

     # S0 -> B
     bias = _recast(c, "B")
     out = flow.nn.bias_add(product, bias)

     sgd = flow.optimizer.SGD(
         flow.optimizer.PiecewiseConstantScheduler([], [0.001]),
         momentum=0,
     )
     sgd.minimize(out)
     return out
 def test_fn(
     x: flow.typing.Numpy.Placeholder((1024, 4)),
     indices: flow.typing.Numpy.Placeholder(shape=(12, ), dtype=flow.int32),
 ) -> flow.typing.Numpy:
     """Gather from ``x + v`` with ``indices`` through a chain of 2-D casts.

     Everything up to the ["B", "B"] cast runs inside a "gpu" placement
     scope on "0:0-3" with a (2, 2) hierarchy. The gathered result goes
     through a cast with ``grad_mode="manual"``, then ReLU, and is finally
     cast to ["B"] outside the scope, minimised with SGD (constant learning
     rate 1e-3, momentum 0) and returned.
     """

     def _recast(blob, layout):
         # Plain cast without any gradient options.
         return flow.hierarchical_parallel_cast(
             blob, parallel_distribution=layout)

     with flow.scope.placement("gpu", "0:0-3", (2, 2)):
         x = _recast(x, ["S(0)", "S(0)"])
         indices = _recast(indices, ["B", "B"])

         # Re-layout x to match the variable it is added to.
         x = _recast(x, ["S(0)", "B"])
         v = flow.get_variable(
             name="v",
             shape=(1024, 4),
             parallel_distribution=["S(0)", "B"],
             initializer=flow.zeros_initializer(),
         )
         x = x + v

         indices = _recast(indices, ["B", "S(0)"])
         x = flow.gather(x, indices)

         # This cast pins the gradient layout explicitly.
         x = flow.hierarchical_parallel_cast(
             x,
             parallel_distribution=["B", "S(0)"],
             grad_mode="manual",
             grad_parallel_distribution=["B", "S(0)"],
         )
         x = flow.math.relu(x)
         x = _recast(x, ["B", "B"])

     x = _recast(x, ["B"])

     lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [1e-3])
     flow.optimizer.SGD(lr_scheduler, momentum=0).minimize(x)
     return x
 def test_fn(
     x: flow.typing.Numpy.Placeholder((1024, 1024)),
     indices: flow.typing.Numpy.Placeholder(shape=(64, ), dtype=flow.int32),
 ) -> flow.typing.Numpy:
     """Gather from ``x`` with ``indices`` under the layouts ``src``/``dst``.

     ``src`` and ``dst`` are two-entry distributions taken from the
     enclosing scope. Inside a "gpu" placement scope on "0:0-3" with a
     (2, 2) hierarchy the operands are cast in two stages, selected first
     by ``src[0]`` and then by ``src[1]``. The gather output is cast to
     ``dst`` (op name "gather_cast"), then to a layout selected by
     ``dst[0]``, and finally to ["B"] outside the scope.

     Raises:
         NotImplementedError: if ``src[0]`` is "B" and ``src[1]`` is not one
             of "S(0)", "S(1)", "P", "B", or if ``dst[0]`` is not one of
             "S(0)", "B", "S(1)".
     """

     def _cast_operands(data, idx, layouts):
         """Cast ``data`` first, then ``idx``, to the given layout pair."""
         data_layout, idx_layout = layouts
         data = flow.hierarchical_parallel_cast(
             data, parallel_distribution=data_layout)
         idx = flow.hierarchical_parallel_cast(
             idx, parallel_distribution=idx_layout)
         return data, idx

     # src[0] -> (first-stage layouts, {src[1] -> second-stage layouts}).
     # Every layout pair is (layout of x, layout of indices).
     src_plans = {
         "S(0)": (
             (["B", "B"], ["S(0)", "S(0)"]),
             {
                 "S(0)": (["B", "B"], ["S(0)", "S(0)"]),
                 "S(1)": (["B", "S(1)"], ["S(0)", "B"]),
                 "P": (["B", "S(0)"], ["S(0)", "B"]),
                 "B": (["B", "B"], ["S(0)", "B"]),
             },
         ),
         "P": (
             (["S(0)", "S(0)"], ["B", "B"]),
             {
                 "S(0)": (["S(0)", "B"], ["B", "S(0)"]),
                 "S(1)": (["S(0)", "S(1)"], ["B", "B"]),
                 "P": (["S(0)", "S(0)"], ["B", "B"]),
                 "B": (["S(0)", "B"], ["B", "B"]),
             },
         ),
         "B": (
             (["B", "B"], ["B", "B"]),
             {
                 "S(0)": (["B", "B"], ["B", "S(0)"]),
                 "S(1)": (["B", "S(1)"], ["B", "B"]),
                 "P": (["B", "S(0)"], ["B", "B"]),
                 "B": (["B", "B"], ["B", "B"]),
             },
         ),
     }

     # dst[0] -> layout applied after the "gather_cast" op.
     dst_layouts = {
         "S(0)": ["S(0)", "S(0)"],
         "B": ["B", "B"],
         "S(1)": ["S(1)", "S(1)"],
     }

     with flow.scope.placement("gpu", "0:0-3", (2, 2)):
         plan = src_plans.get(src[0])
         # An unrecognised src[0] leaves both operands uncast.
         if plan is not None:
             first_stage, second_stage_by_src1 = plan
             x, indices = _cast_operands(x, indices, first_stage)

             # The second stage is keyed on src[1]; comparing the whole
             # `src` sequence against a string can never match.
             second_stage = second_stage_by_src1.get(src[1])
             if second_stage is not None:
                 x, indices = _cast_operands(x, indices, second_stage)
             elif src[0] == "B":
                 # Only the "B" chain rejects an unknown src[1]; the other
                 # chains continue with just the first-stage casts.
                 raise NotImplementedError

         x = flow.gather(x, indices)
         x = flow.hierarchical_parallel_cast(
             x,
             parallel_distribution=dst,
             name="gather_cast",
         )

         # Checked after the gather ops are built, so an unsupported dst[0]
         # fails at this point rather than earlier.
         if dst[0] not in dst_layouts:
             raise NotImplementedError
         x = flow.hierarchical_parallel_cast(
             x,
             parallel_distribution=dst_layouts[dst[0]],
         )

     x = flow.hierarchical_parallel_cast(x, parallel_distribution=["B"])
     return x