예제 #1
0
 def split_to_broadcast_job(x: oft.Numpy.Placeholder((96, 96))):
     """Boxing test: relay *x* as split(src_axis), then as broadcast, on GPUs 0-1."""
     with flow.scope.placement("gpu", "0:0-1"):
         as_split = x.with_distribute(flow.distribute.split(src_axis))
         staged = flow.identity(as_split)
         result = flow.identity(staged.with_distribute(flow.distribute.broadcast()))
     return result
예제 #2
0
 def partial_sum_to_broadcast_job(x: oft.Numpy.Placeholder((96, 96, 96))):
     """Reduce a split(0) blob to a partial sum on the source devices, then
     broadcast the result on the destination devices."""
     src_devices = "0:0-" + str(src_device_num - 1)
     dst_devices = "0:0-" + str(dst_device_num - 1)
     with flow.scope.placement(src_device_type, src_devices):
         partial = flow.identity(x.with_distribute(flow.distribute.split(0)))
         partial = flow.math.reduce_sum(partial, axis=0)
     with flow.scope.placement(dst_device_type, dst_devices):
         result = flow.identity(partial.with_distribute(flow.distribute.broadcast()))
     return result
예제 #3
0
 def partial_sum_to_split_job(x: oft.Numpy.Placeholder((96, 96, 96))):
     """Sum a split(0) blob along axis 0, then redistribute as split(dst_axis)."""
     with flow.scope.placement("gpu", "0:0-1"):
         summed = flow.math.reduce_sum(
             flow.identity(x.with_distribute(flow.distribute.split(0))), axis=0)
         result = flow.identity(
             summed.with_distribute(flow.distribute.split(dst_axis)))
     return result
예제 #4
0
    def test_job(x: oft.Numpy.Placeholder(input_shape, dtype=flow.float32),):
        """Compare fused batch_normalization_relu ("BN1") against separate
        batch_normalization + relu ("BN2") on identical inputs.

        Outputs and per-branch gradients are stashed in test_global_storage so
        the surrounding test can assert the two branches agree.
        """
        # Zero-initialized variable added to x — presumably so the input is
        # connected to trainable state and watch_diff fires; confirm.
        v = flow.get_variable(
            name="v",
            shape=(1,),
            dtype=flow.float32,
            initializer=flow.zeros_initializer(),
        )

        x = x + v

        # Two identity branches so each path's gradient can be captured separately.
        x1 = flow.identity(x)
        x2 = flow.identity(x)

        flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
        flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))

        # data_type is captured from the enclosing test scope.
        x1 = flow.cast(x1, data_type)
        x2 = flow.cast(x2, data_type)

        # Branch 1: fused BN+ReLU. Branch 2: BN followed by a separate ReLU.
        y1 = flow.layers.batch_normalization_relu(x1, axis=axis, name="BN1")
        y2 = flow.math.relu(flow.layers.batch_normalization(x2, axis=axis, name="BN2"))

        # Cast back to float32 so both outputs are compared in full precision.
        y1 = flow.cast(y1, flow.float32)
        y2 = flow.cast(y2, flow.float32)

        flow.watch(y1, test_global_storage.Setter("y1"))
        flow.watch(y2, test_global_storage.Setter("y2"))

        loss = flow.math.reduce_mean(y1 + y2)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
        ).minimize(flow.math.reduce_sum(loss))

        return loss
예제 #5
0
 def split_to_split_job(x: oft.Numpy.Placeholder((32, 16, 64, 48))):
     """Boxing test: re-split *x* from src_axis to dst_axis on GPUs 0-1."""
     with flow.scope.placement("gpu", "0:0-1"):
         staged = flow.identity(x.with_distribute(flow.distribute.split(src_axis)))
         result = flow.identity(
             staged.with_distribute(flow.distribute.split(dst_axis)))
     return result
예제 #6
0
def _dense_layer(
    inputs,
    units,
    activation=None,
    use_bias=True,
    kernel_initializer=None,
    bias_initializer=None,
    trainable=True,
    name=None,
):
    """Fully-connected layer: out = inputs @ weight^T (+ bias), optionally activated.

    Inputs with more than two axes are flattened to (-1, last_dim) first, and
    the original leading shape is restored on the way out. Weight and bias are
    relayed through identity + repeat(args.num_piece_in_batch) — presumably for
    piecewise/micro-batch execution; confirm against the training loop.
    """
    original_shape = inputs.shape
    num_axes = len(original_shape)
    assert num_axes >= 2

    prefix = name if name is not None else id_util.UniqueStr("Dense_")
    if num_axes > 2:
        inputs = flow.reshape(inputs, (-1, original_shape[-1]))

    weight = flow.get_variable(
        name="{}-weight".format(prefix),
        shape=(units, inputs.shape[1]),
        dtype=inputs.dtype,
        initializer=(kernel_initializer
                     if kernel_initializer is not None
                     else flow.constant_initializer(0)),
        trainable=trainable,
        model_name="weight",
    )
    weight = flow.repeat(flow.identity(weight), args.num_piece_in_batch)

    # transpose_b=True: weight is stored (units, in_features).
    out = flow.matmul(
        a=inputs,
        b=weight,
        transpose_b=True,
        name="{}_matmul".format(prefix),
    )

    if use_bias:
        bias = flow.get_variable(
            name="{}-bias".format(prefix),
            shape=(units, ),
            dtype=inputs.dtype,
            initializer=(bias_initializer
                         if bias_initializer is not None
                         else flow.constant_initializer(0)),
            trainable=trainable,
            model_name="bias",
        )
        bias = flow.repeat(flow.identity(bias), args.num_piece_in_batch)
        out = flow.nn.bias_add(out, bias, name="{}_bias_add".format(prefix))

    if activation is not None:
        out = activation(out, name="{}_activation".format(prefix))
    if num_axes > 2:
        out = flow.reshape(out, original_shape[:-1] + (units, ))

    return out
예제 #7
0
 def broadcast_to_compatible_with_fn(
         x_def: oft.ListNumpy.Placeholder(x_shape, dtype=flow.float),
         a_def: oft.ListNumpy.Placeholder(a_shape, dtype=flow.float),
         b_def: oft.ListNumpy.Placeholder(b_shape, dtype=flow.float),
 ):
     """Broadcast x_def to a shape compatible with both a_def and b_def."""
     compatibles = [flow.identity(a_def), flow.identity(b_def)]
     return flow.broadcast_to_compatible_with(x_def, compatibles)
예제 #8
0
 def broadcast_to_broadcast_job(x: oft.Numpy.Placeholder((96, 96, 96))):
     """Relay a broadcast blob from the source device set to the destination set."""
     src_devices = "0:0-" + str(src_device_num - 1)
     dst_devices = "0:0-" + str(dst_device_num - 1)
     with flow.scope.placement(src_device_type, src_devices):
         staged = flow.identity(x.with_distribute(flow.distribute.broadcast()))
     with flow.scope.placement(dst_device_type, dst_devices):
         result = flow.identity(staged.with_distribute(flow.distribute.broadcast()))
     return result
예제 #9
0
 def split_to_broadcast_job(input_blob: oft.Numpy.Placeholder((96, 96))):
     """Box a split(0) blob on node 0 into a broadcast blob spanning two nodes."""
     with flow.scope.placement("gpu", "0:0"):
         on_source = flow.identity(
             input_blob.with_distribute(flow.distribute.split(0)))
     with flow.scope.placement("gpu", ["0:0", "1:0"]):
         on_dest = flow.identity(
             on_source.with_distribute(flow.distribute.broadcast()))
     return on_dest
예제 #10
0
 def build_s2s_all2all(input_blob, src_axis, dst_axis):
     """All-to-all boxing: re-split *input_blob* from src_axis on the source
     devices to dst_axis on the destination devices."""
     src_devices = "0:0-" + str(src_device_num - 1)
     dst_devices = "0:0-" + str(dst_device_num - 1)
     with flow.scope.placement(src_device_type, src_devices):
         staged = flow.identity(
             input_blob.with_distribute(flow.distribute.split(src_axis)))
     with flow.scope.placement(dst_device_type, dst_devices):
         result = flow.identity(
             staged.with_distribute(flow.distribute.split(dst_axis)))
     return result
예제 #11
0
 def build_b2b(input_blob, src_device_num, dst_device_num):
     """Broadcast-to-broadcast boxing between two device sets."""
     src_devices = "0:0-" + str(src_device_num - 1)
     dst_devices = "0:0-" + str(dst_device_num - 1)
     with flow.scope.placement(src_device_type, src_devices):
         staged = flow.identity(
             input_blob.with_distribute(flow.distribute.broadcast()))
     with flow.scope.placement(dst_device_type, dst_devices):
         result = flow.identity(
             staged.with_distribute(flow.distribute.broadcast()))
     return result
예제 #12
0
 def split_to_split_job(x: oft.Numpy.Placeholder((32, 16, 64, 48))):
     """Boxing test: split(src_axis) on the source devices becomes
     split(dst_axis) on the destination devices."""
     src_devices = "0:0-" + str(src_device_num - 1)
     dst_devices = "0:0-" + str(dst_device_num - 1)
     with flow.scope.placement(src_device_type, src_devices):
         staged = flow.identity(x.with_distribute(flow.distribute.split(src_axis)))
     with flow.scope.placement(dst_device_type, dst_devices):
         result = flow.identity(
             staged.with_distribute(flow.distribute.split(dst_axis)))
     return result
예제 #13
0
 def build_p2b(input_blob, src_device_num, dst_device_num):
     """Partial-sum on the source devices, then broadcast on the destination devices."""
     src_devices = "0:0-" + str(src_device_num - 1)
     dst_devices = "0:0-" + str(dst_device_num - 1)
     with flow.scope.placement(src_device_type, src_devices):
         partial = flow.identity(
             input_blob.with_distribute(flow.distribute.split(0)))
         partial = flow.math.reduce_sum(partial, axis=0)
     with flow.scope.placement(dst_device_type, dst_devices):
         result = flow.identity(
             partial.with_distribute(flow.distribute.broadcast()))
     return result
예제 #14
0
 def multi_lbi_job(x: oft.Numpy.Placeholder((96, 96, 96))):
     """Relay three differently-distributed copies of *x* through identity_n
     across two placement scopes, changing each blob's distribution en route."""
     with flow.scope.placement(src_device_type, "0:0-" + str(src_device_num - 1)):
         src_s0 = flow.identity(x.with_distribute(flow.distribute.split(0)))
         src_s1 = flow.identity(x.with_distribute(flow.distribute.split(1)))
         # NOTE(review): named src_b (broadcast?) but distributed as split(1),
         # identical to src_s1 — confirm whether flow.distribute.broadcast()
         # was intended here.
         src_b = flow.identity(x.with_distribute(flow.distribute.split(1)))
         (t0_0, t0_1, t0_2) = flow.identity_n((src_s0, src_s1, src_b))
     with flow.scope.placement(dst_device_type, "0:0-" + str(dst_device_num - 1)):
         # Each blob gets a new distribution on the destination devices.
         t0_0 = t0_0.with_distribute(flow.distribute.split(1))
         t0_1 = t0_1.with_distribute(flow.distribute.broadcast())
         t0_2 = t0_2.with_distribute(flow.distribute.split(1))
         (t1_0, t1_1, t1_2) = flow.identity_n((t0_0, t0_1, t0_2))
     return t1_0, t1_1, t1_2
예제 #15
0
def _conv2d_layer(
        args,
        name,
        input,
        filters,
        kernel_size=3,
        strides=1,
        padding="SAME",
        data_format="NCHW",
        dilation_rate=1,
        activation=op_conf_util.kRelu,
        use_bias=False,
        weight_initializer=flow.random_uniform_initializer(),
        bias_initializer=flow.random_uniform_initializer(),
):
    """2-D convolution with optional bias-add and ReLU activation.

    Weight and bias are relayed through identity + repeat(args.num_piece_in_batch)
    — presumably for piecewise/micro-batch execution; confirm against the caller.
    Raises NotImplementedError for any activation other than op_conf_util.kRelu.

    NOTE(review): the initializer defaults are evaluated once at function
    definition time, so all calls omitting them share the same initializer
    objects — confirm that is intended. Also, `input` shadows the builtin,
    and input.shape[1] is used as the channel count even when data_format is
    not "NCHW" — verify callers never pass NHWC here.
    """
    # Weight layout: (out_channels, in_channels, kh, kw).
    weight_shape = (filters, input.shape[1], kernel_size, kernel_size)
    weight = flow.get_variable(
        name + "-weight",
        shape=weight_shape,
        dtype=input.dtype,
        initializer=weight_initializer,
    )
    weight = flow.identity(weight)
    weight = flow.repeat(weight, args.num_piece_in_batch)
    output = flow.nn.conv2d(input,
                            weight,
                            strides,
                            padding,
                            data_format,
                            dilation_rate,
                            name=name)
    if use_bias:
        bias = flow.get_variable(
            name + "-bias",
            shape=(filters, ),
            dtype=input.dtype,
            initializer=bias_initializer,
        )
        bias = flow.identity(bias)
        bias = flow.repeat(bias, args.num_piece_in_batch)
        output = flow.nn.bias_add(output, bias, data_format)

    if activation is not None:
        if activation == op_conf_util.kRelu:
            output = flow.math.relu(output)
        else:
            raise NotImplementedError

    return output
예제 #16
0
 def nvtx_range_job(x: oft.Numpy.Placeholder((4, 4, 1024, 1024))):
     """Wrap a softmax chain and a gelu chain in NVTX profiler ranges, then run
     an SGD step with zero learning rate so a backward graph is built."""
     x += flow.get_variable(
         name="v1",
         shape=(1, ),
         dtype=flow.float,
         initializer=flow.zeros_initializer(),
     )
     x = flow.math.relu(x)
     # Five chained softmaxes inside the "softmax" NVTX range.
     x = flow.profiler.nvtx_start(x, mark_prefix="softmax")
     for _ in range(5):
         x = flow.nn.softmax(x)
     x = flow.profiler.nvtx_end(x, mark_prefix="softmax")
     x = flow.math.relu(x)
     # Six chained gelus inside the "gelu" NVTX range.
     x = flow.profiler.nvtx_start(x, mark_prefix="gelu")
     for _ in range(6):
         x = flow.math.gelu(x)
     x = flow.profiler.nvtx_end(x, mark_prefix="gelu")
     flow.optimizer.SGD(
         flow.optimizer.PiecewiseConstantScheduler([], [0]),
         momentum=0).minimize(x)
     return flow.identity(x)
예제 #17
0
    def SoftmaxJob():
        """Run softmax on a trainable random variable, recording the input,
        output and their gradients via test_global_storage for the enclosing
        test to compare against a reference implementation."""
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=-1.0,
                                                            maxval=1.0),
                trainable=True,
            )
            # Keep a handle to the raw variable (x1); the identity presumably
            # gives watch/watch_diff a distinct op to attach to — confirm.
            x1 = x
            x = flow.identity(x)
            if data_type == "float16":
                # fp16 path: cast down for the softmax, back to fp32 for the loss.
                loss = flow.cast(
                    flow.nn.softmax(flow.cast(x, dtype=flow.float16),
                                    axis=axis),
                    dtype=flow.float,
                )
            else:
                loss = flow.nn.softmax(x, axis=axis)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            # Multiply by the raw variable so the optimizer has a trainable path.
            total_loss = loss * x1

            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(total_loss)

            return loss
예제 #18
0
    def PartialFcJob(labels: oft.Numpy.Placeholder(
        (batch_size, ), dtype=type_name_to_flow_type[label_type])):
        """Exercise distributed_partial_fc_sample over split(0) weights and
        broadcast labels, then train on the sampled weights, recording values
        and gradients through test_global_storage."""
        with flow.scope.placement(device_type, "0:0"):
            fc_weight = flow.get_variable(
                "x-weight",
                shape=(num_classes, 128),
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
        with flow.scope.placement(device_type, "0:0-3"):
            labels_distribute = flow.distribute.broadcast()
            weight_distribute = flow.distribute.split(0)
            (maped_label, sampled_label,
             sampled_weight) = flow.distributed_partial_fc_sample(
                 weight=fc_weight.with_distribute(weight_distribute),
                 label=labels.with_distribute(labels_distribute),
                 num_sample=num_sample,
             )
        with flow.scope.placement(device_type, "0:0"):
            sampled_weight = flow.identity(sampled_weight)
            loss = flow.math.square(sampled_weight)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [1e-4]),
                momentum=0).minimize(loss)

            flow.watch(fc_weight, test_global_storage.Setter("x"))
            flow.watch_diff(fc_weight, test_global_storage.Setter("x_diff"))
            flow.watch_diff(sampled_weight,
                            test_global_storage.Setter("sampled_weight_diff"))
        return fc_weight, maped_label, sampled_label, sampled_weight
예제 #19
0
    def SparseSoftmaxCrossEntropyWithLogitsJob(labels: oft.Numpy.Placeholder(
        (batch_size, ), dtype=type_name_to_flow_type[label_type])):
        """Train sparse softmax cross-entropy with model-parallel (last-axis
        split) logits and broadcast labels; values and gradients are recorded
        through test_global_storage."""
        with flow.scope.placement(device_type, "0:0"):
            logits = flow.get_variable(
                "x",
                shape=(batch_size, num_classes),
                dtype=type_name_to_flow_type[data_type],
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )

        with flow.scope.placement(device_type, "0:0-3"):
            labels_distribute = flow.distribute.broadcast()
            logits_distribute = flow.distribute.split(len(logits.shape) - 1)
            loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
                labels=labels.with_distribute(labels_distribute),
                logits=logits.with_distribute(logits_distribute),
            )
            loss = flow.math.square(loss)

        with flow.scope.placement(device_type, "0:0"):
            loss = flow.identity(loss)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [1e-4]),
                momentum=0).minimize(loss)

            flow.watch(logits, test_global_storage.Setter("x"))
            flow.watch_diff(logits, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return loss
예제 #20
0
    def dynamic_concat_job(
        input_0_def: oft.ListNumpy.Placeholder(
            shape=input_static_shape, dtype=flow.float
        ),
        input_1_def: oft.ListNumpy.Placeholder(
            shape=input_static_shape, dtype=flow.float
        ),
    ):
        """Concat two variable-scaled dynamic inputs along *axis* and train on
        the result; watch_cb / make_watch_diff_cb hooks let the enclosing test
        observe intermediate values and gradients."""
        var_0 = flow.get_variable(
            "Var0",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.constant_initializer(value=1, dtype=flow.float),
            trainable=True,
        )
        var_1 = flow.get_variable(
            "Var1",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.constant_initializer(value=1, dtype=flow.float),
            trainable=True,
        )
        # Align every blob with the active logical view before mixing them.
        var_0 = flow.cast_to_current_logical_view(var_0)
        var_1 = flow.cast_to_current_logical_view(var_1)
        input_0_def = flow.cast_to_current_logical_view(input_0_def)
        input_1_def = flow.cast_to_current_logical_view(input_1_def)
        # watch_cb is optional and captured from the enclosing scope.
        if callable(watch_cb):
            flow.watch(var_0, watch_cb)
            flow.watch(var_1, watch_cb)
            flow.watch(flow.identity(input_0_def), watch_cb)
            flow.watch(flow.identity(input_1_def), watch_cb)

        var_0 = var_0 * input_0_def
        var_1 = var_1 * input_1_def
        if callable(watch_cb):
            flow.watch(var_0, watch_cb)
            flow.watch(var_1, watch_cb)

        # max_dim_size bounds the dynamic concat along the chosen axis.
        result = flow.concat(
            [var_0, var_1], axis=axis, max_dim_size=input_static_shape[axis]
        )
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(result)
        flow.watch_diff(var_0, make_watch_diff_cb(0))
        flow.watch_diff(var_1, make_watch_diff_cb(1))
        return result
예제 #21
0
 def DynamicBinaryJob(x: oft.ListNumpy.Placeholder((20, ))):
     """Split *x* with dynamic_binary_split on cpu 0, concat the pieces on
     cpu 1, and copy the result back to cpu 0, printing shapes along the way."""
     print("in_shape: ", x.shape)
     with flow.scope.placement("cpu", "0:0"):
         pieces = flow.experimental.dynamic_binary_split(x,
                                                         base_shift=4,
                                                         out_num=6)
         relayed = []
         for piece in pieces:
             print("out_shape: ", piece.shape)
             relayed.append(flow.identity(piece))
     with flow.scope.placement("cpu", "1:0"):
         merged = flow.experimental.dynamic_binary_concat(relayed, x)
         print("concat_shape: ", merged.shape)
     with flow.scope.placement("cpu", "0:0"):
         result = flow.identity(merged)
         print("return_shape: ", result.shape)
     return result
예제 #22
0
def cast_to_current_logical_view(
    x: remote_blob_util.BlobDef, ) -> remote_blob_util.BlobDef:
    """Return *x*, wrapped in an identity op when its blob kind disagrees with
    the view (mirrored vs consistent) enabled in the current scope."""
    if isinstance(x, remote_blob_util.ConsistentBlob):
        if oneflow.scope.mirrored_view_enabled():
            return oneflow.identity(x)
    elif isinstance(x, remote_blob_util.MirroredBlob):
        if oneflow.scope.consistent_view_enabled():
            return oneflow.identity(x)
    return x
예제 #23
0
def cast_to_current_logical_view(
    x: oneflow_api.BlobDesc, ) -> oneflow_api.BlobDesc:
    """Wrap *x* in an identity op if its blob kind mismatches the view
    (mirrored vs consistent) enabled in the current scope."""
    mismatched = (isinstance(x, oneflow_api.ConsistentBlob)
                  and oneflow.scope.mirrored_view_enabled()) or (
                      isinstance(x, oneflow_api.MirroredBlob)
                      and oneflow.scope.consistent_view_enabled())
    return oneflow.identity(x) if mismatched else x
예제 #24
0
    def test_job(
            x: oft.Numpy.Placeholder(input_shape, dtype=flow.float32),
            labels: oft.Numpy.Placeholder(label_shape, dtype=flow.int32),
    ):
        """Compare flow.combined_margin_loss (y1) against a reference
        margin_loss implementation (y2) on identical inputs, training both and
        recording outputs and per-branch gradients via test_global_storage."""
        with flow.scope.placement("gpu", "0:0"):
            # Zero variable added to x — presumably so the input is connected
            # to trainable state and watch_diff fires; confirm.
            v = flow.get_variable(
                name="v",
                shape=(1, ),
                dtype=flow.float32,
                initializer=flow.zeros_initializer(),
            )
            x = x + v

            # Two identical branches so gradients can be compared per branch.
            x1 = flow.identity(x)
            x2 = flow.identity(x)

            flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
            flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))

            # data_type is captured from the enclosing test scope.
            x1 = flow.cast(x1, data_type)
            x2 = flow.cast(x2, data_type)

        with flow.scope.placement("gpu", "0:0-3"):
            # Branch 1: built-in op with model-parallel (split(1)) input and
            # broadcast labels, scaled by s. Branch 2: reference implementation.
            y1 = (flow.combined_margin_loss(
                x1.with_distribute(flow.distribute.split(1)),
                labels.with_distribute(flow.distribute.broadcast()),
                m1,
                m2,
                m3,
            ) * s)
            y2 = margin_loss(m1, m2, m3, s, x2, labels)

        with flow.scope.placement("gpu", "0:0"):
            y1 = flow.cast(y1, flow.float)
            y2 = flow.cast(y2, flow.float)

            flow.watch(y1, test_global_storage.Setter("y1"))
            flow.watch(y2, test_global_storage.Setter("y2"))
            loss = y1 + y2
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [0.001]),
                               momentum=0).minimize(flow.math.reduce_sum(loss))

        return loss
예제 #25
0
 def slice(input_blob: oft.Numpy.Placeholder(shape=(2, 5, 4), dtype=flow.float)):
     """Backward test for slice_v2: slice a random variable and register the
     result as a loss so slice_grad_cb can inspect the gradient.

     NOTE(review): *input_blob* is never used in the body — presumably it only
     fixes the job's input signature; confirm. The function name also shadows
     the builtin ``slice``.
     """
     x = flow.get_variable(
         shape=(2, 5, 4),
         dtype=flow.float,
         initializer=flow.random_uniform_initializer(0, 2),
         name="variable",
     )
     x = flow.identity(x)
     flow.watch_diff(x, slice_grad_cb)
     # Keep axis 0 whole; take elements 2..-2 along axis 1.
     y = flow.slice_v2(x, [(None, None, None), (2, -2, None)])
     flow.losses.add_loss(y)
     return y
예제 #26
0
 def slice(input_blob: oft.Numpy.Placeholder(shape=(2, 5, 4),
                                             dtype=flow.float)):
     """Backward test for slice_v2: slice a random variable, watch its
     gradient via slice_grad_cb, and minimize the sliced output.

     *input_blob* is not referenced in the body.
     """
     var = flow.get_variable(
         shape=(2, 5, 4),
         dtype=flow.float,
         initializer=flow.random_uniform_initializer(0, 2),
         name="variable",
     )
     var = flow.identity(var)
     flow.watch_diff(var, slice_grad_cb)
     # Keep axis 0 whole; take elements 2..-2 along axis 1.
     sliced = flow.slice_v2(var, [(None, None, None), (2, -2, None)])
     flow.optimizer.SGD(
         flow.optimizer.PiecewiseConstantScheduler([], [1e-3]),
         momentum=0).minimize(sliced)
     return sliced
예제 #27
0
    def ReduceMaxJob(x: oft.Numpy.Placeholder(input_shape, dtype=flow.float)):
        """reduce_max over *axis*, storing input/output values and gradients in
        test_global_storage for the enclosing test to check."""
        with flow.scope.placement(device_type, "0:0"):
            # Zero variable added to x — presumably so the input is connected
            # to trainable state and watch_diff fires; confirm.
            x += flow.get_variable(
                name="v1",
                shape=input_shape,
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )
            loss = flow.math.reduce_max(x, axis=axis, keepdims=keepdims)
            loss = flow.identity(loss)
            # NOTE(review): uses flow.losses.add_loss rather than an explicit
            # optimizer — presumably the older loss-registration API.
            flow.losses.add_loss(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss
예제 #28
0
    def ReduceMinJob(x: oft.Numpy.Placeholder(input_shape, dtype=flow.float)):
        """reduce_min over *axis*, training through the result and storing
        input/output values and gradients in test_global_storage."""
        with flow.scope.placement(device_type, "0:0"):
            # Zero variable added to x — presumably so the input is connected
            # to trainable state and watch_diff fires; confirm.
            x += flow.get_variable(
                name="v1",
                shape=input_shape,
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )
            loss = flow.math.reduce_min(x, axis=axis, keepdims=keepdims)
            loss = flow.identity(loss)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(loss)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss
    def two_stage_reduce_job(x: oft.Numpy.Placeholder((4, 20, 20, 20))):
        """Apply *flow_func* (a reduce op captured from the enclosing test) to
        x distributed as split(split_axis) over devices 0-3, and register the
        result as a loss; x and its gradient go to test_global_storage."""
        with flow.scope.placement(device_type, "0:0"):
            # Zero variable added to x — presumably so the input is connected
            # to trainable state and watch_diff fires; confirm.
            x += flow.get_variable(
                name="v1",
                shape=(1,),
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )
        with flow.scope.placement(device_type, "0:0-3"):
            # flow_func, split_axis and axis come from the enclosing scope.
            loss = flow_func(
                x.with_distribute(flow.distribute.split(split_axis)),
                axis=axis,
                keepdims=True,
            )
            loss = flow.identity(loss)
            flow.losses.add_loss(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            return loss
예제 #30
0
    def ReduceMeanJob():
        """reduce_mean over a trainable random variable; records the variable,
        the loss, and both gradients through test_global_storage."""
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=input_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            loss = flow.math.reduce_mean(x, axis=axis, keepdims=keepdims)
            # TODO: fix facade and add_loss bug
            loss = flow.identity(loss)
            flow.losses.add_loss(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss