def test_job(x: oft.Numpy.Placeholder(input_shape, dtype=flow.float32)):
     v = flow.get_variable(
         name="v",
         shape=(1, ),
         dtype=flow.float32,
         initializer=flow.zeros_initializer(),
     )
     x = x + v
     x1 = flow.identity(x)
     x2 = flow.identity(x)
     flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
     flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))
     x1 = flow.cast(x1, data_type)
     x2 = flow.cast(x2, data_type)
     # Compare the fused batch_normalization_relu kernel against unfused
     # batch_normalization followed by relu.
     y1 = flow.layers.batch_normalization_relu(x1, axis=axis, name="BN1")
     y2 = flow.math.relu(
         flow.layers.batch_normalization(x2, axis=axis, name="BN2"))
     y1 = flow.cast(y1, flow.float32)
     y2 = flow.cast(y2, flow.float32)
     flow.watch(y1, test_global_storage.Setter("y1"))
     flow.watch(y2, test_global_storage.Setter("y2"))
     y1 = flow.where(flow.math.greater(y2, v), y1, v)
     y2 = flow.where(flow.math.greater(y1, v), y2, v)
     loss = y1 + y2
     flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler([],
                                                                  [0.001]),
                        momentum=0).minimize(flow.math.reduce_sum(loss))
     return loss
 def broadcast_to_compatible_with_fn(
     x_def: oft.ListNumpy.Placeholder(x_shape, dtype=flow.float),
     a_def: oft.ListNumpy.Placeholder(a_shape, dtype=flow.float),
     b_def: oft.ListNumpy.Placeholder(b_shape, dtype=flow.float),
 ):
     return flow.broadcast_to_compatible_with(
         x_def, [flow.identity(a_def), flow.identity(b_def)]
     )
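In the original test this job runs under a mirrored logical view; the sketch below shows how such a job could be registered and called with the legacy (lazy, single-client) OneFlow API. The func_config setup, the concrete shapes, and the driver lines are assumptions added here for illustration, not part of the original snippet.

# Sketch only: legacy OneFlow lazy API; the shapes below are made up.
import numpy as np
import oneflow as flow
import oneflow.typing as oft

x_shape, a_shape, b_shape = (4, 1, 6), (1, 5, 6), (4, 5, 1)

func_config = flow.FunctionConfig()
func_config.default_logical_view(flow.scope.mirrored_view())

@flow.global_function(function_config=func_config)
def broadcast_to_compatible_with_fn(
    x_def: oft.ListNumpy.Placeholder(x_shape, dtype=flow.float),
    a_def: oft.ListNumpy.Placeholder(a_shape, dtype=flow.float),
    b_def: oft.ListNumpy.Placeholder(b_shape, dtype=flow.float),
):
    # flow.identity keeps a_def/b_def as real blobs whose shapes drive the broadcast.
    return flow.broadcast_to_compatible_with(
        x_def, [flow.identity(a_def), flow.identity(b_def)]
    )

# ListNumpy placeholders take one ndarray per mirrored device.
x = np.random.rand(*x_shape).astype(np.float32)
a = np.zeros(a_shape, dtype=np.float32)
b = np.zeros(b_shape, dtype=np.float32)
out = broadcast_to_compatible_with_fn([x], [a], [b]).get().numpy_list()[0]
print(out.shape)  # expected broadcast-compatible shape: (4, 5, 6)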
Example #3
 def split_to_broadcast_job(input_blob: oft.Numpy.Placeholder(
     (96, 96))):
     with flow.scope.placement("gpu", "0:0"):
         src = flow.identity(
             input_blob.with_distribute(flow.distribute.split(0)))
     with flow.scope.placement("gpu", ["0:0", "1:0"]):
         dst = flow.identity(
             src.with_distribute(flow.distribute.broadcast()))
     return dst
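Boxing jobs like the one above are normally wrapped in @flow.global_function and driven by a small harness that checks the output against the input. A hedged, single-node variant is sketched below; the original places the consumer across two machines ("0:0" and "1:0"), while here two GPUs on one machine are assumed.

# Sketch only: legacy OneFlow lazy API, assuming 2 GPUs on machine 0.
import numpy as np
import oneflow as flow
import oneflow.typing as oft

flow.config.gpu_device_num(2)
func_config = flow.FunctionConfig()
func_config.default_data_type(flow.float)

@flow.global_function(function_config=func_config)
def split_to_broadcast_job(input_blob: oft.Numpy.Placeholder((96, 96))):
    with flow.scope.placement("gpu", "0:0"):
        # Producer holds the blob with an S(0) (row-split) distribution.
        src = flow.identity(input_blob.with_distribute(flow.distribute.split(0)))
    with flow.scope.placement("gpu", "0:0-1"):
        # Consumer requests broadcast; the identity op forces the S(0) -> B boxing.
        dst = flow.identity(src.with_distribute(flow.distribute.broadcast()))
    return dst

x = np.random.rand(96, 96).astype(np.float32)
# Boxing only rearranges data across devices; logically the blob is unchanged.
assert np.array_equal(split_to_broadcast_job(x).get().numpy(), x)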
 def FlowJob(
     value: oft.Numpy.Placeholder(x_shape), bias: oft.Numpy.Placeholder(bias_shape)
 ):
     with flow.scope.placement(device_type, "0:0"):
         value += flow.get_variable(
             name="v1",
             shape=(1,),
             dtype=flow.float,
             initializer=flow.zeros_initializer(),
         )
         bias += flow.get_variable(
             name="v2",
             shape=(1,),
             dtype=flow.float,
             initializer=flow.zeros_initializer(),
         )
         x1 = flow.identity(value)
         x2 = flow.identity(value)
         bias1 = flow.identity(bias)
         bias2 = flow.identity(bias)
         flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
         flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))
         flow.watch_diff(bias1, test_global_storage.Setter("bias1_diff"))
         flow.watch_diff(bias2, test_global_storage.Setter("bias2_diff"))
         # Compare bias_add followed by gelu against the fused_bias_add_gelu
         # kernel; in the float16 branch both paths compute in half precision.
         if data_type == "float16":
             y1 = flow.cast(
                 flow.math.gelu(
                     flow.nn.bias_add(
                         flow.cast(x1, dtype=flow.float16),
                         flow.cast(bias1, dtype=flow.float16),
                         data_format=data_format,
                     )
                 ),
                 dtype=flow.float,
             )
             y2 = flow.cast(
                 flow.nn.fused_bias_add_gelu(
                     flow.cast(x2, dtype=flow.float16),
                     flow.cast(bias2, dtype=flow.float16),
                     data_format=data_format,
                 ),
                 dtype=flow.float,
             )
         else:
             y1 = flow.math.gelu(
                 flow.nn.bias_add(x1, bias1, data_format=data_format)
             )
             y2 = flow.nn.fused_bias_add_gelu(x2, bias2, data_format=data_format)
         flow.watch(y1, test_global_storage.Setter("y1"))
         flow.watch(y2, test_global_storage.Setter("y2"))
         flow.watch_diff(y1, test_global_storage.Setter("y1_diff"))
         flow.watch_diff(y2, test_global_storage.Setter("y2_diff"))
         loss = y1 + y2
     flow.optimizer.SGD(
         flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
     ).minimize(flow.math.reduce_sum(loss))
     return loss
Example #5
 def build_b2b(input_blob, src_device_num, dst_device_num):
     with flow.scope.placement(src_device_type,
                               "0:0-" + str(src_device_num - 1)):
         src = flow.identity(
             input_blob.with_distribute(flow.distribute.broadcast()))
     with flow.scope.placement(dst_device_type,
                               "0:0-" + str(dst_device_num - 1)):
         dst = flow.identity(
             src.with_distribute(flow.distribute.broadcast()))
     return dst
Example #6
 def build_s2s_all2all(input_blob, src_axis, dst_axis):
     with flow.scope.placement(src_device_type,
                               "0:0-" + str(src_device_num - 1)):
         src = flow.identity(
             input_blob.with_distribute(flow.distribute.split(src_axis)))
     with flow.scope.placement(dst_device_type,
                               "0:0-" + str(dst_device_num - 1)):
         dst = flow.identity(
             src.with_distribute(flow.distribute.split(dst_axis)))
     return dst
Example #7
 def build_p2b(input_blob, src_device_num, dst_device_num):
     with flow.scope.placement(src_device_type,
                               "0:0-" + str(src_device_num - 1)):
         src = flow.identity(
             input_blob.with_distribute(flow.distribute.split(0)))
         src = flow.math.reduce_sum(src, axis=0)
     with flow.scope.placement(dst_device_type,
                               "0:0-" + str(dst_device_num - 1)):
         dst = flow.identity(
             src.with_distribute(flow.distribute.broadcast()))
     return dst
Example #8
def _dense_layer(
    inputs,
    units,
    activation=None,
    use_bias=True,
    kernel_initializer=None,
    bias_initializer=None,
    trainable=True,
    name=None,
):
    in_shape = inputs.shape
    in_num_axes = len(in_shape)
    assert in_num_axes >= 2
    name_prefix = name if name is not None else id_util.UniqueStr("Dense_")
    inputs = flow.reshape(inputs,
                          (-1, in_shape[-1])) if in_num_axes > 2 else inputs
    weight = flow.get_variable(
        name="{}-weight".format(name_prefix),
        shape=(units, inputs.shape[1]),
        dtype=inputs.dtype,
        initializer=kernel_initializer
        if kernel_initializer is not None else flow.constant_initializer(0),
        trainable=trainable,
        model_name="weight",
    )
    weight = flow.identity(weight)
    weight = flow.repeat(weight, args.num_piece_in_batch)
    out = flow.matmul(a=inputs,
                      b=weight,
                      transpose_b=True,
                      name="{}_matmul".format(name_prefix))
    if use_bias:
        bias = flow.get_variable(
            name="{}-bias".format(name_prefix),
            shape=(units, ),
            dtype=inputs.dtype,
            initializer=bias_initializer
            if bias_initializer is not None else flow.constant_initializer(0),
            trainable=trainable,
            model_name="bias",
        )
        bias = flow.identity(bias)
        bias = flow.repeat(bias, args.num_piece_in_batch)
        out = flow.nn.bias_add(out,
                               bias,
                               name="{}_bias_add".format(name_prefix))
    out = (activation(out, name="{}_activation".format(name_prefix))
           if activation is not None else out)
    out = flow.reshape(out, in_shape[:-1] +
                       (units, )) if in_num_axes > 2 else out
    return out
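_dense_layer depends on an `args` namespace (num_piece_in_batch, consumed by flow.repeat for split-batch reuse of the parameters) and on id_util from the surrounding script. A hypothetical call site, only to show the intended shapes and arguments:

# Hypothetical usage inside a @flow.global_function job; `args` comes from the
# surrounding training script and is not defined in this snippet.
def mlp_head(x):  # x: (batch, in_features)
    h = _dense_layer(x, units=512, activation=flow.math.relu, name="fc1")
    return _dense_layer(h, units=10, name="fc2")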
Example #9
 def multi_lbi_job(x: oft.Numpy.Placeholder((96, 96, 96))):
     with flow.scope.placement(src_device_type,
                               "0:0-" + str(src_device_num - 1)):
         src_s0 = flow.identity(x.with_distribute(flow.distribute.split(0)))
         src_s1 = flow.identity(x.with_distribute(flow.distribute.split(1)))
         src_b = flow.identity(x.with_distribute(flow.distribute.split(1)))
         (t0_0, t0_1, t0_2) = flow.identity_n((src_s0, src_s1, src_b))
     with flow.scope.placement(dst_device_type,
                               "0:0-" + str(dst_device_num - 1)):
         t0_0 = t0_0.with_distribute(flow.distribute.split(1))
         t0_1 = t0_1.with_distribute(flow.distribute.broadcast())
         t0_2 = t0_2.with_distribute(flow.distribute.split(1))
         (t1_0, t1_1, t1_2) = flow.identity_n((t0_0, t0_1, t0_2))
     return (t1_0, t1_1, t1_2)
Example #10
def _conv2d_layer(
        args,
        name,
        input,
        filters,
        kernel_size=3,
        strides=1,
        padding="SAME",
        data_format="NCHW",
        dilation_rate=1,
        activation=op_conf_util.kRelu,
        use_bias=False,
        weight_initializer=flow.random_uniform_initializer(),
        bias_initializer=flow.random_uniform_initializer(),
):
    weight_shape = (filters, input.shape[1], kernel_size, kernel_size)
    weight = flow.get_variable(
        name + "-weight",
        shape=weight_shape,
        dtype=input.dtype,
        initializer=weight_initializer,
    )
    weight = flow.identity(weight)
    weight = flow.repeat(weight, args.num_piece_in_batch)
    output = flow.nn.conv2d(input,
                            weight,
                            strides,
                            padding,
                            None,
                            data_format,
                            dilation_rate,
                            name=name)
    if use_bias:
        bias = flow.get_variable(
            name + "-bias",
            shape=(filters, ),
            dtype=input.dtype,
            initializer=bias_initializer,
        )
        bias = flow.identity(bias)
        bias = flow.repeat(bias, args.num_piece_in_batch)
        output = flow.nn.bias_add(output, bias, data_format)
    if activation is not None:
        if activation == op_conf_util.kRelu:
            output = flow.math.relu(output)
        else:
            raise NotImplementedError
    return output
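_conv2d_layer follows the same pattern: the weight (and optional bias) is routed through flow.identity and flow.repeat so it can be reused across the pieces of a split batch. A hypothetical call site:

# Hypothetical usage; `args` (with num_piece_in_batch) comes from the caller.
def conv_stem(args, image):  # image: NCHW blob
    c1 = _conv2d_layer(args, "conv1", image, filters=32)
    c2 = _conv2d_layer(args, "conv2", c1, filters=64, strides=2)
    return c2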
 def SparseSoftmaxCrossEntropyWithLogitsJob(labels: oft.Numpy.Placeholder(
     (batch_size, ), dtype=type_name_to_flow_type[label_type])):
     with flow.scope.placement(device_type, "0:0"):
         x = flow.get_variable(
             "x",
             shape=(batch_size, num_classes),
             dtype=type_name_to_flow_type[data_type],
             initializer=flow.random_uniform_initializer(minval=-10,
                                                         maxval=10),
             trainable=True,
         )
     with flow.scope.placement(device_type, "0:0-3"):
         labels = flow.parallel_cast(labels,
                                     distribute=flow.distribute.broadcast())
         logits = flow.parallel_cast(
             x, distribute=flow.distribute.split(len(x.shape) - 1))
         loss = flow.nn.distributed_sparse_softmax_cross_entropy_with_logits(
             labels, logits)
         loss = flow.math.square(loss)
     with flow.scope.placement(device_type, "0:0"):
         loss = flow.identity(loss)
         flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
             [], [0.0001]),
                            momentum=0).minimize(loss)
         flow.watch(x, test_global_storage.Setter("x"))
         flow.watch_diff(x, test_global_storage.Setter("x_diff"))
         flow.watch(loss, test_global_storage.Setter("loss"))
         flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
         return loss
Example #12
 def nvtx_range_job(x: oft.Numpy.Placeholder((4, 4, 1024, 1024))):
     x += flow.get_variable(
         name="v1",
         shape=(1, ),
         dtype=flow.float,
         initializer=flow.zeros_initializer(),
     )
     x = flow.math.relu(x)
     x = flow.profiler.nvtx_start(x, mark_prefix="softmax")
     x = flow.nn.softmax(x)
     x = flow.nn.softmax(x)
     x = flow.nn.softmax(x)
     x = flow.nn.softmax(x)
     x = flow.nn.softmax(x)
     x = flow.profiler.nvtx_end(x, mark_prefix="softmax")
     x = flow.math.relu(x)
     x = flow.profiler.nvtx_start(x, mark_prefix="gelu")
     x = flow.math.gelu(x)
     x = flow.math.gelu(x)
     x = flow.math.gelu(x)
     x = flow.math.gelu(x)
     x = flow.math.gelu(x)
     x = flow.math.gelu(x)
     x = flow.profiler.nvtx_end(x, mark_prefix="gelu")
     flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler([],
                                                                  [0]),
                        momentum=0).minimize(x)
     return flow.identity(x)
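The nvtx_start/nvtx_end pairs do not change the computation; they only emit named ranges ("softmax", "gelu") that appear when the process is profiled with an external tool such as Nsight Systems. A minimal driver, assuming the job above has been registered with @flow.global_function:

# Sketch only: run the job a few times so the NVTX ranges repeat in the timeline.
import numpy as np

x = np.random.rand(4, 4, 1024, 1024).astype(np.float32)
for _ in range(10):
    nvtx_range_job(x).get()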
Example #13
 def PartialFcJob(labels: oft.Numpy.Placeholder(
     (batch_size, ), dtype=type_name_to_flow_type[label_type])):
     with flow.scope.placement(device_type, "0:0"):
         x = flow.get_variable(
             "x-weight",
             shape=(num_classes, 128),
             dtype=flow.float,
             initializer=flow.random_uniform_initializer(minval=-10,
                                                         maxval=10),
             trainable=True,
         )
     with flow.scope.placement(device_type, "0:0-3"):
         labels_distribute = flow.distribute.broadcast()
         weight_distribute = flow.distribute.split(0)
         (
             mapped_label,
             sampled_label,
             sampled_weight,
         ) = flow.distributed_partial_fc_sample(
             weight=x.with_distribute(weight_distribute),
             label=labels.with_distribute(labels_distribute),
             num_sample=num_sample,
         )
     with flow.scope.placement(device_type, "0:0"):
         sampled_weight = flow.identity(sampled_weight)
         loss = flow.math.square(sampled_weight)
         flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
             [], [0.0001]),
                            momentum=0).minimize(loss)
         flow.watch(x, test_global_storage.Setter("x"))
         flow.watch_diff(x, test_global_storage.Setter("x_diff"))
         flow.watch_diff(sampled_weight,
                         test_global_storage.Setter("sampled_weight_diff"))
     return (x, mapped_label, sampled_label, sampled_weight)
Example #14
 def SoftmaxJob():
     with flow.scope.placement(device_type, "0:0"):
         x = flow.get_variable(
             "x",
             shape=x_shape,
             dtype=dtype,
             initializer=flow.random_uniform_initializer(minval=-1.0,
                                                         maxval=1.0),
             trainable=True,
         )
         x1 = x
         x = flow.identity(x)
         if data_type == "float16":
             loss = flow.cast(
                 flow.nn.softmax(flow.cast(x, dtype=flow.float16),
                                 axis=axis),
                 dtype=flow.float,
             )
         else:
             loss = flow.nn.softmax(x, axis=axis)
         flow.watch(x, test_global_storage.Setter("x"))
         flow.watch_diff(x, test_global_storage.Setter("x_diff"))
         flow.watch(loss, test_global_storage.Setter("loss"))
         flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
         total_loss = loss * x1
         flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
             [], [0.0001]),
                            momentum=0).minimize(total_loss)
         return loss
Example #15
    def __call__(self, hidden_states):
        """
        hidden_states shape: (batch_size, seq_length, hidden_size)
        data parallel sbp: S(0)
        2d sbp: [S(0), B]
        """
        assert len(hidden_states.shape) == 3
        assert hidden_states.shape[-1] == self.hidden_size
        assert np.prod(
            hidden_states.shape[:-1]) == self.batch_size * self.seq_length

        h = hidden_states
        with flow.scope.namespace(self.name):
            if self.enable_profiling:
                h = flow.profiler.nvtx_start(
                    h, mark_prefix=f"transformer-{self.name}")

            h = flow.identity(h)
            with flow.experimental.scope.config(
                    checkpointing=self.checkpoint_activations):
                # input layernorm
                norm1 = layernorm("layernorm_1", h)
                # attention
                h = h + self.attn(norm1)
                # output layernorm
                norm2 = layernorm("layernorm_2", h)
                # mlp
                h = h + self.mlp(norm2)

            if self.enable_profiling:
                h = flow.profiler.nvtx_end(
                    h, mark_prefix=f"transformer-{self.name}")

        return h
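The transformer block above calls a layernorm helper defined elsewhere in the model. A hedged sketch of what such a helper might look like with OneFlow's layer_norm op, normalizing over the hidden axis (the exact epsilon and parameter setup of the original are unknown):

# Hypothetical helper, not the original implementation.
def layernorm(name, x):
    # Normalize over the last (hidden) axis; scale/center parameters are
    # created under the given name.
    return flow.layers.layer_norm(x, begin_norm_axis=-1, name=name)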
Example #16
 def dynamic_concat_job(
     input_0_def: oft.ListNumpy.Placeholder(
         shape=input_static_shape, dtype=flow.float
     ),
     input_1_def: oft.ListNumpy.Placeholder(
         shape=input_static_shape, dtype=flow.float
     ),
 ):
     var_0 = flow.get_variable(
         "Var0",
         shape=(1,),
         dtype=flow.float,
         initializer=flow.constant_initializer(value=1, dtype=flow.float),
         trainable=True,
     )
     var_1 = flow.get_variable(
         "Var1",
         shape=(1,),
         dtype=flow.float,
         initializer=flow.constant_initializer(value=1, dtype=flow.float),
         trainable=True,
     )
     var_0 = flow.cast_to_current_logical_view(var_0)
     var_1 = flow.cast_to_current_logical_view(var_1)
     input_0_def = flow.cast_to_current_logical_view(input_0_def)
     input_1_def = flow.cast_to_current_logical_view(input_1_def)
     if callable(watch_cb):
         flow.watch(var_0, watch_cb)
         flow.watch(var_1, watch_cb)
         flow.watch(flow.identity(input_0_def), watch_cb)
         flow.watch(flow.identity(input_1_def), watch_cb)
     var_0 = var_0 * input_0_def
     var_1 = var_1 * input_1_def
     if callable(watch_cb):
         flow.watch(var_0, watch_cb)
         flow.watch(var_1, watch_cb)
     result = flow.concat(
         [var_0, var_1], axis=axis, max_dim_size=input_static_shape[axis]
     )
     flow.optimizer.SGD(
         flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0
     ).minimize(result)
     flow.watch_diff(var_0, make_watch_diff_cb(0))
     flow.watch_diff(var_1, make_watch_diff_cb(1))
     return result
Example #17
def cast_to_current_logical_view(
    x: oneflow._oneflow_internal.BlobDesc,
) -> oneflow._oneflow_internal.BlobDesc:
    if (
        isinstance(x, oneflow._oneflow_internal.ConsistentBlob)
        and flow.scope.mirrored_view_enabled()
    ) or (
        isinstance(x, oneflow._oneflow_internal.MirroredBlob)
        and flow.scope.consistent_view_enabled()
    ):
        x = flow.identity(x)
    return x
Example #18
 def broadcast_to_broadcast_job(x: oft.Numpy.Placeholder((96, 96, 96))):
     with flow.scope.placement(
             src_device_type,
         [
             "0:0-" + str(src_device_num - 1),
             "1:0-" + str(src_device_num - 1)
         ],
     ):
         src = flow.identity(x.with_distribute(flow.distribute.broadcast()))
     with flow.scope.placement(
             dst_device_type,
         [
             "0:0-" + str(dst_device_num - 1),
             "1:0-" + str(dst_device_num - 1)
         ],
     ):
         dst = flow.identity(
             src.with_distribute(flow.distribute.broadcast()))
     return dst
Example #19
 def split_to_split_job(x: oft.Numpy.Placeholder((32, 16, 64, 48))):
     with flow.scope.placement(
             src_device_type,
         [
             "0:0-" + str(src_device_num - 1),
             "1:0-" + str(src_device_num - 1)
         ],
     ):
         src = flow.identity(
             x.with_distribute(flow.distribute.split(src_axis)))
     with flow.scope.placement(
             dst_device_type,
         [
             "0:0-" + str(dst_device_num - 1),
             "1:0-" + str(dst_device_num - 1)
         ],
     ):
         dst = flow.identity(
             src.with_distribute(flow.distribute.split(dst_axis)))
     return dst
Example #20
 def partial_sum_to_broadcast_job(x: oft.Numpy.Placeholder((96, 96, 96))):
     with flow.scope.placement(
             src_device_type,
         [
             "0:0-" + str(src_device_num - 1),
             "1:0-" + str(src_device_num - 1)
         ],
     ):
         src = flow.identity(x.with_distribute(flow.distribute.split(0)))
         src = flow.math.reduce_sum(src, axis=0)
     with flow.scope.placement(
             dst_device_type,
         [
             "0:0-" + str(dst_device_num - 1),
             "1:0-" + str(dst_device_num - 1)
         ],
     ):
         dst = flow.identity(
             src.with_distribute(flow.distribute.broadcast()))
     return dst
Example #21
 def test_job(
     x: oft.Numpy.Placeholder(input_shape, dtype=flow.float32),
     labels: oft.Numpy.Placeholder(label_shape, dtype=flow.int32),
 ):
     with flow.scope.placement(device_type, "0:0"):
         v = flow.get_variable(
             name="v",
             shape=(1,),
             dtype=flow.float32,
             initializer=flow.zeros_initializer(),
         )
         x = x + v
         x1 = flow.identity(x)
         x2 = flow.identity(x)
         flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
         flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))
         x1 = flow.cast(x1, data_type)
         x2 = flow.cast(x2, data_type)
     with flow.scope.placement(device_type, "0:0-3"):
         y1 = (
             flow.combined_margin_loss(
                 x1.with_distribute(flow.distribute.split(1)),
                 labels.with_distribute(flow.distribute.broadcast()),
                 m1,
                 m2,
                 m3,
             )
             * s
         )
         y2 = margin_loss(m1, m2, m3, s, x2, labels)
     with flow.scope.placement(device_type, "0:0"):
         y1 = flow.cast(y1, flow.float)
         y2 = flow.cast(y2, flow.float)
         flow.watch(y1, test_global_storage.Setter("y1"))
         flow.watch(y2, test_global_storage.Setter("y2"))
         loss = y1 + y2
         flow.optimizer.SGD(
             flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
         ).minimize(flow.math.reduce_sum(loss))
     return loss
Example #22
 def ReduceMinJob(x: oft.Numpy.Placeholder(input_shape, dtype=flow.float)):
     with flow.scope.placement(device_type, "0:0"):
         x += flow.get_variable(
             name="v1",
             shape=input_shape,
             dtype=flow.float,
             initializer=flow.zeros_initializer(),
         )
         loss = flow.math.reduce_min(x, axis=axis, keepdims=keepdims)
         loss = flow.identity(loss)
         flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
             [], [0.0001]),
                            momentum=0).minimize(loss)
         flow.watch(x, test_global_storage.Setter("x"))
         flow.watch_diff(x, test_global_storage.Setter("x_diff"))
         flow.watch(loss, test_global_storage.Setter("loss"))
         flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
         return loss
Example #23
 def ReduceMeanJob():
     with flow.scope.placement(device_type, "0:0"):
         x = flow.get_variable(
             "x",
             shape=input_shape,
             dtype=flow.float,
             initializer=flow.random_uniform_initializer(minval=-10,
                                                         maxval=10),
             trainable=True,
         )
         loss = flow.math.reduce_mean(x, axis=axis, keepdims=keepdims)
         loss = flow.identity(loss)
         flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
             [], [0.0001]),
                            momentum=0).minimize(loss)
         flow.watch(x, test_global_storage.Setter("x"))
         flow.watch_diff(x, test_global_storage.Setter("x_diff"))
         flow.watch(loss, test_global_storage.Setter("loss"))
         flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
         return loss
Example #24
 def two_stage_reduce_job(x: oft.Numpy.Placeholder((4, 20, 20, 20))):
     with flow.scope.placement(device_type, "0:0"):
         x += flow.get_variable(
             name="v1",
             shape=(1,),
             dtype=flow.float,
             initializer=flow.zeros_initializer(),
         )
     with flow.scope.placement(device_type, "0:0-3"):
         loss = flow_func(
             x.with_distribute(flow.distribute.split(split_axis)),
             axis=axis,
             keepdims=True,
         )
         loss = flow.identity(loss)
         flow.optimizer.SGD(
             flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0
         ).minimize(loss)
         flow.watch(x, test_global_storage.Setter("x"))
         flow.watch_diff(x, test_global_storage.Setter("x_diff"))
         return loss
 def test_fused_scale_tril_softmax_dropout_fw_bw_job():
     with flow.scope.placement(device_type, "0:0"):
         x = flow.get_variable(
             "x",
             shape=x_shape,
             dtype=dtype,
             initializer=flow.random_uniform_initializer(minval=-1.0, maxval=1.0),
             trainable=True,
         )
         flow.watch(x, test_global_storage.Setter("x"))
         x1 = flow.identity(x)
         x2 = flow.identity(x)
         flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
         flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))
         # Compare the unfused fused_scale_tril -> softmax -> dropout chain
         # against the single fused_scale_tril_softmax_dropout kernel.
         if data_type == "float16":
             y1 = flow.cast(
                 flow.nn.dropout(
                     flow.nn.softmax(
                         flow.math.fused_scale_tril(
                             flow.cast(x1, dtype=flow.float16),
                             diagonal=diagonal,
                             fill_value=fill_value,
                             scale=scale,
                         )
                     ),
                     rate=rate,
                     name="dropout",
                 ),
                 dtype=flow.float,
             )
             y2 = flow.cast(
                 flow.nn.fused_scale_tril_softmax_dropout(
                     flow.cast(x2, dtype=flow.float16),
                     diagonal=diagonal,
                     fill_value=fill_value,
                     scale=scale,
                     rate=rate,
                 ),
                 dtype=flow.float,
             )
         else:
             y1 = flow.nn.dropout(
                 flow.nn.softmax(
                     flow.math.fused_scale_tril(
                         x1, diagonal=diagonal, fill_value=fill_value, scale=scale
                     )
                 ),
                 rate=rate,
                 name="dropout",
             )
             y2 = flow.nn.fused_scale_tril_softmax_dropout(
                 x2,
                 diagonal=diagonal,
                 fill_value=fill_value,
                 scale=scale,
                 rate=rate,
             )
         flow.watch(y1, test_global_storage.Setter("y1"))
         flow.watch(y2, test_global_storage.Setter("y2"))
         flow.watch_diff(y1, test_global_storage.Setter("y1_diff"))
         flow.watch_diff(y2, test_global_storage.Setter("y2_diff"))
         loss = y1 + y2
         total_loss = loss * x
     flow.optimizer.SGD(
         flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
     ).minimize(flow.math.reduce_sum(total_loss))
     return loss
Example #26
 def trt_identity_job(x=flow.FixedTensorDef(input_shape, dtype=dtype)):
     return flow.identity(x)
 def identity_fn():
     with flow.scope.placement(dst_device_tag,
                               "0:0-{}".format(device_num - 1)):
         var = get_var()
         return flow.identity(var)
 def split_to_split_job(x: oft.Numpy.Placeholder((32, 16, 64, 48))):
     with flow.scope.placement("gpu", "0:0-1"):
         src = flow.identity(x.with_distribute(flow.distribute.split(src_axis)))
         dst = flow.identity(src.with_distribute(flow.distribute.split(dst_axis)))
     return dst
 def split_to_broadcast_job(x: oft.Numpy.Placeholder((96, 96))):
     with flow.scope.placement("gpu", "0:0-1"):
         src = flow.identity(x.with_distribute(flow.distribute.split(src_axis)))
         dst = flow.identity(src.with_distribute(flow.distribute.broadcast()))
     return dst
 def partial_sum_to_split_job(x: oft.Numpy.Placeholder((96, 96, 96))):
     with flow.scope.placement("gpu", "0:0-1"):
         src = flow.identity(x.with_distribute(flow.distribute.split(0)))
         src = flow.math.reduce_sum(src, axis=0)
         dst = flow.identity(src.with_distribute(flow.distribute.split(dst_axis)))
     return dst
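Like the multi-device variants earlier, these two-GPU boxing jobs are usually verified by comparing the job output against a plain NumPy computation. A hedged sketch of that check for the partial-sum case, assuming the jobs above are registered with @flow.global_function and two GPUs are configured:

# Sketch only: the P (partial-sum) -> S(dst_axis) boxing must still reproduce
# the full reduce_sum over axis 0 of the logical blob.
import numpy as np

x = np.random.rand(96, 96, 96).astype(np.float32)
out = partial_sum_to_split_job(x).get().numpy()
assert np.allclose(out, np.sum(x, axis=0))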