def PartialFcJob(labels: oft.Numpy.Placeholder(
        (batch_size,), dtype=type_name_to_flow_type[label_type])):
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "x-weight",
            shape=(num_classes, 128),
            dtype=flow.float,
            initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
            trainable=True,
        )
    with flow.scope.placement(device_type, "0:0-3"):
        # Broadcast the labels and split the weight rows (model parallel)
        # across the four devices before sampling.
        labels_distribute = flow.distribute.broadcast()
        weight_distribute = flow.distribute.split(0)
        (
            mapped_label,
            sampled_label,
            sampled_weight,
        ) = flow.distributed_partial_fc_sample(
            weight=x.with_distribute(weight_distribute),
            label=labels.with_distribute(labels_distribute),
            num_sample=num_sample,
        )
    with flow.scope.placement(device_type, "0:0"):
        sampled_weight = flow.identity(sampled_weight)
        loss = flow.math.square(sampled_weight)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch_diff(sampled_weight, test_global_storage.Setter("sampled_weight_diff"))
    return x, mapped_label, sampled_label, sampled_weight
def test_job(x: oft.Numpy.Placeholder(input_shape, dtype=flow.float32)):
    v = flow.get_variable(
        name="v",
        shape=(1,),
        dtype=flow.float32,
        initializer=flow.zeros_initializer(),
    )
    x = x + v
    x1 = flow.identity(x)
    x2 = flow.identity(x)
    flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
    flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))
    x1 = flow.cast(x1, data_type)
    x2 = flow.cast(x2, data_type)
    # Compare the fused batch_normalization_relu op against plain
    # batch_normalization followed by relu.
    y1 = flow.layers.batch_normalization_relu(x1, axis=axis, name="BN1")
    y2 = flow.math.relu(flow.layers.batch_normalization(x2, axis=axis, name="BN2"))
    y1 = flow.cast(y1, flow.float32)
    y2 = flow.cast(y2, flow.float32)
    flow.watch(y1, test_global_storage.Setter("y1"))
    flow.watch(y2, test_global_storage.Setter("y2"))
    loss = flow.math.reduce_mean(y1 + y2)
    flow.optimizer.SGD(
        flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
    ).minimize(flow.math.reduce_sum(loss))
    return loss
def UpsampleJob():
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "input",
            shape=input_shape,
            dtype=type_name_to_flow_type[dtype],
            initializer=flow.random_uniform_initializer(minval=2, maxval=5),
            trainable=True,
        )
        loss = flow.layers.upsample_2d(
            x,
            size=size,
            data_format=data_format,
            interpolation=interpolation,
            align_corners=align_corners,
        )
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
def SoftmaxJob():
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "x",
            shape=x_shape,
            dtype=dtype,
            initializer=flow.random_uniform_initializer(minval=-0.1, maxval=0.1),
            trainable=True,
        )
        if data_type == "float16":
            # Run softmax in half precision, then cast back for comparison.
            loss = flow.cast(
                flow.nn.softmax(flow.cast(x, dtype=flow.float16), axis=axis),
                dtype=flow.float,
            )
        else:
            loss = flow.nn.softmax(x, axis=axis)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
def SparseSoftmaxCrossEntropyWithLogitsJob(labels: oft.Numpy.Placeholder(
        (batch_size,), dtype=type_name_to_flow_type[label_type])):
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "x",
            shape=(batch_size, num_classes),
            dtype=type_name_to_flow_type[data_type],
            initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
            trainable=True,
        )
    with flow.scope.placement(device_type, "0:0-3"):
        # Broadcast the labels and split the logits along the class axis
        # (model parallel) across the four devices.
        labels_distribute = flow.distribute.broadcast()
        logits_distribute = flow.distribute.split(len(x.shape) - 1)
        loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels.with_distribute(labels_distribute),
            logits=x.with_distribute(logits_distribute),
        )
        loss = flow.math.square(loss)
    with flow.scope.placement(device_type, "0:0"):
        loss = flow.identity(loss)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
    return loss
def TestMultiInputJob():
    with flow.scope.placement("gpu", "0:0"):
        x1 = flow.get_variable(
            "x1",
            shape=shape,
            dtype=flow.float,
            initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
            trainable=True,
        )
        x2 = flow.get_variable(
            "x2",
            shape=shape,
            dtype=flow.float,
            initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
            trainable=True,
        )
        loss = TestMultiInput(x1, x2)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)
        flow.watch(x1, test_global_storage.Setter("x1"))
        flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
        flow.watch(x2, test_global_storage.Setter("x2"))
        flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))
        return loss
def test_masked_fill_fw_bw_job(
    x: oft.Numpy.Placeholder(x_shape, dtype=flow_type),
    mask: oft.Numpy.Placeholder(mask_shape, dtype=flow_type),
):
    with flow.scope.placement(device, "0:0"):
        y = flow.get_variable(
            name="vx",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        x += flow.cast(y, flow_type)
        mask = flow.cast(mask, dtype=flow.int8)
        if type_name == "float16":
            out = flow.cast(
                flow.masked_fill(flow.cast(x, flow.float16), mask, value),
                flow.float,
            )
        else:
            out = flow.masked_fill(x, mask, value)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(out)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(out, test_global_storage.Setter("out"))
        flow.watch_diff(out, test_global_storage.Setter("out_diff"))
        return out
def diag_job(
    input_tensor: tp.Numpy.Placeholder(shape=input_shape, dtype=flow.float),
) -> tp.Numpy:
    input_var = flow.get_variable(
        "input_tensor",
        shape=input_shape,
        dtype=flow.float,
        initializer=flow.zeros_initializer(),
        trainable=True,
    )
    input_tensor = input_tensor + input_var
    input_tensor = flow.cast_to_current_logical_view(input_tensor)
    input_tensor = flow.cast(input_tensor, type_name_to_flow_type[dtype])
    output = flow.diag(input_tensor, dim)
    # Integer outputs cannot be minimized directly; cast them back to float.
    if output.dtype in (flow.int8, flow.int32, flow.int64):
        output = flow.cast(output, flow.float)
    flow.optimizer.Adam(
        flow.optimizer.PiecewiseConstantScheduler([], [1e-4])
    ).minimize(output)
    flow.watch(input_tensor, test_global_storage.Setter("x"))
    flow.watch_diff(input_tensor, test_global_storage.Setter("x_diff"))
    flow.watch(output, test_global_storage.Setter("output"))
    flow.watch_diff(output, test_global_storage.Setter("output_diff"))
    return output
def SparseSoftmaxCrossEntropyWithLogitsJob(labels: oft.Numpy.Placeholder(
        (batch_size,), dtype=type_name_to_flow_type[label_type])):
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "x",
            shape=(batch_size, num_classes),
            dtype=type_name_to_flow_type[data_type],
            initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
            trainable=True,
        )
        prediction = flow.nn.softmax(logits=x)
    with flow.scope.placement(device_type, "0:0-3"):
        labels_distribute = flow.distribute.broadcast()
        prediction_distribute = flow.distribute.split(len(prediction.shape) - 1)
        loss = flow.nn.sparse_cross_entropy(
            labels=labels.with_distribute(labels_distribute),
            prediction=prediction.with_distribute(prediction_distribute),
        )
    with flow.scope.placement(device_type, "0:0"):
        loss = flow.math.square(loss)
        flow.losses.add_loss(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
    return loss
def DropoutJob():
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "x",
            shape=x_shape,
            dtype=dtype,
            initializer=flow.random_uniform_initializer(minval=-1, maxval=1),
            trainable=True,
        )
        if data_type == "float16":
            # Round-trip x through float16 first so the watched input matches
            # what the half-precision dropout actually sees.
            x = flow.cast(flow.cast(x, flow.float16), dtype)
            of_out = flow.cast(
                flow.nn.dropout(
                    flow.cast(x, flow.float16), rate=rate, seed=seed, name="dropout"
                ),
                dtype,
            )
        else:
            of_out = flow.nn.dropout(x, rate=rate, seed=seed, name="dropout")
        loss = flow.math.square(of_out)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(of_out, test_global_storage.Setter("out"))
        flow.watch_diff(of_out, test_global_storage.Setter("out_diff"))
        return loss
def test_fused_scale_tril_fw_bw_job(
    x: oft.Numpy.Placeholder(shape, dtype=flow_type),
):
    with flow.scope.placement(device, "0:0"):
        x_var = flow.get_variable(
            name="xv",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        x += flow.cast(x_var, dtype=flow_type)
        if type_name == "float16":
            out = flow.cast(
                flow.math.fused_scale_tril(
                    flow.cast(x, flow.float16), diagonal, scale=scale
                ),
                flow.float,
            )
        else:
            out = flow.math.fused_scale_tril(x, diagonal, scale=scale)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(out)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(out, test_global_storage.Setter("out"))
        flow.watch_diff(out, test_global_storage.Setter("out_diff"))
        return out
def SplitLikeJob(x: oft.Numpy.Placeholder(x_shape, dtype=flow.float)):
    v = flow.get_variable(
        "x",
        shape=x_shape,
        dtype=flow.float,
        initializer=flow.constant_initializer(0),
        trainable=True,
    )
    x += v
    like0 = flow.constant(0, dtype=flow.float, shape=like0_shape)
    like1 = flow.constant(0, dtype=flow.float, shape=like1_shape)
    with flow.scope.placement("gpu", "0:0"):
        y0, y1 = split_like(x, [like0, like1], "split_like")
        loss = y0
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
    return y0, y1
def Matmul(
    x: tp.Numpy.Placeholder((4, 4), dtype=flow.float32, batch_axis=None),
    y: tp.Numpy.Placeholder((4, 4), dtype=flow.float32, batch_axis=1),
) -> tp.Numpy:
    s = flow.matmul(x, y)  # model parallel
    flow.watch(s, Watch)
    z = flow.matmul(s, x)  # data parallel
    return z
def slice_fn():
    with flow.scope.placement(device_tag, "0:0-{}".format(device_num - 1)):
        var = flow.get_variable(
            name="var",
            shape=var_shape,
            dtype=flow_dtype,
            initializer=flow.random_uniform_initializer(-10, 10, dtype=flow_dtype),
            distribute=flow.distribute.split(split_axis),
        )
        flow.watch(var, test_global_storage.Setter("var"))
        ret = flow.experimental.logical_slice(var, slice_tuples)
        return ret
def MatmulJob():
    with flow.scope.placement(device_type, "0:0"):
        a = flow.get_variable(
            "a",
            shape=a_shape,
            dtype=dtype,
            initializer=flow.random_uniform_initializer(minval=0, maxval=1),
            trainable=True,
        )
        b = flow.get_variable(
            "b",
            shape=b_shape,
            dtype=dtype,
            initializer=flow.random_uniform_initializer(minval=0, maxval=1),
            trainable=True,
        )
        if data_type == "float16":
            # Half-precision path: matmul in float16, then cast the result
            # back to float for the loss.
            out = flow.matmul(
                flow.cast(a, dtype=flow.float16),
                flow.cast(b, dtype=flow.float16),
                transpose_a,
                transpose_b,
                alpha,
            )
            c = flow.get_variable(
                "c",
                shape=out.shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=-1, maxval=1),
                trainable=True,
            )
            loss = flow.cast(out + flow.cast(c, dtype=flow.float16), dtype=flow.float)
        else:
            out = flow.matmul(a, b, transpose_a, transpose_b, alpha)
            c = flow.get_variable(
                "c",
                shape=out.shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=-1, maxval=1),
                trainable=True,
            )
            loss = out + c
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)
        flow.watch(a, test_global_storage.Setter("a"))
        flow.watch_diff(a, test_global_storage.Setter("a_diff"))
        flow.watch(b, test_global_storage.Setter("b"))
        flow.watch_diff(b, test_global_storage.Setter("b_diff"))
        flow.watch(c, test_global_storage.Setter("c"))
        flow.watch_diff(c, test_global_storage.Setter("c_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
def ReduceSumJob():
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "in",
            shape=input_shape,
            dtype=flow.float,
            initializer=flow.random_uniform_initializer(minval=2, maxval=5),
            trainable=True,
        )
        loss = flow.math.reduce_sum(x, axis=axis, keepdims=keepdims)
        flow.losses.add_loss(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
def CastJob():
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "in",
            shape=input_shape,
            dtype=type_name_to_flow_type[dtype],
            initializer=flow.random_uniform_initializer(),
            trainable=True,
        )
        loss = flow.cast(x, dtype=flow.float)
        flow.losses.add_loss(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
def ReduceMaxJob(x: oft.Numpy.Placeholder(input_shape, dtype=flow.float)):
    with flow.scope.placement(device_type, "0:0"):
        x += flow.get_variable(
            name="v1",
            shape=input_shape,
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        loss = flow.math.reduce_max(x, axis=axis, keepdims=keepdims)
        loss = flow.identity(loss)
        flow.losses.add_loss(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
def ReduceSumJob():
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "in",
            shape=input_shape,
            dtype=flow.float,
            initializer=flow.random_uniform_initializer(minval=2, maxval=5),
            trainable=True,
        )
        loss = flow.math.reduce_sum(x, axis=axis, keepdims=keepdims)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
def LeakyReluJob():
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "x",
            shape=x_shape,
            dtype=type_name_to_flow_type[data_type],
            initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
            trainable=True,
        )
        loss = flow.nn.leaky_relu(x, alpha=alpha)
        flow.losses.add_loss(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
def L2NormalizeJob():
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "x",
            shape=x_shape,
            dtype=type_name_to_flow_type[data_type],
            initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
            trainable=True,
        )
        loss = flow.math.l2_normalize(x, axis=axis, epsilon=epsilon)
        flow.losses.add_loss(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
def SquareJob():
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "x",
            shape=x_shape,
            dtype=flow.float,
            initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
            trainable=True,
        )
        loss = flow.math.square(x)
        flow.losses.add_loss(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
def test_job(
    x: oft.Numpy.Placeholder(input_shape, dtype=flow.float32),
    labels: oft.Numpy.Placeholder(label_shape, dtype=flow.int32),
):
    with flow.scope.placement("gpu", "0:0"):
        v = flow.get_variable(
            name="v",
            shape=(1,),
            dtype=flow.float32,
            initializer=flow.zeros_initializer(),
        )
        x = x + v
        x1 = flow.identity(x)
        x2 = flow.identity(x)
        flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
        flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))
        x1 = flow.cast(x1, data_type)
        x2 = flow.cast(x2, data_type)
    with flow.scope.placement("gpu", "0:0-3"):
        # Compare the fused combined_margin_loss op (model parallel, split on
        # the class axis) against the reference margin_loss implementation.
        y1 = (
            flow.combined_margin_loss(
                x1.with_distribute(flow.distribute.split(1)),
                labels.with_distribute(flow.distribute.broadcast()),
                m1,
                m2,
                m3,
            )
            * s
        )
        y2 = margin_loss(m1, m2, m3, s, x2, labels)
    with flow.scope.placement("gpu", "0:0"):
        y1 = flow.cast(y1, flow.float)
        y2 = flow.cast(y2, flow.float)
        flow.watch(y1, test_global_storage.Setter("y1"))
        flow.watch(y2, test_global_storage.Setter("y2"))
        loss = y1 + y2
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
        ).minimize(flow.math.reduce_sum(loss))
    return loss
def CastJob():
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "in",
            shape=input_shape,
            dtype=type_name_to_flow_type[dtype],
            initializer=flow.random_uniform_initializer(),
            trainable=True,
        )
        loss = flow.cast(x, dtype=flow.float)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
def FlattenJob() -> flow.typing.Numpy:
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "in",
            shape=input_shape,
            dtype=flow.float,
            initializer=flow.random_uniform_initializer(minval=2, maxval=5),
            trainable=True,
        )
        loss = flow.flatten(x, start_dim=start_dim, end_dim=end_dim)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        return loss
def TransposeJob():
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "input",
            shape=input_shape,
            dtype=flow.float,
            initializer=flow.random_uniform_initializer(minval=2, maxval=5),
            trainable=True,
        )
        loss = flow.transpose(x, perm)
        flow.losses.add_loss(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
def ActivationJob():
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "x",
            shape=shape,
            dtype=data_type,
            initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
            trainable=True,
        )
        loss = of_activation_map[activation_type](x)
        flow.losses.add_loss(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
def ReduceMinJob(x: oft.Numpy.Placeholder(input_shape, dtype=flow.float)):
    with flow.scope.placement(device_type, "0:0"):
        x += flow.get_variable(
            name="v1",
            shape=input_shape,
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        loss = flow.math.reduce_min(x, axis=axis, keepdims=keepdims)
        loss = flow.identity(loss)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
def SoftmaxCrossEntropyWithLogitsJob(labels: oft.Numpy.Placeholder(
        shape, dtype=type_name_to_flow_type[data_type])):
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "x",
            shape=shape,
            dtype=type_name_to_flow_type[data_type],
            initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
            trainable=True,
        )
        loss = flow.nn.softmax_cross_entropy_with_logits(labels=labels, logits=x)
        flow.losses.add_loss(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
def two_stage_reduce_job(x: oft.Numpy.Placeholder((4, 20, 20, 20))):
    with flow.scope.placement(device_type, "0:0"):
        x += flow.get_variable(
            name="v1",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
    with flow.scope.placement(device_type, "0:0-3"):
        # Split the input across four devices so the reduce runs in two
        # stages: a device-local reduce followed by a global one.
        loss = flow_func(
            x.with_distribute(flow.distribute.split(split_axis)),
            axis=axis,
            keepdims=True,
        )
        loss = flow.identity(loss)
        flow.losses.add_loss(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
    return loss