def oneflow_Xmum(
    of_input_1: tp.ListNumpy.Placeholder(shape=data_shape),
    of_input_2: tp.ListNumpy.Placeholder(shape=data_shape),
) -> tp.ListNumpy:
    with flow.scope.placement(device_type, "0:0"):
        v1 = flow.get_variable(
            shape=(1,),
            dtype=flow.float32,
            initializer=flow.zeros_initializer(),
            name="x1_var",
        )
        v1 = flow.cast_to_current_logical_view(v1)
        x1_var = of_input_1 + v1
        v2 = flow.get_variable(
            shape=(1,),
            dtype=flow.float32,
            initializer=flow.zeros_initializer(),
            name="x2_var",
        )
        v2 = flow.cast_to_current_logical_view(v2)
        x2_var = of_input_2 + v2

    flow.watch_diff(x1_var, assert_prediction_grad)
    if compare_type == "maximum":
        of_Xmum_out = flow.math.maximum(x1_var, x2_var)
    elif compare_type == "minimum":
        of_Xmum_out = flow.math.minimum(x1_var, x2_var)
    with flow.scope.placement(device_type, "0:0"):
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
        ).minimize(of_Xmum_out)
    return of_Xmum_out
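# Context: each function in this file is the body of a OneFlow "job function";
# names such as flow, tp/oft, device_type, data_shape, and compare_type are
# captured from an enclosing test. Below is a minimal, self-contained sketch of
# how such a job is typically registered and invoked, assuming the legacy
# (pre-1.0) single-client API; train_job and its 2x3 shape are hypothetical,
# not taken from the tests in this file.
import numpy as np
import oneflow as flow
import oneflow.typing as tp

func_config = flow.FunctionConfig()
func_config.default_data_type(flow.float32)


@flow.global_function(type="train", function_config=func_config)
def train_job(x: tp.Numpy.Placeholder((2, 3))) -> tp.Numpy:
    # A trainable variable makes the graph differentiable end to end.
    v = flow.get_variable(
        name="v",
        shape=(1,),
        dtype=flow.float32,
        initializer=flow.zeros_initializer(),
    )
    y = flow.math.relu(x + v)
    flow.optimizer.SGD(
        flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
    ).minimize(y)
    return y


# Jobs are called like plain Python functions with NumPy inputs; the tp.Numpy
# return annotation makes the call synchronous.
out = train_job(np.ones((2, 3), dtype=np.float32))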
def _batch_norm(
    inputs,
    epsilon,
    center=True,
    scale=True,
    trainable=True,
    is_training=True,
    name=None,
):
    return flow.layers.batch_normalization(
        inputs=inputs,
        axis=1,
        momentum=0.9,
        epsilon=epsilon,
        center=center,
        scale=scale,
        beta_initializer=flow.zeros_initializer(),
        gamma_initializer=flow.ones_initializer(),
        beta_regularizer=_get_regularizer(),
        gamma_regularizer=_get_regularizer(),
        moving_mean_initializer=flow.zeros_initializer(),
        moving_variance_initializer=flow.ones_initializer(),
        trainable=trainable,
        training=is_training,
        name=name,
    )
def FlowJob(
    x: oft.Numpy.Placeholder(x.shape, dtype=flow_type),
    y: oft.Numpy.Placeholder(y.shape, dtype=flow_type),
):
    with flow.scope.placement(device_type, "0:0"):
        x += flow.get_variable(
            name="x",
            shape=x.shape,
            dtype=flow_type,
            initializer=flow.zeros_initializer(),
            trainable=True,
        )
        y += flow.get_variable(
            name="y",
            shape=y.shape,
            dtype=flow_type,
            initializer=flow.zeros_initializer(),
            trainable=True,
        )
        loss = flow_op(x, y)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0
        ).minimize(loss)
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch_diff(y, test_global_storage.Setter("y_diff"))
        return loss
def test_element_wise_mul_job(
    x: oft.Numpy.Placeholder(shape, dtype=flow.float),
    y: oft.Numpy.Placeholder(shape, dtype=flow.float),
):
    with flow.scope.placement(device, "0:0"):
        x += flow.get_variable(
            name="vx",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        y += flow.get_variable(
            name="vy",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        x = flow.cast(x, dtype=flow_type)
        y = flow.cast(y, dtype=flow_type)
        out = flow.math.multiply(x, y)
        out = flow.cast(out, dtype=flow.float)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0
        ).minimize(out)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(y, test_global_storage.Setter("y"))
        flow.watch_diff(y, test_global_storage.Setter("y_diff"))
        flow.watch(out, test_global_storage.Setter("out"))
        flow.watch_diff(out, test_global_storage.Setter("out_diff"))
        return out
def FlowJob(
    value: oft.Numpy.Placeholder(value.shape),
    bias: oft.Numpy.Placeholder(bias.shape),
):
    with flow.scope.placement(device_type, "0:0"):
        value += flow.get_variable(
            name="v1",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        bias += flow.get_variable(
            name="v2",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        if data_type == "float16":
            comp_value = flow.cast(value, dtype=flow.float16)
            comp_bias = flow.cast(bias, dtype=flow.float16)
        else:
            comp_value = value
            comp_bias = bias
        loss = flow.nn.bias_add(comp_value, comp_bias, *flow_args)
        if data_type == "float16":
            loss = flow.cast(loss, dtype=flow.float)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0]), momentum=0
        ).minimize(loss)
        flow.watch_diff(value, test_global_storage.Setter("value_diff"))
        flow.watch_diff(bias, test_global_storage.Setter("bias_diff"))
        return loss
def FlowJob(
    value: oft.Numpy.Placeholder(x_shape),
    bias: oft.Numpy.Placeholder(bias_shape),
):
    with flow.scope.placement(device_type, "0:0"):
        value += flow.get_variable(
            name="v1",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        bias += flow.get_variable(
            name="v2",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        # Duplicate the inputs so the composed bias_add + gelu path and the
        # fused_bias_add_gelu kernel can be compared on identical values.
        x1 = flow.identity(value)
        x2 = flow.identity(value)
        bias1 = flow.identity(bias)
        bias2 = flow.identity(bias)
        flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
        flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))
        flow.watch_diff(bias1, test_global_storage.Setter("bias1_diff"))
        flow.watch_diff(bias2, test_global_storage.Setter("bias2_diff"))
        if data_type == "float16":
            y1 = flow.cast(
                flow.math.gelu(
                    flow.nn.bias_add(
                        flow.cast(x1, dtype=flow.float16),
                        flow.cast(bias1, dtype=flow.float16),
                        data_format=data_format,
                    )
                ),
                dtype=flow.float,
            )
            y2 = flow.cast(
                flow.nn.fused_bias_add_gelu(
                    flow.cast(x2, dtype=flow.float16),
                    flow.cast(bias2, dtype=flow.float16),
                    data_format=data_format,
                ),
                dtype=flow.float,
            )
        else:
            y1 = flow.math.gelu(
                flow.nn.bias_add(x1, bias1, data_format=data_format)
            )
            y2 = flow.nn.fused_bias_add_gelu(x2, bias2, data_format=data_format)
        flow.watch(y1, test_global_storage.Setter("y1"))
        flow.watch(y2, test_global_storage.Setter("y2"))
        flow.watch_diff(y1, test_global_storage.Setter("y1_diff"))
        flow.watch_diff(y2, test_global_storage.Setter("y2_diff"))
        loss = y1 + y2
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
        ).minimize(flow.math.reduce_sum(loss))
        return loss
def _get_initializer(model_name):
    if model_name == "weight":
        return flow.variance_scaling_initializer(
            2.0, mode="fan_out", distribution="random_normal", data_format="NCHW"
        )
    elif model_name == "bias":
        return flow.zeros_initializer()
    elif model_name == "gamma":
        return flow.ones_initializer()
    elif model_name == "beta":
        return flow.zeros_initializer()
    elif model_name == "dense_weight":
        return flow.random_normal_initializer(0, 0.01)
def test_fused_scale_tril_fw_bw_job(
    x: oft.Numpy.Placeholder(shape, dtype=flow_type)
):
    with flow.scope.placement(device, "0:0"):
        x_var = flow.get_variable(
            name="xv",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        x += flow.cast(x_var, dtype=flow_type)
        if type_name == "float16":
            out = flow.cast(
                flow.math.fused_scale_tril(
                    flow.cast(x, flow.float16), diagonal, scale=scale
                ),
                flow.float,
            )
        else:
            out = flow.math.fused_scale_tril(x, diagonal, scale=scale)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0
        ).minimize(out)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(out, test_global_storage.Setter("out"))
        flow.watch_diff(out, test_global_storage.Setter("out_diff"))
        return out
def _blob_random(
    shape, dtype=flow.int32, initializer=flow.zeros_initializer(flow.int32)
):
    return flow.data.decode_random(
        shape=shape, dtype=dtype, batch_size=batch_size, initializer=initializer
    )
def test_fn(
    x: flow.typing.Numpy.Placeholder((1024, 4)),
    indices: flow.typing.Numpy.Placeholder(shape=(12,), dtype=flow.int32),
) -> flow.typing.Numpy:
    # Exercise 2-D SBP signatures on a 2x2 device hierarchy (4 GPUs).
    with flow.scope.placement("gpu", "0:0-3", (2, 2)):
        x = flow.hierarchical_parallel_cast(x, nd_sbp=["S(0)", "S(0)"])
        indices = flow.hierarchical_parallel_cast(indices, nd_sbp=["B", "B"])
        x = flow.hierarchical_parallel_cast(x, nd_sbp=["S(0)", "B"])
        v = flow.get_variable(
            name="v",
            shape=(1024, 4),
            nd_sbp=["S(0)", "B"],
            initializer=flow.zeros_initializer(),
        )
        x = x + v
        indices = flow.hierarchical_parallel_cast(indices, nd_sbp=["B", "S(0)"])
        x = flow.gather(x, indices)
        x = flow.hierarchical_parallel_cast(
            x,
            nd_sbp=["B", "S(0)"],
            grad_mode="manual",
            grad_nd_sbp=["B", "S(0)"],
        )
        x = flow.math.relu(x)
        x = flow.hierarchical_parallel_cast(x, nd_sbp=["B", "B"])
    x = flow.hierarchical_parallel_cast(x, nd_sbp=["B"])
    flow.optimizer.SGD(
        flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
    ).minimize(x)
    return x
def nvtx_range_job(x: oft.Numpy.Placeholder((4, 4, 1024, 1024))):
    x += flow.get_variable(
        name="v1",
        shape=(1,),
        dtype=flow.float,
        initializer=flow.zeros_initializer(),
    )
    x = flow.math.relu(x)
    x = flow.profiler.nvtx_start(x, mark_prefix="softmax")
    x = flow.nn.softmax(x)
    x = flow.nn.softmax(x)
    x = flow.nn.softmax(x)
    x = flow.nn.softmax(x)
    x = flow.nn.softmax(x)
    x = flow.profiler.nvtx_end(x, mark_prefix="softmax")
    x = flow.math.relu(x)
    x = flow.profiler.nvtx_start(x, mark_prefix="gelu")
    x = flow.math.gelu(x)
    x = flow.math.gelu(x)
    x = flow.math.gelu(x)
    x = flow.math.gelu(x)
    x = flow.math.gelu(x)
    x = flow.math.gelu(x)
    x = flow.profiler.nvtx_end(x, mark_prefix="gelu")
    flow.optimizer.SGD(
        flow.optimizer.PiecewiseConstantScheduler([], [0]), momentum=0
    ).minimize(x)
    return flow.identity(x)
def oneflow_mseloss(
    of_input: tp.Numpy.Placeholder(shape=input.shape),
    of_target: tp.Numpy.Placeholder(shape=target.shape),
) -> Dict[str, tp.Numpy]:
    with flow.scope.placement(device_type, "0:0"):
        v = flow.get_variable(
            shape=input.shape,
            dtype=flow.float32,
            initializer=flow.zeros_initializer(),
            name="x_var",
        )
        x_var = of_input + v

    flow.watch_diff(x_var, assert_prediction_grad)
    mseloss = flow.nn.MSELoss(x_var, of_target, reduction="none", name="of_mseloss")
    mseloss_mean = flow.nn.MSELoss(
        x_var, of_target, reduction="mean", name="of_mseloss_reduce_mean"
    )
    mseloss_sum = flow.nn.MSELoss(
        x_var, of_target, reduction="sum", name="of_mseloss_reduce_sum"
    )
    with flow.scope.placement(device_type, "0:0"):
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
        ).minimize(mseloss_mean)
    return {
        "of_mse_loss": mseloss,
        "of_mse_loss_mean": mseloss_mean,
        "of_mse_loss_sum": mseloss_sum,
    }
def _batch_norm_add_relu(self, inputs, addend, name=None, last=False):
    # Use the fused BN + add + ReLU kernel when enabled; otherwise fall back
    # to batch norm followed by an explicit add and ReLU.
    if self.fuse_bn_add_relu:
        initializer = flow.zeros_initializer() if last else flow.ones_initializer()
        axis = 1
        if self.data_format == "NHWC":
            axis = 3
        return flow.layers.batch_normalization_add_relu(
            inputs=inputs,
            addend=addend,
            axis=axis,
            momentum=0.9,
            epsilon=1e-5,
            center=True,
            scale=True,
            trainable=self.trainable,
            training=self.training,
            gamma_initializer=initializer,
            moving_variance_initializer=initializer,
            gamma_regularizer=self.weight_regularizer,
            beta_regularizer=self.weight_regularizer,
            name=name + "_bn_add_relu",
        )
    else:
        return flow.nn.relu(
            self._batch_norm(inputs, name + "_bn", last=last) + addend
        )
def test_masked_fill_fw_bw_job(
    x: oft.Numpy.Placeholder(x_shape, dtype=flow_type),
    mask: oft.Numpy.Placeholder(mask_shape, dtype=flow_type),
):
    with flow.scope.placement(device, "0:0"):
        y = flow.get_variable(
            name="vx",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        x += flow.cast(y, flow_type)
        mask = flow.cast(mask, dtype=flow.int8)
        if type_name == "float16":
            out = flow.cast(
                flow.masked_fill(flow.cast(x, flow.float16), mask, value),
                flow.float,
            )
        else:
            out = flow.masked_fill(x, mask, value)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0
        ).minimize(out)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(out, test_global_storage.Setter("out"))
        flow.watch_diff(out, test_global_storage.Setter("out_diff"))
        return out
def resnet50(args, data_dir):
    (labels, images) = _data_load(args, data_dir)
    g_output_key.append("input_img")
    with flow.scope.namespace("Resnet"):
        stem = resnet_stem(images)
        body = resnet_conv_x_body(stem, lambda x: x)
        pool5 = flow.nn.avg_pool2d(
            body,
            ksize=7,
            strides=1,
            padding="VALID",
            data_format="NCHW",
            name="pool5",
        )
        g_output_key.append("pool5")
        fc1001 = flow.layers.dense(
            flow.reshape(pool5, (pool5.shape[0], -1)),
            units=1001,
            use_bias=True,
            kernel_initializer=flow.xavier_uniform_initializer(),
            bias_initializer=flow.zeros_initializer(),
            trainable=g_trainable,
            name="fc1001",
        )
        g_output_key.append("fc1001")
        loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
            labels, fc1001, name="softmax_loss"
        )
        g_output_key.append("cross_entropy")
    return loss
def ctc_loss_job(
    log_probs: tp.Numpy.Placeholder(
        shape=(max_input_length, batch_size, num_classes), dtype=flow_data_type
    ),
    targets: tp.Numpy.Placeholder(
        shape=(batch_size, max_target_length), dtype=flow.int32
    ),
    input_lengths: tp.Numpy.Placeholder(shape=(batch_size,), dtype=flow.int32),
    target_lengths: tp.Numpy.Placeholder(shape=(batch_size,), dtype=flow.int32),
) -> tp.Numpy:
    with flow.scope.placement(device_type, "0:0"):
        v = flow.get_variable(
            shape=log_probs.shape,
            dtype=flow_data_type,
            initializer=flow.zeros_initializer(),
            name="x_var",
        )
        x_var = log_probs + v

    flow.watch_diff(x_var, assert_loss_grad)
    loss = flow.ctc_loss(
        x_var,
        targets,
        input_lengths,
        target_lengths,
        blank,
        reduction,
        zero_infinity,
    )
    with flow.scope.placement(device_type, "0:0"):
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
        ).minimize(loss)
    return loss
def FlowNnBnJob(
    x_full_precision: oft.Numpy.Placeholder(x.shape),
    mean: oft.Numpy.Placeholder(mean.shape),
    variance: oft.Numpy.Placeholder(variance.shape),
    offset: oft.Numpy.Placeholder(offset.shape),
    scale: oft.Numpy.Placeholder(scale.shape),
):
    with flow.scope.placement(device_type, "0:0"):
        x_full_precision += flow.get_variable(
            name="v1",
            shape=(1,),
            dtype=flow.float32,
            initializer=flow.zeros_initializer(),
        )
        if data_type == "float16":
            x = flow.cast(x_full_precision, flow.float16)
        else:
            x = x_full_precision
        y = flow.nn.batch_normalization(
            x, mean, variance, offset, scale, epsilon, axis=axis
        )
        y = flow.cast(y, flow.float32)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0]), momentum=0
        ).minimize(y)
        flow.watch_diff(x_full_precision, test_global_storage.Setter("x_diff"))
        return y
def test_job(x: oft.Numpy.Placeholder(input_shape, dtype=flow.float32)):
    v = flow.get_variable(
        name="v",
        shape=(1,),
        dtype=flow.float32,
        initializer=flow.zeros_initializer(),
    )
    x = x + v
    x1 = flow.identity(x)
    x2 = flow.identity(x)
    flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
    flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))
    x1 = flow.cast(x1, data_type)
    x2 = flow.cast(x2, data_type)
    # Compare the fused batch_normalization_relu op with separate BN + ReLU.
    y1 = flow.layers.batch_normalization_relu(x1, axis=axis, name="BN1")
    y2 = flow.math.relu(flow.layers.batch_normalization(x2, axis=axis, name="BN2"))
    y1 = flow.cast(y1, flow.float32)
    y2 = flow.cast(y2, flow.float32)
    flow.watch(y1, test_global_storage.Setter("y1"))
    flow.watch(y2, test_global_storage.Setter("y2"))
    y1 = flow.where(flow.math.greater(y2, v), y1, v)
    y2 = flow.where(flow.math.greater(y1, v), y2, v)
    loss = y1 + y2
    flow.optimizer.SGD(
        flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
    ).minimize(flow.math.reduce_sum(loss))
    return loss
def QuantizeJob(
    input: oft.Numpy.Placeholder(in_shape, dtype=type_name_to_flow_type[dtype])
):
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "x",
            shape=in_shape,
            dtype=input.dtype,
            initializer=flow.zeros_initializer(input.dtype),
            trainable=True,
        )
        input_x = input + x

    flow.watch_diff(input_x, test_global_storage.Setter("input_diff"))
    with flow.scope.placement(device_type, "0:0-%d" % (device_num - 1)):
        # min_max_observer derives the scale/zero_point pair; fake_quantization
        # then simulates the quantize-dequantize round trip with them.
        (scale, zero_point) = flow.quantization.min_max_observer(
            input_x,
            quantization_bit,
            quantization_scheme,
            quantization_formula,
            per_layer_quantization,
        )
        out = flow.quantization.fake_quantization(
            input_x,
            scale,
            zero_point,
            quantization_bit,
            quantization_scheme,
            quantization_formula,
        )
        loss = flow.math.reduce_mean(out)
        flow.optimizer.Adam(
            flow.optimizer.PiecewiseConstantScheduler([], [0.001])
        ).minimize(loss)
    return out
def foo_job():
    x = flow.constant(1, shape=(2, 5), dtype=flow.float)
    y = flow.get_variable(
        name="var",
        shape=(64, 4),
        dtype=flow.float,
        initializer=flow.zeros_initializer(),
    )
    return (x, y)
def _conv2d_layer(
    name,
    input,
    filters,
    kernel_size=3,
    strides=1,
    padding="SAME",
    group_num=1,
    data_format="NCHW",
    dilation_rate=1,
    activation=None,
    use_bias=False,
    weight_initializer=_get_initializer(),
    bias_initializer=flow.zeros_initializer(),
    weight_regularizer=_get_regularizer(),
    bias_regularizer=_get_regularizer(),
):
    weight_shape = (
        int(filters),
        int(input.shape[1] / group_num),
        int(kernel_size),
        int(kernel_size),
    )
    weight = flow.get_variable(
        name + "-weight",
        shape=weight_shape,
        dtype=input.dtype,
        initializer=weight_initializer,
        regularizer=weight_regularizer,
    )
    output = flow.nn.conv2d(
        input,
        weight,
        strides,
        padding,
        None,
        data_format,
        dilation_rate,
        groups=group_num,
        name=name,
    )
    if use_bias:
        bias = flow.get_variable(
            name + "-bias",
            shape=(filters,),
            dtype=input.dtype,
            initializer=bias_initializer,
            regularizer=bias_regularizer,
        )
        output = flow.nn.bias_add(output, bias, data_format)
    if activation is not None:
        if activation == op_conf_util.kRelu:
            output = flow.math.relu(output)
        else:
            raise NotImplementedError
    return output
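# A hypothetical call sketch for _conv2d_layer above; `images` stands for any
# NCHW float blob already in scope (e.g. from a data loader), and the filter
# counts are illustrative only.
conv1 = _conv2d_layer("conv1", images, filters=64, kernel_size=7, strides=2)
conv2 = _conv2d_layer("conv2", conv1, filters=128, use_bias=True)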
def load_synthetic(image_size, batch_size):
    label = flow.data.decode_random(
        shape=(),
        dtype=flow.int32,
        batch_size=batch_size,
        initializer=flow.zeros_initializer(flow.int32),
    )
    image = flow.data.decode_random(
        shape=(3, image_size, image_size), dtype=flow.float, batch_size=batch_size
    )
    return (label, image)
def oneflow_Xmum(
    of_input_1: tp.Numpy.Placeholder(
        shape=input_1.shape, dtype=value_type["of_type"]
    ),
    of_input_2: tp.Numpy.Placeholder(
        shape=input_2.shape, dtype=value_type["of_type"]
    ),
) -> tp.Numpy:
    with flow.scope.placement(device_type, "0:0"):
        v1 = flow.get_variable(
            shape=input_1.shape,
            dtype=value_type["of_type"],
            initializer=flow.zeros_initializer(),
            name="x1_var",
        )
        x1_var = of_input_1 + v1
        if not dx_only:
            v2 = flow.get_variable(
                shape=input_2.shape,
                dtype=value_type["of_type"],
                initializer=flow.zeros_initializer(),
                name="x2_var",
            )
            x2_var = of_input_2 + v2
        else:
            x2_var = flow.constant(
                value=1.5, shape=of_input_2.shape, dtype=value_type["of_type"]
            )

    flow.watch_diff(x1_var, assert_prediction_grad)
    if compare_type == "maximum":
        of_Xmum_out = flow.math.maximum(x1_var, x2_var)
    elif compare_type == "minimum":
        of_Xmum_out = flow.math.minimum(x1_var, x2_var)
    with flow.scope.placement(device_type, "0:0"):
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
        ).minimize(of_Xmum_out)
    return of_Xmum_out
def FlowJob(x: oft.Numpy.Placeholder(x.shape)):
    with flow.scope.placement(device_type, "0:0"):
        x += flow.get_variable(
            name="v1",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        loss = flow_op(x, *flow_args)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0]), momentum=0
        ).minimize(loss)
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        return loss
def op_function(x: tp.Numpy.Placeholder(input.shape, dtype=value_type)):
    with flow.scope.placement(device_type, "0:0"):
        x += flow.get_variable(
            name="input",
            shape=input.shape,
            dtype=value_type,
            initializer=flow.zeros_initializer(),
        )
        out = flow.reflection_pad2d(x, padding)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0]), momentum=0
        ).minimize(out)
    flow.watch_diff(x, _compare_diff)
    return out
def oneflow_ones() -> tp.Numpy:
    with flow.scope.placement(device_type, "0:0"):
        v = flow.get_variable(
            shape=np_out_ones.shape,
            dtype=flow.float32,
            initializer=flow.zeros_initializer(),
            name="x_var",
        )
        of_ones = flow.ones(shape=input_shape, dtype=flow.float32)
        of_out = of_ones + v

    with flow.scope.placement(device_type, "0:0"):
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
        ).minimize(of_out)
    return of_ones
def PolyValJob(x: tp.Numpy.Placeholder(shape=in_shape)):
    with flow.scope.placement(device_type, "0:0"):
        x += flow.get_variable(
            name="x",
            shape=in_shape,
            dtype=flow_data_type,
            initializer=flow.zeros_initializer(),
            trainable=True,
        )

    flow.watch_diff(x, assert_prediction_grad)
    out = flow.math.polyval(coeffs, x)
    with flow.scope.placement(device_type, "0:0"):
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0
        ).minimize(out)
    return out
def oneflow_mish(of_input_1: tp.Numpy.Placeholder(shape=input_1.shape)) -> tp.Numpy:
    with flow.scope.placement(device_type, "0:0"):
        v = flow.get_variable(
            shape=input_1.shape,
            dtype=flow.float32,
            initializer=flow.zeros_initializer(),
            name="x_var",
        )
        x_var = of_input_1 + v

    flow.watch_diff(x_var, assert_prediction_grad)
    of_mish_out = flow.nn.mish(x_var)
    with flow.scope.placement(device_type, "0:0"):
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
        ).minimize(of_mish_out)
    return of_mish_out
def op_function(x: tp.Numpy.Placeholder(input.shape, dtype=flow.float32)):
    with flow.scope.placement(device_type, "0:0"):
        x += flow.get_variable(
            name="input",
            shape=input.shape,
            dtype=flow.float32,
            initializer=flow.zeros_initializer(),
        )
        y_int32 = flow.replication_pad2d(x, padding)
        y_fp32 = flow.cast(y_int32, dtype=flow.float32)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0]), momentum=0
        ).minimize(y_fp32)
    flow.watch_diff(x, _compare_diff)
    return y_fp32
def load_synthetic(args):
    total_device_num = args.num_nodes * args.gpu_num_per_node
    batch_size = total_device_num * args.batch_size_per_device
    label = flow.data.decode_random(
        shape=(),
        dtype=flow.int32,
        batch_size=batch_size,
        initializer=flow.zeros_initializer(flow.int32),
    )
    shape = (
        (args.image_size, args.image_size, 3)
        if args.channel_last
        else (3, args.image_size, args.image_size)
    )
    image = flow.data.decode_random(
        shape=shape, dtype=flow.float, batch_size=batch_size
    )
    return label, image