def FlowNnBnJob(
    x_full_precision: oft.Numpy.Placeholder(x.shape),
    mean: oft.Numpy.Placeholder(mean.shape),
    variance: oft.Numpy.Placeholder(variance.shape),
    offset: oft.Numpy.Placeholder(offset.shape),
    scale: oft.Numpy.Placeholder(scale.shape),
):
    with flow.scope.placement(device_type, "0:0"):
        x_full_precision += flow.get_variable(
            name="v1",
            shape=(1,),
            dtype=flow.float32,
            initializer=flow.zeros_initializer(),
        )
        if data_type == "float16":
            x = flow.cast(x_full_precision, flow.float16)
        else:
            x = x_full_precision
        y = flow.nn.batch_normalization(
            x, mean, variance, offset, scale, epsilon, axis=axis
        )
        y = flow.cast(y, flow.float32)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0]), momentum=0
        ).minimize(y)
        flow.watch_diff(x_full_precision, test_global_storage.Setter("x_diff"))
        return y
def FlowNnBnJob(
    x_full_precision: oft.Numpy.Placeholder(x.shape),
    mean: oft.Numpy.Placeholder(mean.shape),
    variance: oft.Numpy.Placeholder(variance.shape),
    offset: oft.Numpy.Placeholder(offset.shape),
    scale: oft.Numpy.Placeholder(scale.shape),
):
    with flow.scope.placement("gpu", "0:0"):
        x_full_precision += flow.get_variable(
            name="v1",
            shape=(1,),
            dtype=flow.float32,
            initializer=flow.zeros_initializer(),
        )
        if data_type == "float16":
            x = flow.cast(x_full_precision, flow.float16)
        else:
            x = x_full_precision
        y = flow.nn.batch_normalization(
            x, mean, variance, offset, scale, epsilon, axis=axis
        )
        y = flow.cast(y, flow.float32)
        flow.losses.add_loss(y)
        flow.watch_diff(x_full_precision, test_global_storage.Setter("x_diff"))
        return y
def diag_job(
    input_tensor: tp.Numpy.Placeholder(shape=(input_shape), dtype=flow.float),
) -> tp.Numpy:
    input_var = flow.get_variable(
        "input_tensor",
        shape=(input_shape),
        dtype=flow.float,
        initializer=flow.zeros_initializer(),
        trainable=True,
    )
    input_tensor = input_tensor + input_var
    input_tensor = flow.cast_to_current_logical_view(input_tensor)
    input_tensor = flow.cast(input_tensor, type_name_to_flow_type[dtype])
    output = flow.diag(input_tensor, dim)
    if (
        output.dtype == flow.int64
        or output.dtype == flow.int8
        or output.dtype == flow.int32
    ):
        output = flow.cast(output, flow.float)
    flow.optimizer.Adam(
        flow.optimizer.PiecewiseConstantScheduler([], [1e-4])
    ).minimize(output)
    flow.watch(input_tensor, test_global_storage.Setter("x"))
    flow.watch_diff(input_tensor, test_global_storage.Setter("x_diff"))
    flow.watch(output, test_global_storage.Setter("output"))
    flow.watch_diff(output, test_global_storage.Setter("output_diff"))
    return output
def test_masked_fill_fw_bw_job(
    x: oft.Numpy.Placeholder(x_shape, dtype=flow_type),
    mask: oft.Numpy.Placeholder(mask_shape, dtype=flow_type),
):
    with flow.scope.placement(device, "0:0"):
        y = flow.get_variable(
            name="vx",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        x += flow.cast(y, flow_type)
        mask = flow.cast(mask, dtype=flow.int8)
        if type_name == "float16":
            out = flow.cast(
                flow.masked_fill(flow.cast(x, flow.float16), mask, value),
                flow.float,
            )
        else:
            out = flow.masked_fill(x, mask, value)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(out)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(out, test_global_storage.Setter("out"))
        flow.watch_diff(out, test_global_storage.Setter("out_diff"))
        return out
def gather_fn(
    params_def: oft.Numpy.Placeholder(input.shape, dtype=flow.float32),
    indices_def: oft.Numpy.Placeholder(index.shape, dtype=index_type),
) -> oft.Numpy:
    with flow.scope.placement(device_type, "0:0"):
        x_var = flow.get_variable(
            "input",
            shape=input.shape,
            dtype=flow.float32,
            initializer=flow.constant_initializer(0),
        )
        x_var = flow.cast_to_current_logical_view(x_var)
        x = x_var + params_def
        x_f16 = flow.cast(x, flow.float16)

    y_f16 = flow.dim_gather(x_f16, dim, indices_def)
    x_f32 = flow.cast(x, flow.float32)
    y_f32 = flow.cast(y_f16, flow.float32)
    y = flow.dim_gather(x, dim, indices_def)

    with flow.scope.placement(device_type, "0:0"):
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-3]), momentum=0
        ).minimize(y_f32)

    flow.watch_diff(x_f32, _compare_diff)
    return y_f32
def build(self, ids, table_ids, embedding_grad):
    (
        num_unique_matrix,
        inverse_unique_partition_indices,
        _,
        cur_rank_unique_ids,
        _,
        cur_rank_inverse_indices,
    ) = flow._C.one_embedding_id_shuffle(ids, table_ids, num_tables)
    if fp16:
        embedding_grad = flow.cast(embedding_grad, flow.float16)
    cur_rank_unique_embedding_grad = flow._C.one_embedding_embedding_gradient_shuffle(
        embedding_grad,
        num_unique_matrix,
        cur_rank_inverse_indices,
        inverse_unique_partition_indices,
    )
    if fp16:
        cur_rank_unique_embedding_grad = flow.cast(
            cur_rank_unique_embedding_grad, flow.float32
        )
    return (
        cur_rank_unique_embedding_grad,
        flow.cast(cur_rank_unique_ids, flow.int32),
        flow.cast(cur_rank_inverse_indices, flow.int32),
        flow.cast(inverse_unique_partition_indices, flow.int32),
    )
def test_element_wise_mul_job(
    x: oft.Numpy.Placeholder(shape, dtype=flow.float),
    y: oft.Numpy.Placeholder(shape, dtype=flow.float),
):
    with flow.scope.placement(device, "0:0"):
        x += flow.get_variable(
            name="vx",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        y += flow.get_variable(
            name="vy",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        x = flow.cast(x, dtype=flow_type)
        y = flow.cast(y, dtype=flow_type)
        out = flow.math.multiply(x, y)
        out = flow.cast(out, dtype=flow.float)
        flow.losses.add_loss(out)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(y, test_global_storage.Setter("y"))
        flow.watch_diff(y, test_global_storage.Setter("y_diff"))
        flow.watch(out, test_global_storage.Setter("out"))
        flow.watch_diff(out, test_global_storage.Setter("out_diff"))
        return out
def oneflow_hardtanh(
    of_input_1: tp.Numpy.Placeholder(shape=input_1.shape, dtype=flow.float32),
) -> tp.Numpy:
    with flow.scope.placement(device_type, "0:0"):
        v = flow.get_variable(
            shape=input_1.shape,
            dtype=flow.float32,
            initializer=flow.zeros_initializer(),
            name="x_var",
        )
        x_var = of_input_1 + v
        x_f16 = flow.cast(x_var, flow.float16)

    of_hardtanh_out_f16 = flow.nn.hardtanh(x_f16, min_val, max_val)
    of_hardtanh_out_f32 = flow.cast(of_hardtanh_out_f16, flow.float32)

    with flow.scope.placement(device_type, "0:0"):
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-3]), momentum=0
        ).minimize(of_hardtanh_out_f32)

    flow.watch_diff(x_var, assert_prediction_grad)
    return of_hardtanh_out_f32
def test_fused_scale_tril_fw_bw_job(
    x: oft.Numpy.Placeholder(shape, dtype=flow_type),
):
    with flow.scope.placement(device, "0:0"):
        x_var = flow.get_variable(
            name="xv",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        x += flow.cast(x_var, dtype=flow_type)
        if type_name == "float16":
            out = flow.cast(
                flow.math.fused_scale_tril(
                    flow.cast(x, flow.float16), diagonal, scale=scale
                ),
                flow.float,
            )
        else:
            out = flow.math.fused_scale_tril(x, diagonal, scale=scale)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(out)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(out, test_global_storage.Setter("out"))
        flow.watch_diff(out, test_global_storage.Setter("out_diff"))
        return out
def DropoutJob():
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "x",
            shape=x_shape,
            dtype=dtype,
            initializer=flow.random_uniform_initializer(minval=-1, maxval=1),
            trainable=True,
        )
        if data_type == "float16":
            x = flow.cast(flow.cast(x, flow.float16), dtype)
            of_out = flow.cast(
                flow.nn.dropout(
                    flow.cast(x, flow.float16), rate=rate, seed=seed, name="dropout"
                ),
                dtype,
            )
        else:
            of_out = flow.nn.dropout(x, rate=rate, seed=seed, name="dropout")
        loss = flow.math.square(of_out)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(of_out, test_global_storage.Setter("out"))
        flow.watch_diff(of_out, test_global_storage.Setter("out_diff"))
        return loss
def SoftmaxJob():
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "x",
            shape=x_shape,
            dtype=dtype,
            initializer=flow.random_uniform_initializer(minval=-0.1, maxval=0.1),
            trainable=True,
        )
        if data_type == "float16":
            loss = flow.cast(
                flow.nn.softmax(flow.cast(x, dtype=flow.float16), axis=axis),
                dtype=flow.float,
            )
        else:
            loss = flow.nn.softmax(x, axis=axis)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
def FusedCastScaleJob():
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "in",
            shape=input_shape,
            dtype=flow.float,
            initializer=flow.random_uniform_initializer(),
            trainable=True,
        )
        scale = flow.get_variable(
            "scale",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.random_uniform_initializer(),
            trainable=False,
        )
        loss = flow.cast(x, dtype=type_name_to_flow_type[in_dtype])
        if test_fuse_cast_scale_pass:
            loss = flow.cast(
                loss, dtype=type_name_to_flow_type[out_dtype]
            ) * flow.cast(scale, dtype=type_name_to_flow_type[out_dtype])
        else:
            loss = fused_cast_scale(
                loss,
                flow.cast(scale, dtype=type_name_to_flow_type[out_dtype]),
                name="fused_cast_scale",
            )
        loss = flow.cast(loss, dtype=flow.float)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch(scale, test_global_storage.Setter("scale"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        return loss
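# The job above checks that a cast followed by a scalar multiply matches the
# fused_cast_scale op. A minimal NumPy reference for that check is sketched
# below (a hypothetical helper, not part of the original test; it only assumes
# standard NumPy dtypes such as np.float16 / np.float32).
import numpy as np


def np_cast_scale_ref(x, scale, in_dtype=np.float16, out_dtype=np.float32):
    # Cast to the intermediate dtype, then cast again and scale, mirroring the
    # unfused branch of FusedCastScaleJob.
    return x.astype(in_dtype).astype(out_dtype) * scale.astype(out_dtype)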
def flow_net(var_name, random_mask):
    with flow.scope.placement(device_type, "0:0-0"):
        x = flow.get_variable(
            name=var_name,
            shape=x_shape,
            dtype=flow.float32,
            initializer=flow.ones_initializer(),
            trainable=True,
        )
        constant_val = flow.constant(3.0, dtype=flow.float32, shape=(1,))
        x = x * constant_val
        x = x * 2.0
        if device_type == "gpu":
            x = flow.cast(x, flow.float16)
            x = flow.math.relu(x)
            x = flow.cast(x, flow.float)
        loss = flow.math.reduce_mean(x * random_mask)
        flow.optimizer.Adam(
            flow.optimizer.PiecewiseConstantScheduler([], [learning_rate]),
            beta1=beta1,
            beta2=beta2,
            epsilon=epsilon,
            do_bias_correction=True,
        ).minimize(loss)
        return x
def do_test_dropout_addend_numpy_fp16_p0(test_case, shape):
    np_x = np.random.randn(*shape).astype(np.float32)
    np_x_fp16 = np_x.astype(np.float16)
    np_addend = np.random.randn(*shape).astype(np.float32)
    np_addend_fp16 = np_addend.astype(np.float16)
    x_tensor = flow.tensor(np_x, requires_grad=True, device="cuda")
    x_tensor_fp16 = flow.cast(x_tensor, flow.float16)
    addend_tensor = flow.tensor(np_addend, requires_grad=True, device="cuda")
    addend_tensor_fp16 = flow.cast(addend_tensor, flow.float16)
    np_one_mask = np.ones_like(np_x)
    DropoutModule = flow.nn.Dropout(p=0.0)
    out = DropoutModule(x_tensor_fp16, addend_tensor_fp16)
    out_fp32 = flow.cast(out, flow.float32)
    test_case.assertTrue(
        np.allclose(out_fp32.numpy(), np_x_fp16 + np_addend_fp16, atol=1e-5, rtol=1e-5)
    )
    out_sum = out_fp32.sum()
    out_sum.backward()
    test_case.assertTrue(
        np.allclose(x_tensor.grad.numpy(), np_one_mask, atol=1e-5, rtol=1e-5)
    )
    test_case.assertTrue(
        np.allclose(addend_tensor.grad.numpy(), np_one_mask, atol=1e-5, rtol=1e-5)
    )
def FlowJob(x_full_precision: oft.Numpy.Placeholder(x.shape, dtype=dtype)):
    with flow.scope.placement(device_type, "0:0"):
        x_full_precision += flow.get_variable(
            name="v1", shape=(1,), dtype=dtype, initializer=flow.zeros_initializer()
        )
        if data_type == "float16":
            x = flow.cast(x_full_precision, flow.float16)
        else:
            x = x_full_precision
        y = flow.layers.batch_normalization(
            x, *flow_args, trainable=trainable, training=training
        )
        y = flow.cast(y, flow.float)
        if trainable:
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
            ).minimize(y)
            flow.watch_diff(x_full_precision, test_global_storage.Setter("x_diff"))
        return y
def FlowJob(
    value: oft.Numpy.Placeholder(value.shape),
    bias: oft.Numpy.Placeholder(bias.shape),
):
    with flow.scope.placement(device_type, "0:0"):
        value += flow.get_variable(
            name="v1",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        bias += flow.get_variable(
            name="v2",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        if data_type == "float16":
            comp_value = flow.cast(value, dtype=flow.float16)
            comp_bias = flow.cast(bias, dtype=flow.float16)
        else:
            comp_value = value
            comp_bias = bias
        loss = flow.nn.bias_add(comp_value, comp_bias, *flow_args)
        if data_type == "float16":
            loss = flow.cast(loss, dtype=flow.float)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0]), momentum=0
        ).minimize(loss)
        flow.watch_diff(value, test_global_storage.Setter("value_diff"))
        flow.watch_diff(bias, test_global_storage.Setter("bias_diff"))
        return loss
def test_job(
    x: oft.Numpy.Placeholder(input_shape, dtype=flow.float32),
):
    v = flow.get_variable(
        name="v",
        shape=(1,),
        dtype=flow.float32,
        initializer=flow.zeros_initializer(),
    )
    x = x + v
    x1 = flow.identity(x)
    x2 = flow.identity(x)
    flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
    flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))
    x1 = flow.cast(x1, data_type)
    x2 = flow.cast(x2, data_type)
    y1 = flow.layers.batch_normalization_relu(x1, axis=axis, name="BN1")
    y2 = flow.math.relu(flow.layers.batch_normalization(x2, axis=axis, name="BN2"))
    y1 = flow.cast(y1, flow.float32)
    y2 = flow.cast(y2, flow.float32)
    flow.watch(y1, test_global_storage.Setter("y1"))
    flow.watch(y2, test_global_storage.Setter("y2"))
    loss = flow.math.reduce_mean(y1 + y2)
    flow.optimizer.SGD(
        flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
    ).minimize(flow.math.reduce_sum(loss))
    return loss
def test_element_wise_mul_job(
    x: oft.Numpy.Placeholder(shape, dtype=flow.float),
    y: oft.Numpy.Placeholder(shape, dtype=flow.float),
):
    with flow.scope.placement(device, "0:0"):
        x += flow.get_variable(
            name="vx",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        y += flow.get_variable(
            name="vy",
            shape=(1,),
            dtype=flow.float,
            initializer=flow.zeros_initializer(),
        )
        x = flow.cast(x, dtype=flow_type)
        y = flow.cast(y, dtype=flow_type)
        out = flow.math.multiply(x, y)
        out = flow.cast(out, dtype=flow.float)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(out)
        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(y, test_global_storage.Setter("y"))
        flow.watch_diff(y, test_global_storage.Setter("y_diff"))
        flow.watch(out, test_global_storage.Setter("out"))
        flow.watch_diff(out, test_global_storage.Setter("out_diff"))
        return out
def MatmulJob():
    with flow.scope.placement(device_type, "0:0"):
        a = flow.get_variable(
            "a",
            shape=a_shape,
            dtype=dtype,
            initializer=flow.random_uniform_initializer(minval=0, maxval=1),
            trainable=True,
        )
        b = flow.get_variable(
            "b",
            shape=b_shape,
            dtype=dtype,
            initializer=flow.random_uniform_initializer(minval=0, maxval=1),
            trainable=True,
        )
        if data_type == "float16":
            out = flow.matmul(
                flow.cast(a, dtype=flow.float16),
                flow.cast(b, dtype=flow.float16),
                transpose_a,
                transpose_b,
                alpha,
            )
            c = flow.get_variable(
                "c",
                shape=out.shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=-1, maxval=1),
                trainable=True,
            )
            loss = flow.cast(out + flow.cast(c, dtype=flow.float16), dtype=flow.float)
        else:
            out = flow.matmul(a, b, transpose_a, transpose_b, alpha)
            c = flow.get_variable(
                "c",
                shape=out.shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=-1, maxval=1),
                trainable=True,
            )
            loss = out + c
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)
        flow.watch(a, test_global_storage.Setter("a"))
        flow.watch_diff(a, test_global_storage.Setter("a_diff"))
        flow.watch(b, test_global_storage.Setter("b"))
        flow.watch_diff(b, test_global_storage.Setter("b_diff"))
        flow.watch(c, test_global_storage.Setter("c"))
        flow.watch_diff(c, test_global_storage.Setter("c_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
        return loss
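# A NumPy reference for the float16 branch of MatmulJob above (a sketch, not
# taken from the original test). It assumes `a`, `b`, `c` are NumPy arrays
# whose trailing two dimensions carry the matrix, so batched inputs also work.
import numpy as np


def np_matmul_fp16_ref(a, b, c, transpose_a=False, transpose_b=False, alpha=1.0):
    a16 = a.astype(np.float16)
    b16 = b.astype(np.float16)
    if transpose_a:
        a16 = np.swapaxes(a16, -1, -2)
    if transpose_b:
        b16 = np.swapaxes(b16, -1, -2)
    out = alpha * np.matmul(a16, b16)
    # Add the bias variable in half precision, then cast back, as the job does.
    return (out + c.astype(np.float16)).astype(np.float32)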
def RepeatAccJob(a: oft.Numpy.Placeholder(shape)):
    if dtype == "float16":
        return flow.cast(
            flow.acc(flow.repeat(flow.cast(a, flow.float16), acc_num), acc_num),
            flow.float,
        )
    else:
        return flow.acc(flow.repeat(a, acc_num), acc_num)
def ReduceSumLikeJob(x: oft.Numpy.Placeholder(input_shape)):
    with flow.scope.placement(device_type, "0:0"):
        if data_type == "float16":
            x = flow.cast(x, dtype=flow.float16)
            like = flow.math.reduce_sum(x, axis=axis, keepdims=keepdims)
            y = reduce_sum_like(x, like, axis=axis)
            y = flow.cast(y, dtype=flow.float32)
        else:
            like = flow.math.reduce_sum(x, axis=axis, keepdims=keepdims)
            y = reduce_sum_like(x, like, axis=axis)
        return y
def loss_function(real, pred):
    mask = flow.math.not_equal(
        real, flow.constant_scalar(0, dtype=flow.int64, name="zero constant")
    )
    real = flow.cast(real, dtype=flow.int32, name="cast_to_int32")
    loss_ = flow.nn.sparse_softmax_cross_entropy_with_logits(labels=real, logits=pred)
    mask = flow.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return flow.math.reduce_mean(loss_)
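# The masking in loss_function above zeroes out the loss at padding positions
# (tokens equal to 0) before averaging. A NumPy sketch of the same arithmetic
# (illustrative only; `np_ce` stands in for the per-token cross entropy that
# flow.nn.sparse_softmax_cross_entropy_with_logits returns):
import numpy as np


def np_masked_loss_ref(real, np_ce):
    # Padding tokens contribute 0, but still count toward the mean's denominator,
    # matching reduce_mean over the masked loss tensor.
    mask = (real != 0).astype(np_ce.dtype)
    return np.mean(np_ce * mask)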
def ReduceSumJob(x: oft.Numpy.Placeholder(input_shape)):
    with flow.scope.placement(device_type, "0:0"):
        if data_type == "float16":
            y = flow.cast(
                flow.math.reduce_sum(
                    flow.cast(x, dtype=flow.float16), axis=axis, keepdims=keepdims
                ),
                dtype=flow.float32,
            )
        else:
            y = flow.math.reduce_sum(x, axis=axis, keepdims=keepdims)
        return y
def _CreateAttentionMaskFromInputMask(to_mask_blob, from_seq_length, to_seq_length):
    output = flow.cast(to_mask_blob, dtype=flow.float)
    output = flow.reshape(output, [-1, 1, to_seq_length])
    zeros = flow.constant(
        0.0, dtype=flow.float, shape=[from_seq_length, to_seq_length]
    )
    attention_mask_blob = zeros + output
    attention_mask_blob = flow.reshape(
        attention_mask_blob, [-1, 1, from_seq_length, to_seq_length]
    )
    attention_mask_blob = flow.cast(attention_mask_blob, dtype=flow.float)
    addr_blob = (attention_mask_blob - 1.0) * 10000.0
    return addr_blob
def get_extended_attention_mask(self, attention_mask, from_seq_length, to_seq_length):
    output = flow.cast(attention_mask, dtype=flow.float32)
    output = flow.reshape(output, [-1, 1, to_seq_length])
    # broadcast `from_tensor` from 2D to 3D
    output = output.expand(-1, from_seq_length, -1)
    attention_mask = flow.reshape(output, [-1, 1, from_seq_length, to_seq_length])
    attention_mask = flow.cast(attention_mask, dtype=flow.float32)
    addr_blob = (attention_mask - 1.0) * 10000.0
    return addr_blob
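# Both attention-mask helpers above turn a 0/1 padding mask into an additive
# bias: valid positions map to 0.0 and padded positions to -10000.0, which
# effectively removes them after softmax. A small NumPy sketch of the same
# arithmetic (illustrative only, not part of the original model code):
import numpy as np


def np_extended_attention_mask(attention_mask, from_seq_length, to_seq_length):
    out = attention_mask.astype(np.float32).reshape(-1, 1, 1, to_seq_length)
    out = np.broadcast_to(out, (out.shape[0], 1, from_seq_length, to_seq_length))
    # mask == 1 -> 0.0 (keep), mask == 0 -> -10000.0 (drop after softmax)
    return (out - 1.0) * 10000.0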
def do_test_dropout_numpy_fp16_p1(test_case, shape):
    np_x = np.random.randn(*shape).astype(np.float32)
    x_tensor = flow.tensor(np_x, requires_grad=True, device="cuda")
    x_tensor_fp16 = flow.cast(x_tensor, flow.float16)
    np_zero_mask = np.zeros_like(np_x)
    out = flow._C.dropout(x_tensor_fp16, p=1.0)
    out_fp32 = flow.cast(out, flow.float32)
    test_case.assertTrue(
        np.allclose(out_fp32.numpy(), np_zero_mask, atol=1e-5, rtol=1e-5)
    )
    out_sum = out_fp32.sum()
    out_sum.backward()
    test_case.assertTrue(
        np.allclose(x_tensor.grad.numpy(), np_zero_mask, atol=1e-5, rtol=1e-5)
    )
def build(self, ids, table_ids):
    (
        num_unique,
        unique_ids,
        unique_table_ids,
        inverse_indices,
    ) = flow._C.one_embedding_unique_key_value_pair(ids, table_ids, num_tables)
    return (
        flow.cast(num_unique, flow.int32),
        flow.cast(unique_ids, flow.int32),
        flow.cast(unique_table_ids, flow.int32),
        flow.cast(inverse_indices, flow.int32),
    )
def _CreateAddrFromAttentionMask(attention_mask_blob, from_seq_length, to_seq_length):
    attention_mask_blob = flow.reshape(
        attention_mask_blob, [-1, 1, from_seq_length, to_seq_length]
    )
    attention_mask_blob = flow.cast(attention_mask_blob, dtype=flow.float)
    addr_blob = (attention_mask_blob - 1.0) * 10000.0
    return addr_blob
def cast_forward(
    input_def: oft.Numpy.Placeholder(
        shape=input_shape, dtype=type_name_to_flow_type[dtype]
    )
):
    with flow.scope.placement(device_type, "0:0"):
        return flow.cast(input_def, dtype=type_name_to_flow_type[dtype])
def logical_xor_op(input, other):
    """
    Computes the element-wise logical XOR of the given input tensors.
    Zeros are treated as False and nonzeros are treated as True.

    Args:
        input (oneflow.Tensor): The input Tensor
        other (oneflow.Tensor): The Tensor to compute XOR with

    Returns:
        oneflow.Tensor: The output Tensor

    For example:

    .. code-block:: python

        >>> import numpy as np
        >>> import oneflow as flow

        >>> input1 = flow.tensor(np.array([1, 0, 1]).astype(np.float32), dtype=flow.float32)
        >>> input2 = flow.tensor(np.array([1, 0, 0]).astype(np.float32), dtype=flow.float32)

        >>> out = flow.logical_xor(input1, input2)
        >>> out
        tensor([False, False, True], dtype=oneflow.bool)

    """
    if type(input) == type(other):
        # Both operands are tensors: shapes must match, and `other` is cast to
        # `input`'s dtype before the element-wise XOR.
        assert input.shape == other.shape, "shape of input and other should be same"
        if other.dtype != input.dtype:
            other = flow.cast(other, input.dtype)
    return flow._C.logical_xor(input, other)
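# Usage sketch for the dtype-cast branch above (illustrative only; it assumes
# oneflow and numpy are installed and that flow.logical_xor dispatches to
# logical_xor_op). When `other` has a different dtype it is cast to `input`'s
# dtype before the functional call, so mixed-dtype tensors still work.
import numpy as np
import oneflow as flow

a = flow.tensor(np.array([1, 0, 1]), dtype=flow.float32)
b = flow.tensor(np.array([1, 0, 0]), dtype=flow.int64)
print(flow.logical_xor(a, b))  # expected values: [False, False, True]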