def _model(dense_fields, wide_sparse_fields, deep_sparse_fields):
    # wide part: model-parallel 1-d embedding (split along the vocab axis) reduced to a scalar score
    wide_sparse_fields = flow.parallel_cast(
        wide_sparse_fields, distribute=flow.distribute.broadcast()
    )
    wide_embedding_table = flow.get_variable(
        name='wide_embedding',
        shape=(FLAGS.wide_vocab_size, 1),
        initializer=flow.random_uniform_initializer(minval=-0.05, maxval=0.05),
        distribute=flow.distribute.split(0),
    )
    wide_embedding = flow.gather(params=wide_embedding_table, indices=wide_sparse_fields)
    wide_embedding = flow.reshape(
        wide_embedding, shape=(-1, wide_embedding.shape[-1] * wide_embedding.shape[-2])
    )
    wide_scores = flow.math.reduce_sum(wide_embedding, axis=[1], keepdims=True)
    wide_scores = flow.parallel_cast(
        wide_scores,
        distribute=flow.distribute.split(0),
        gradient_distribute=flow.distribute.broadcast(),
    )

    # deep part: embedding lookup followed by an MLP over [embedding, dense features]
    deep_sparse_fields = flow.parallel_cast(
        deep_sparse_fields, distribute=flow.distribute.broadcast()
    )
    deep_embedding_table = flow.get_variable(
        name='deep_embedding',
        shape=(FLAGS.deep_vocab_size, FLAGS.deep_embedding_vec_size),
        initializer=flow.random_uniform_initializer(minval=-0.05, maxval=0.05),
        distribute=flow.distribute.split(1),
    )
    deep_embedding = flow.gather(params=deep_embedding_table, indices=deep_sparse_fields)
    deep_embedding = flow.parallel_cast(
        deep_embedding,
        distribute=flow.distribute.split(0),
        gradient_distribute=flow.distribute.split(2),
    )
    deep_embedding = flow.reshape(
        deep_embedding, shape=(-1, deep_embedding.shape[-1] * deep_embedding.shape[-2])
    )
    deep_features = flow.concat([deep_embedding, dense_fields], axis=1)
    for idx, units in enumerate(DEEP_HIDDEN_UNITS):
        deep_features = flow.layers.dense(
            deep_features,
            units=units,
            kernel_initializer=flow.glorot_uniform_initializer(),
            bias_initializer=flow.constant_initializer(0.0),
            activation=flow.math.relu,
            name='fc' + str(idx + 1),
        )
        deep_features = flow.nn.dropout(deep_features, rate=FLAGS.deep_dropout_rate)
    deep_scores = flow.layers.dense(
        deep_features,
        units=1,
        kernel_initializer=flow.glorot_uniform_initializer(),
        bias_initializer=flow.constant_initializer(0.0),
        name='fc' + str(len(DEEP_HIDDEN_UNITS) + 1),
    )

    scores = wide_scores + deep_scores
    return scores
def dynamic_concat_job(
    input_0_def: oft.ListNumpy.Placeholder(shape=input_static_shape, dtype=flow.float),
    input_1_def: oft.ListNumpy.Placeholder(shape=input_static_shape, dtype=flow.float),
):
    var_0 = flow.get_variable(
        "Var0",
        shape=(1,),
        dtype=flow.float,
        initializer=flow.constant_initializer(value=1, dtype=flow.float),
        trainable=True,
    )
    var_1 = flow.get_variable(
        "Var1",
        shape=(1,),
        dtype=flow.float,
        initializer=flow.constant_initializer(value=1, dtype=flow.float),
        trainable=True,
    )
    var_0 = flow.cast_to_current_logical_view(var_0)
    var_1 = flow.cast_to_current_logical_view(var_1)
    input_0_def = flow.cast_to_current_logical_view(input_0_def)
    input_1_def = flow.cast_to_current_logical_view(input_1_def)
    if callable(watch_cb):
        flow.watch(var_0, watch_cb)
        flow.watch(var_1, watch_cb)
        flow.watch(flow.identity(input_0_def), watch_cb)
        flow.watch(flow.identity(input_1_def), watch_cb)

    var_0 = var_0 * input_0_def
    var_1 = var_1 * input_1_def
    if callable(watch_cb):
        flow.watch(var_0, watch_cb)
        flow.watch(var_1, watch_cb)

    result = flow.concat(
        [var_0, var_1], axis=axis, max_dim_size=input_static_shape[axis]
    )
    flow.optimizer.SGD(
        flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
    ).minimize(result)
    flow.watch_diff(var_0, make_watch_diff_cb(0))
    flow.watch_diff(var_1, make_watch_diff_cb(1))
    return result
def IdentityLoss(name):
    w = flow.get_variable(name, (10,), initializer=flow.constant_initializer(100))
    y = flow.math.reduce_sum(w)
    flow.optimizer.SGD(
        flow.optimizer.PiecewiseConstantScheduler([], [5]), momentum=0
    ).minimize(y)
    return y
def assign_fn(value_def: oft.Numpy.Placeholder(value.shape, dtype=dtype)):
    with flow.scope.placement(device_type, "1:0"):
        var = flow.get_variable(
            name="var",
            shape=value.shape,
            dtype=dtype,
            initializer=flow.constant_initializer(0),
        )
        assign(var, value_def)
def lenet():
    with flow.scope.placement("cpu", "0:0"):
        x = flow.get_variable(
            name="x1",
            shape=(100, 1, 28, 28),
            dtype=flow.float,
            initializer=flow.constant_initializer(1),
        )
        return Lenet(x)
def oneflow_marginloss(
    of_anchor: tp.Numpy.Placeholder(shape=anchor.shape),
    of_pos: tp.Numpy.Placeholder(shape=pos.shape),
    of_neg: tp.Numpy.Placeholder(shape=neg.shape),
) -> Dict[str, tp.Numpy]:
    with flow.scope.placement(device_type, "0:0"):
        v = flow.get_variable(
            shape=anchor.shape,
            dtype=flow.float32,
            initializer=flow.constant_initializer(0),
            name="x_var",
        )
        x_anchor = of_anchor + v

    flow.watch_diff(x_anchor, assert_prediction_grad)

    triplet_marginloss = flow.nn.TripletMarginLoss(
        x_anchor,
        of_pos,
        of_neg,
        margin=margin,
        p=p,
        swap=swap,
        reduction="none",
        name="of_tripletmarginloss",
    )
    triplet_marginloss_mean = flow.nn.TripletMarginLoss(
        x_anchor,
        of_pos,
        of_neg,
        margin=margin,
        p=p,
        swap=swap,
        reduction="mean",
        name="of_tripletmarginloss_mean",
    )
    triplet_marginloss_sum = flow.nn.TripletMarginLoss(
        x_anchor,
        of_pos,
        of_neg,
        margin=margin,
        p=p,
        swap=swap,
        reduction="sum",
        name="of_tripletmarginloss_sum",
    )

    with flow.scope.placement(device_type, "0:0"):
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-3]), momentum=0
        ).minimize(triplet_marginloss_mean)

    return {
        "of_triplet_margin_loss": triplet_marginloss,
        "of_triplet_margin_loss_mean": triplet_marginloss_mean,
        "of_triplet_margin_loss_sum": triplet_marginloss_sum,
    }
def _AddMaskedLanguageModelLoss(
    input_blob,
    output_weights_blob,
    positions_blob,
    label_id_blob,
    label_weight_blob,
    seq_length,
    hidden_size,
    vocab_size,
    max_predictions_per_seq,
    hidden_act,
    initializer_range,
):
    with flow.scope.namespace("other"):
        sum_label_weight_blob = flow.math.reduce_sum(label_weight_blob, axis=[-1])
        ones = sum_label_weight_blob * 0.0 + 1.0
        sum_label_weight_blob = flow.math.reduce_sum(sum_label_weight_blob)
        batch_size = flow.math.reduce_sum(ones)
        sum_label_weight_blob = sum_label_weight_blob / batch_size
    with flow.scope.namespace("cls-predictions"):
        input_blob = _GatherIndexes(input_blob, positions_blob, seq_length, hidden_size)
        with flow.scope.namespace("transform"):
            if callable(hidden_act):
                act_fn = op_conf_util.kNone
            else:
                act_fn = hidden_act
            input_blob = bert_util._FullyConnected(
                input_blob,
                input_size=hidden_size,
                units=hidden_size,
                activation=act_fn,
                weight_initializer=bert_util.CreateInitializer(initializer_range),
                name="dense",
            )
            if callable(hidden_act):
                input_blob = hidden_act(input_blob)
            input_blob = bert_util._LayerNorm(input_blob, hidden_size)
        output_bias = flow.get_variable(
            name="output_bias",
            shape=[vocab_size],
            dtype=input_blob.dtype,
            initializer=flow.constant_initializer(1.0),
        )
        logit_blob = flow.matmul(input_blob, output_weights_blob, transpose_b=True)
        logit_blob = flow.nn.bias_add(logit_blob, output_bias)
        label_id_blob = flow.reshape(label_id_blob, [-1])
        pre_example_loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logit_blob, labels=label_id_blob
        )
        pre_example_loss = flow.reshape(pre_example_loss, [-1, max_predictions_per_seq])
        numerator = pre_example_loss * label_weight_blob
        with flow.scope.namespace("loss"):
            numerator = flow.math.reduce_sum(numerator, axis=[-1])
            denominator = sum_label_weight_blob + 1e-5
            loss = numerator / denominator
    return loss, pre_example_loss, logit_blob
def test_int_initializer(test_case):
    initializers = [
        flow.random_uniform_initializer(minval=-6, maxval=18, dtype=flow.int32),
        flow.constant_initializer(value=4, dtype=flow.int32),
    ]
    for initializer in initializers:
        CompareTwoDistribution(test_case, flow.int32, initializer)
def relu_fn():
    with flow.scope.placement(device_type, "1:0"):
        var = flow.get_variable(
            name="var",
            shape=value.shape,
            dtype=dtype,
            initializer=flow.constant_initializer(0),
        )
        ret = flow.nn.relu(var)
        return ret
def Foo():
    with flow.scope.placement("gpu", device_name):
        w = flow.get_variable(
            "w",
            shape=(10,),
            dtype=flow.float,
            initializer=flow.constant_initializer(0),
        )
        print(w.numpy(0))
        flow.losses.add_loss(w)
def Foo():
    with flow.scope.placement("gpu", device_name):
        w = flow.get_variable(
            "w",
            shape=(10,),
            dtype=flow.float,
            initializer=flow.constant_initializer(0),
        )
        print(w.numpy(0))
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.1]), momentum=0
        ).minimize(w)
def do_gather(x_blob, i_blob):
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "params",
            shape=params.shape,
            dtype=flow.float32,
            initializer=flow.constant_initializer(0),
        )
        x = x + x_blob
        y = flow.gather(x, i_blob, axis=axis, batch_dims=batch_dims)
        flow.losses.add_loss(y)
    flow.watch_diff(x, compare_fn)
    return y
def oneflow_marginloss(
    of_input1: tp.Numpy.Placeholder(shape=input1.shape),
    of_input2: tp.Numpy.Placeholder(shape=input2.shape),
    of_target: tp.Numpy.Placeholder(shape=target.shape),
) -> Dict[str, tp.Numpy]:
    with flow.scope.placement(device_type, "0:0"):
        v = flow.get_variable(
            shape=input1.shape,
            dtype=flow.float32,
            initializer=flow.constant_initializer(0),
            name="x_var",
        )
        x_var = of_input1 + v

    flow.watch_diff(x_var, assert_prediction_grad)

    marginloss = flow.nn.MarginRankingLoss(
        of_input1,
        of_input2,
        of_target,
        margin=margin,
        reduction="none",
        name="of_marginloss",
    )
    marginloss_mean = flow.nn.MarginRankingLoss(
        x_var,
        of_input2,
        of_target,
        margin=margin,
        reduction="mean",
        name="of_marginloss_reduce_mean",
    )
    marginloss_sum = flow.nn.MarginRankingLoss(
        of_input1,
        of_input2,
        of_target,
        margin=margin,
        reduction="sum",
        name="of_marginloss_reduce_sum",
    )

    with flow.scope.placement(device_type, "0:0"):
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-3]), momentum=0
        ).minimize(marginloss_mean)

    return {
        "of_margin_ranking_loss": marginloss,
        "of_margin_ranking_loss_mean": marginloss_mean,
        "of_margin_ranking_loss_sum": marginloss_sum,
    }
def deconv2d(
    input,
    filters,
    size,
    name,
    strides=2,
    trainable=True,
    reuse=False,
    const_init=False,
    use_bias=False,
):
    name_ = name if reuse == False else name + "_reuse"

    # weight : [in_channels, out_channels, height, width]
    weight_shape = (input.shape[1], filters, size, size)
    output_shape = (
        input.shape[0],
        input.shape[1],
        input.shape[2] * strides,
        input.shape[3] * strides,
    )

    weight = flow.get_variable(
        name + "-weight",
        shape=weight_shape,
        dtype=input.dtype,
        initializer=flow.random_normal_initializer(stddev=0.02)
        if not const_init
        else get_const_initializer(),
        trainable=trainable,
    )

    output = flow.nn.conv2d_transpose(
        input,
        weight,
        strides=[strides, strides],
        output_shape=output_shape,
        padding="SAME",
        data_format="NCHW",
        name=name_,
    )

    if use_bias:
        bias = flow.get_variable(
            name + "-bias",
            shape=(filters,),
            dtype=input.dtype,
            initializer=flow.constant_initializer(0.0),
            trainable=trainable,
        )
        output = flow.nn.bias_add(output, bias, "NCHW")
    return output
def _conv2d_layer(
    name,
    input,
    filters,
    kernel_size=3,
    strides=1,
    padding="SAME",
    data_format="NCHW",
    dilation_rate=1,
    activation=op_conf_util.kSigmoid,
    use_bias=True,
    trainable=True,
    weight_initializer=flow.random_uniform_initializer(),
    bias_initializer=flow.constant_initializer(),
):
    if isinstance(kernel_size, int):
        kernel_size = (kernel_size, kernel_size)
    else:
        kernel_size = tuple(kernel_size)
    weight_shape = (filters, input.shape[1]) + kernel_size
    weight = flow.get_variable(
        name + "-weight",
        shape=weight_shape,
        dtype=input.dtype,
        initializer=weight_initializer,
    )
    output = flow.nn.conv2d(
        input, weight, strides, padding, data_format, dilation_rate, name=name
    )
    if use_bias:
        bias = flow.get_variable(
            name + "-bias",
            shape=(filters,),
            dtype=input.dtype,
            initializer=bias_initializer,
        )
        output = flow.nn.bias_add(output, bias, data_format)
    if activation is not None:
        if activation == op_conf_util.kRelu:
            output = flow.math.relu(output)
        elif activation == op_conf_util.kSigmoid:
            output = flow.math.sigmoid(output)
        else:
            raise NotImplementedError
    return output
def do_scatter_nd(indices_blob, updates_blob):
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "updates",
            shape=updates.shape,
            dtype=flow.float32,
            initializer=flow.constant_initializer(0),
        )
        x = flow.cast_to_current_logical_view(x)
        x = x + updates_blob
        y = flow.scatter_nd(indices_blob, x, shape)
        flow.losses.add_loss(y)
    flow.watch_diff(x, compare_fn)
    return y
def dense(
    input, units, name, use_bias=False, trainable=True, reuse=False, const_init=False
):
    name_ = name if reuse == False else name + "_reuse"

    in_shape = input.shape
    in_num_axes = len(in_shape)
    assert in_num_axes >= 2

    inputs = flow.reshape(input, (-1, in_shape[-1])) if in_num_axes > 2 else input

    weight = flow.get_variable(
        name="{}-weight".format(name),
        shape=(units, inputs.shape[1]),
        dtype=inputs.dtype,
        # initializer=flow.random_normal_initializer(stddev=0.02)
        initializer=flow.glorot_uniform_initializer(data_format="NCHW")
        if not const_init
        else flow.constant_initializer(0.002),
        trainable=trainable,
        model_name="weight",
    )

    out = flow.matmul(
        a=inputs,
        b=weight,
        transpose_b=True,
        name=name_ + "matmul",
    )

    if use_bias:
        bias = flow.get_variable(
            name="{}-bias".format(name),
            shape=(units,),
            dtype=inputs.dtype,
            initializer=flow.random_normal_initializer(0.0),
            # if not const_init
            # else flow.constant_initializer(0.002),
            trainable=trainable,
            model_name="bias",
        )
        out = flow.nn.bias_add(out, bias, name=name_ + "_bias_add")

    out = flow.reshape(out, in_shape[:-1] + (units,)) if in_num_axes > 2 else out
    return out
def variable_scope_test_job_2(a=of.FixedTensorDef((2, 5))):
    with of.scope.namespace("job2_scope1"):
        indices = of.get_variable(
            "gather_inds",
            shape=(2,),
            dtype=of.int32,
            initializer=of.constant_initializer(1),
            trainable=False,
        )
        output = of.gather(a, indices, axis=1)

    print("indices op name: ", indices.op_name)
    print("gather op name: ", output.op_name)
    return output
def do_gather_nd(x_blob, i_blob):
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "params",
            shape=params.shape,
            dtype=flow.float32,
            initializer=flow.constant_initializer(0),
        )
        x = flow.cast_to_current_logical_view(x)
        x = x + x_blob
        y = flow.gather_nd(x, i_blob)
        flow.losses.add_loss(y)
    flow.watch_diff(x, compare_fn)
    return y
def variable_scope_test_job_1(a=of.FixedTensorDef((1, 3, 6, 6))):
    with of.scope.namespace("job1_scope1"):
        convw = of.get_variable(
            "conv_weight",
            shape=(5, 3, 3, 3),
            dtype=a.dtype,
            initializer=of.random_uniform_initializer(),
            trainable=True,
        )
        conv = of.nn.conv2d(a, convw, 1, "SAME", "NCHW", name="conv")

        with of.scope.namespace("job1_scope2"):
            fcw = of.get_variable(
                "fc_weight",
                shape=(180, 10),
                dtype=a.dtype,
                initializer=of.random_uniform_initializer(),
                trainable=True,
            )
            fc = of.matmul(of.reshape(conv, (conv.shape[0], -1)), fcw, name="fc")
            fcb = of.get_variable(
                "fc_bias",
                shape=(10,),
                dtype=a.dtype,
                initializer=of.constant_initializer(1.0),
                trainable=True,
            )
            fc_bias = of.nn.bias_add(fc, fcb)

        fcw2 = of.get_variable(
            "fc2_weight",
            shape=(10, 20),
            dtype=a.dtype,
            initializer=of.random_uniform_initializer(),
            trainable=True,
        )
        fc2 = of.matmul(fc_bias, fcw2, name="fc2")

    print("conv_weight op name: ", convw.op_name)
    print("conv op name: ", conv.op_name)
    print("fc_weight op name: ", fcw.op_name)
    print("fc_bias op name: ", fcb.op_name)
    print("fc op name: ", fc.op_name)
    print("fc2_weight op name: ", fcw2.op_name)
    print("fc2 op name: ", fc2.op_name)
    return fc2
def watch_matmul_diff_job(
    images: tp.Numpy.Placeholder((3, 3), dtype=flow.float),
) -> None:
    weight_initializer = flow.constant_initializer(2)
    weight_shape = (3, 1)
    weight = flow.get_variable(
        "three-weight", shape=weight_shape, initializer=weight_initializer
    )
    weight_broadcast = flow.broadcast_like(weight, like=images, broadcast_axes=(1,))
    lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.1])
    flow.optimizer.SGD(lr_scheduler, momentum=0.9).minimize(weight_broadcast)
    flow.watch_diff(weight, watch_diff_handler)
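# Context note: job functions like the ones in this file are only callable after being
# registered with OneFlow's lazy-mode `@flow.global_function` decorator and then invoked
# with NumPy inputs. The sketch below is a minimal, self-contained illustration of that
# surrounding harness, assuming the OneFlow 0.x lazy API; `toy_train_job`, "toy-weight",
# and `watch_diff_handler` are hypothetical names, not taken from this file.

import numpy as np
import oneflow as flow
import oneflow.typing as tp

func_config = flow.FunctionConfig()
func_config.default_data_type(flow.float)

def watch_diff_handler(blob):
    # Receives the gradient flowing back to the watched blob as a NumPy array.
    print("grad:", blob)

@flow.global_function(type="train", function_config=func_config)
def toy_train_job(x: tp.Numpy.Placeholder((3, 3), dtype=flow.float)) -> None:
    w = flow.get_variable(
        "toy-weight", shape=(3, 3), initializer=flow.constant_initializer(2)
    )
    loss = flow.math.reduce_sum(x * w)
    flow.optimizer.SGD(
        flow.optimizer.PiecewiseConstantScheduler([], [0.1]), momentum=0
    ).minimize(loss)
    flow.watch_diff(w, watch_diff_handler)

toy_train_job(np.ones((3, 3), dtype=np.float32))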
def clip(values_blob):
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "values",
            shape=values.shape,
            dtype=data_type,
            initializer=flow.constant_initializer(0),
        )
        x = flow.cast_to_current_logical_view(x)
        x = x + values_blob
        y = flow.clip_by_value(x, min, max)
        flow.losses.add_loss(y)
    flow.watch_diff(x, grad_cb)
    return y
def Foo() -> tp.Numpy:
    with flow.scope.placement("cpu", device_name):
        w = flow.get_variable(
            "w",
            shape=(10,),
            dtype=flow.float,
            initializer=flow.constant_initializer(0),
        )
        ones = flow.constant_like(w, value=1.0, dtype=flow.float)
        ref, value = flow.experimental.ssp_variable_proxy(w, buffer_size=buffer_size)
        # do not use `w` again; it is delegated by `ref` and `value`
        # W_mutable = W_mutable + 1
        flow.assign(ref, ref + ones)
        return value
def Foo() -> tp.Numpy:
    with flow.scope.placement("cpu", device_name), flow.experimental.scope.config(
        ssp_num_stages=buffer_size, ssp_stage_id=0
    ):
        w = flow.get_variable(
            "w",
            shape=(10,),
            dtype=flow.float,
            initializer=flow.constant_initializer(0),
        )
        loss = w + flow.constant_like(w, value=0.0, dtype=flow.float)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [-10.0]), momentum=0
        ).minimize(loss)
        return loss
def do_gather(x_blob, i_blob):
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "params",
            shape=params.shape,
            dtype=flow.float32,
            initializer=flow.constant_initializer(0),
        )
        x = x + x_blob
        y = flow.gather(x, i_blob, axis=axis, batch_dims=batch_dims)
        lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [1e-3])
        flow.optimizer.SGD(lr_scheduler, momentum=0).minimize(y)
    flow.watch_diff(x, compare_fn)
    return y
def do_scatter_nd(indices_blob, updates_blob):
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "updates",
            shape=updates.shape,
            dtype=flow.float32,
            initializer=flow.constant_initializer(0),
        )
        x = flow.cast_to_current_logical_view(x)
        x = x + updates_blob
        y = flow.scatter_nd(indices_blob, x, shape)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-3]), momentum=0
        ).minimize(y)
    flow.watch_diff(x, compare_fn)
    return y
def do_unsorted_segment_sum(x_blob, i_blob):
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "data",
            shape=data.shape,
            dtype=flow.float32,
            initializer=flow.constant_initializer(0),
        )
        x = x + x_blob
        y = flow.math.unsorted_segment_sum(
            x, i_blob, axis=axis, num_segments=num_segments
        )
        flow.losses.add_loss(y)
    flow.watch_diff(x, compare_fn)
    return y
def do_gather_nd(x, index):
    x_var = flow.get_variable(
        "params",
        shape=(1,),
        dtype=x_dtype,
        initializer=flow.constant_initializer(0, x_dtype),
    )
    x = x + flow.cast_to_current_logical_view(x_var)
    y = flow.gather_nd(x, index)
    if need_grad:
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-3]), momentum=0
        ).minimize(y)
        if callable(comp_diff_fn):
            flow.watch_diff(x, comp_diff_fn)
    return y
def oneflow_bceloss(
    of_input: tp.Numpy.Placeholder(shape=input.shape),
    of_target: tp.Numpy.Placeholder(shape=target.shape),
    of_weight: tp.Numpy.Placeholder(shape=weight.shape),
) -> Dict[str, tp.Numpy]:
    with flow.scope.placement(device_type, "0:0"):
        v = flow.get_variable(
            shape=target.shape,
            dtype=flow.float32,
            initializer=flow.constant_initializer(1),
            name="v",
        )
        x_var = of_input + v

    flow.watch_diff(x_var, assert_prediction_grad)

    bceloss = flow.nn.BCELoss(
        x_var, of_target, of_weight, reduction="none", name="of_mseloss"
    )
    bceloss_mean = flow.nn.BCELoss(
        x_var,
        of_target,
        of_weight,
        reduction="mean",
        name="of_mseloss_reduce_mean",
    )
    bceloss_sum = flow.nn.BCELoss(
        x_var,
        of_target,
        of_weight,
        reduction="sum",
        name="of_mseloss_reduce_sum",
    )

    # The reference gradient is computed with "mean" reduction,
    # so minimize the mean-reduced loss here.
    with flow.scope.placement(device_type, "0:0"):
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-3]), momentum=0
        ).minimize(bceloss_mean)

    return {
        "of_bce_loss": bceloss,
        "of_bce_loss_mean": bceloss_mean,
        "of_bce_loss_sum": bceloss_sum,
    }
def conv2d(
    input,
    filters,
    size,
    name,
    strides=2,
    padding="same",
    trainable=True,
    reuse=False,
    const_init=False,
    use_bias=True,
):
    name_ = name if reuse == False else name + "_reuse"

    # (output_dim, k_h, k_w, input.shape[3]) if NHWC
    weight_shape = (filters, input.shape[1], size, size)
    weight = flow.get_variable(
        name + "-weight",
        shape=weight_shape,
        dtype=input.dtype,
        initializer=flow.random_normal_initializer(stddev=0.02)
        if not const_init
        else get_const_initializer(),
        trainable=trainable,
        reuse=reuse,
    )

    output = flow.nn.compat_conv2d(
        input,
        weight,
        strides=[strides, strides],
        padding=padding,
        data_format="NCHW",
        name=name_,
    )

    if use_bias:
        bias = flow.get_variable(
            name + "-bias",
            shape=(filters,),
            dtype=input.dtype,
            initializer=flow.constant_initializer(0.0),
            trainable=trainable,
            reuse=reuse,
        )
        output = flow.nn.bias_add(output, bias, "NCHW")
    return output