def BuildModelInitInstruction(builder):
    """Infer the enclosing ``op_conf`` and submit it via ``builder.StatelessCall``.

    ``op_conf`` and ``bn_in_op2blob_object`` are not parameters; they are
    resolved from the scope this function is defined in.

    Args:
        builder: object providing ``StatelessCall``.
    """
    # Inference runs against a freshly constructed upstream signature.
    fresh_signature = op_node_signature_pb.OpNodeSignature()
    # Tag the op with the id of the scope that is current at call time.
    op_conf.scope_symbol_id = flow.current_scope().symbol_id
    inferred_attribute = c_api_util.InferOpConf(op_conf, fresh_signature)

    placement_symbol = flow.current_scope().device_parallel_desc_symbol
    scope_parallel_conf = placement_symbol.parallel_conf
    # The builder receives an attribute object rebuilt from the string form.
    cfg_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString(
        str(inferred_attribute)
    )
    builder.StatelessCall(
        cfg_attribute,
        scope_parallel_conf,
        bn_in_op2blob_object,
        boxing_util.BoxingTo,
    )
def build(builder):
    """Build a ``logical_slice`` user op, run it through ``builder`` and yield ``y_0``.

    ``op_name``, ``input_blob_object``, ``start``, ``stop``,
    ``scope_symbol_id`` and ``Yield`` are resolved from the scope this
    function is defined in.

    Args:
        builder: object providing ``StatelessCall``.
    """
    slice_conf = op_conf_pb.OperatorConf()
    slice_conf.device_tag = flow.current_scope().device_parallel_desc_symbol.device_tag
    slice_conf.name = op_name

    user_conf = slice_conf.user_conf
    user_conf.op_type_name = "logical_slice"
    user_conf.input["x"].s.append("{}/x_0".format(op_name))
    user_conf.output["y"].s.append("{}/y_0".format(op_name))

    # The op is placed where the input blob lives, not where the scope points.
    input_parallel_conf = input_blob_object.parallel_desc_symbol.parallel_conf
    attrs = user_conf.attr
    attrs["parallel_conf"].at_string = str(input_parallel_conf)
    attrs["start"].at_list_int64.val[:] = start
    attrs["stop"].at_list_int64.val[:] = stop
    # One unit step per entry of ``start``.
    attrs["step"].at_list_int64.val[:] = [1] * len(start)

    blob_objects = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject()
    blob_objects["x_0"] = input_blob_object

    inferred_attribute = op_infer_util.Infer(slice_conf, blob_objects, scope_symbol_id)
    cfg_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString(
        str(inferred_attribute)
    )
    builder.StatelessCall(
        cfg_attribute,
        input_parallel_conf,
        blob_objects,
        boxing_util.BoxingTo,
    )
    # "y_0" is read back from the same mapping that was passed to the call.
    Yield(blob_objects["y_0"])
def BuildAssignInstruction(builder):
    """Build a ``logical_slice_assign`` user op and submit it via ``builder``.

    ``ref_blob_object``, ``value_blob_object``, ``start``, ``stop``,
    ``scope_symbol_id`` and ``OP_PREFIX`` are resolved from outside this
    function.

    Args:
        builder: object providing ``StatelessCall``.
    """
    assign_conf = op_conf_pb.OperatorConf()
    assign_conf.device_tag = flow.current_scope().device_parallel_desc_symbol.device_tag
    unique_name = id_util.UniqueStr(OP_PREFIX)
    assign_conf.name = unique_name

    user_conf = assign_conf.user_conf
    user_conf.op_type_name = "logical_slice_assign"
    user_conf.input["value"].s.append("{}/value_0".format(unique_name))
    user_conf.input["ref"].s.append("{}/ref_0".format(unique_name))

    # Placement follows the blob being assigned into.
    ref_parallel_conf = ref_blob_object.parallel_desc_symbol.parallel_conf
    attrs = user_conf.attr
    attrs["parallel_conf"].at_string = str(ref_parallel_conf)
    attrs["start"].at_list_int64.val[:] = start
    attrs["stop"].at_list_int64.val[:] = stop
    # One unit step per entry of ``start``.
    attrs["step"].at_list_int64.val[:] = [1] * len(start)

    blob_objects = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject()
    blob_objects["ref_0"] = ref_blob_object
    blob_objects["value_0"] = value_blob_object

    inferred_attribute = op_infer_util.Infer(assign_conf, blob_objects, scope_symbol_id)
    cfg_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString(
        str(inferred_attribute)
    )
    builder.StatelessCall(
        cfg_attribute,
        ref_parallel_conf,
        blob_objects,
        boxing_util.BoxingTo,
    )
def BuildInstruction(builder):
    """Submit the enclosing ``op_attribute`` via ``builder.StatelessCall``.

    ``op_attribute`` and ``bn_in_op2blob_object`` are resolved from the scope
    this function is defined in; the parallel conf is taken from the scope
    that is current at call time.

    Args:
        builder: object providing ``StatelessCall``.
    """
    placement_symbol = flow.current_scope().device_parallel_desc_symbol
    scope_parallel_conf = placement_symbol.parallel_conf
    cfg_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString(
        str(op_attribute)
    )
    builder.StatelessCall(
        cfg_attribute,
        scope_parallel_conf,
        bn_in_op2blob_object,
        boxing_util.BoxingTo,
    )
def distributed_partial_fc_sample(
    weight: oneflow._oneflow_internal.BlobDesc,
    label: oneflow._oneflow_internal.BlobDesc,
    num_sample: int,
    name: Optional[str] = None,
) -> oneflow._oneflow_internal.BlobDesc:
    """Build and run a ``distributed_partial_fc_sample`` user op.

    Both ``num_sample`` and ``weight.shape[0]`` must be divisible by the
    ``parallel_num`` of the current scope's placement.

    Args:
        weight: blob wired to the op's ``weight`` input.
        label: blob wired to the op's ``label`` input.
        num_sample: value of the op's ``num_sample`` attribute.
        name: op name; a unique ``DistributedPartialFcSample_`` name is
            generated when omitted.

    Returns:
        The result of ``RemoteBlobList()`` for the outputs declared in the
        order ``mapped_label``, ``sampled_label``, ``sampled_weight``.

    Raises:
        AssertionError: if either divisibility requirement is violated.
    """
    scope_placement = flow.current_scope().device_parallel_desc_symbol
    parallel_num = scope_placement.parallel_num
    assert num_sample % parallel_num == 0
    assert weight.shape[0] % parallel_num == 0

    if name is not None:
        op_name = name
    else:
        op_name = id_util.UniqueStr("DistributedPartialFcSample_")

    op_builder = flow.user_op_builder(op_name)
    op_builder = op_builder.Op("distributed_partial_fc_sample")
    op_builder = op_builder.Input("weight", [weight])
    op_builder = op_builder.Input("label", [label])
    op_builder = op_builder.Attr("num_sample", num_sample)
    for output_name in ("mapped_label", "sampled_label", "sampled_weight"):
        op_builder = op_builder.Output(output_name)

    built_op = op_builder.Build()
    return built_op.InferAndTryRun().RemoteBlobList()
def _GetCpu0VariableBlobFromNumpy(
    np_array: np.ndarray, dtype: flow.dtype
) -> oneflow._oneflow_internal.EagerConsistentBlob:
    """Create a non-trainable variable under placement ("cpu", "0:0") and feed it ``np_array``.

    The variable takes its shape from ``np_array`` and is declared with a
    zeros initializer before the array's value is fed in.

    Note:
        ``dtype`` has to be passed explicitly. It cannot be recovered with
        convert_numpy_dtype_to_oneflow_dtype(np_array.dtype), because
        np.int8 == np.char and
        numpy_dtype_to_oneflow_dtype(oneflow_dtype_to_numpy_dtype(flow.int8))
        may be flow.char.

    Args:
        np_array: value to store in the variable.
        dtype: oneflow dtype of the variable.

    Returns:
        The eager variable blob holding ``np_array``.
    """
    with flow.scope.placement("cpu", "0:0"):
        variable_conf = get_variable.GenerateVariableOpConf(
            name=id_util.UniqueStr(OP_PREFIX),
            shape=np_array.shape,
            dtype=dtype,
            initializer=initializer_util.zeros_initializer(dtype=dtype),
            trainable=False,
        )
        # Read the device tag back from the scope entered just above.
        scope_placement = flow.current_scope().device_parallel_desc_symbol
        variable_conf.device_tag = scope_placement.device_tag

        inferred_attribute = op_infer_util.Infer(variable_conf, {})
        variable_blob = get_variable.CreateEagerVariableBlob(
            inferred_attribute, job_name=FAKE_JOB_NAME
        )
        interface_op_read_and_write.FeedValueToInterfaceBlobObject(
            variable_blob.blob_object, np_array
        )
        return variable_blob
def BuildInputInstruction(builder):
    """Add and infer the enclosing ``input_op_conf`` eagerly, then submit it via ``builder``.

    ``arg_blob_def``, ``input_op_conf`` and ``bn_in_op2blob_object`` are
    resolved from the scope this function is defined in.

    Args:
        builder: object providing ``StatelessCall``.
    """
    inferred_attribute = arg_blob_def.EagerAddAndInferOp(input_op_conf)
    scope_parallel_conf = flow.current_scope().device_parallel_desc_symbol.parallel_conf
    cfg_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString(
        str(inferred_attribute)
    )
    builder.StatelessCall(
        cfg_attribute,
        scope_parallel_conf,
        bn_in_op2blob_object,
        boxing_util.BoxingTo,
    )
def _UnderSingleDevicePlacementScope(f, *args):
    """Call ``f(*args)`` under a placement scope pinned to a single device.

    The device is the first (machine_id, device_id) pair that
    ``_EachMachineIdAndDeviceId`` produces for the current scope's placement;
    any further pairs are ignored.

    Args:
        f: callable to invoke.
        *args: positional arguments forwarded to ``f``.

    Returns:
        Whatever ``f(*args)`` returns, or ``None`` when no pair is produced.
    """
    placement_symbol = flow.current_scope().device_parallel_desc_symbol
    for first_pair in _EachMachineIdAndDeviceId(placement_symbol):
        # Only the first pair is ever used.
        break
    else:
        return None

    machine_id, device_id = first_pair
    single_device = "@%d:%d" % (machine_id, device_id)
    with flow.scope.placement(placement_symbol.device_tag, single_device):
        return f(*args)
def _AssignOpConf():
    """Return an ``OperatorConf`` named "assign" with fixed ref/value blob names.

    The device tag is copied from the current scope's placement.
    """
    assign_op = op_conf_pb.OperatorConf()
    assign_op.name = "assign"

    assign_fields = assign_op.assign_conf
    assign_fields.ref = "assign/ref"
    assign_fields.value = "assign/value"

    scope_placement = flow.current_scope().device_parallel_desc_symbol
    assign_op.device_tag = scope_placement.device_tag
    return assign_op
def __init__(self, op_name, op_type_name=None):
    """Initialise the operator conf for this op.

    Args:
        op_name: name stored in the operator conf.
        op_type_name: optional user op type name; the conf's
            ``user_conf.op_type_name`` is only written when this is given.
    """
    conf = op_conf_util.OperatorConf()
    self.op_conf_ = conf
    conf.name = op_name
    if op_type_name is not None:
        conf.user_conf.op_type_name = op_type_name
    # The device tag is taken from the scope active at construction time.
    scope_placement = flow.current_scope().device_parallel_desc_symbol
    conf.device_tag = scope_placement.device_tag
    self.output_arg_key_list_ = []
def CurJobAddMirroredOp(op_conf, scope_symbol=None):
    """Add ``op_conf`` to the current job as a mirrored op and return its attribute.

    Args:
        op_conf: operator conf to add; its ``scope_symbol_id`` is overwritten
            and its ``device_tag`` is filled in when unset.
        scope_symbol: scope to attach; defaults to the current scope.

    Returns:
        The op attribute returned by
        ``CurJobBuildAndInferCtx_AddAndInferMirroredOp``.

    Raises:
        AssertionError: if ``hob.consistent_view_enabled(None)`` is truthy.
    """
    assert not hob.consistent_view_enabled(None)

    scope = flow.current_scope() if scope_symbol is None else scope_symbol
    op_conf.scope_symbol_id = scope.symbol_id
    if not op_conf.HasField("device_tag"):
        op_conf.device_tag = scope.device_parallel_desc_symbol.device_tag

    op_attr = c_api_util.CurJobBuildAndInferCtx_AddAndInferMirroredOp(op_conf)
    if not c_api_util.IsInterfaceOpConf(op_conf):
        return op_attr

    # Interface ops are additionally registered with the default session.
    default_session = session_ctx.GetDefaultSession()
    default_session.AddInfo4InterfaceOpName(op_conf.name, op_attr)
    return op_attr
def compile(self, op_list):
    """Add every op in ``op_list`` to the current job, then complete and rebuild it.

    A warning is printed for each op that ``_need_check_device_tag`` selects
    and whose ``device_tag`` differs from the current scope's; the op is
    still added.

    Args:
        op_list: iterable of operator confs.
    """
    self._check_status(self.SessionStatus.OPEN)
    scope_device_tag = flow.current_scope().device_parallel_desc_symbol.device_tag
    mismatch_warning = (
        "WARNING: the device_tag of op {} is not equal to the device_tag of "
        "seesion's current scope ({} vs. {}), which may cause the op graph "
        "to be incompatible"
    )
    for op_conf in op_list:
        if _need_check_device_tag(op_conf):
            if op_conf.device_tag != scope_device_tag:
                print(
                    mismatch_warning.format(
                        op_conf.name, op_conf.device_tag, scope_device_tag
                    )
                )
        compile_ctx.CurJobAddOp(op_conf)

    oneflow._oneflow_internal.CurJobBuildAndInferCtx_Complete()
    oneflow._oneflow_internal.CurJobBuildAndInferCtx_Rebuild()
def EagerAddAndInferOp(self, op_conf: op_conf_util.OperatorConf) -> Any:
    """Add ``op_conf`` as a consistent op under a single-device placement.

    If the current scope is a "gpu" placement whose only machine id is 0 and
    whose ``parallel_num`` is 1, the op is added under that same gpu device.
    In every other case it is added under ("cpu", "@0:0").

    Args:
        op_conf: operator conf to add.

    Returns:
        The result of ``compile_context.CurJobAddConsistentOp(op_conf)``.
    """
    placement = flow.current_scope().device_parallel_desc_symbol
    single_gpu_on_machine0 = (
        placement.device_tag == "gpu"
        and list(dict(placement.machine_id2device_id_list)) == [0]
        and placement.parallel_num == 1
    )
    if single_gpu_on_machine0:
        # Reuse the first device id listed for machine 0.
        target = ("gpu", "@0:%s" % placement.machine_id2device_id_list[0][0])
    else:
        target = ("cpu", "@0:0")

    with flow.scope.placement(*target):
        return compile_context.CurJobAddConsistentOp(op_conf)
def distribute_add(xs, name=None):
    """Create a ``distribute_add`` op over ``xs`` and return its "out" blob.

    Args:
        xs: input blobs; there must be exactly one per parallel device of the
            current scope.
        name: op name; a unique ``DistributeAdd_`` name is generated when
            omitted.

    Returns:
        A remote blob referring to output "out" of the created op.

    Raises:
        AssertionError: if ``len(xs)`` differs from the scope's
            ``parallel_num``.
    """
    scope_placement = flow.current_scope().device_parallel_desc_symbol
    assert scope_placement.parallel_num == len(xs)

    op_conf = op_conf_util.OperatorConf()
    op_conf.name = id_util.UniqueStr("DistributeAdd_") if name is None else name

    add_conf = op_conf.distribute_add_conf
    # "in" is a Python keyword, so the field has to be reached via getattr.
    input_lbns = getattr(add_conf, "in")
    input_lbns.extend([_SoleConsistentLbn(x) for x in xs])
    add_conf.out = "out"
    interpret_util.ConsistentForward(op_conf)

    out_lbi = logical_blob_id_util.LogicalBlobId()
    out_lbi.op_name = op_conf.name
    out_lbi.blob_name = "out"
    return remote_blob_util.RemoteBlob(out_lbi)
def two_stage_reduce(x, axis=None, keepdims=False, op_type_name=None, name=None):
    """Reduce ``x`` in two stages: once per device, then once globally.

    ``x`` is split along its distribute axis, ``reduce_device_stage`` is run
    on each piece under a placement scope for one device, the per-device
    outputs and counts are concatenated and marked broadcast, and
    ``reduce_global_stage`` produces the final result.

    Args:
        x: input blob; must satisfy ``check_x_dictribute(x, axis)``.
        axis: reduction axis/axes, normalised by ``_check_axis``.
        keepdims: forwarded to the global stage.
        op_type_name: prefix for the op type names of both stages. Required
            despite the ``None`` default.
        name: prefix for the op names of both stages. Required despite the
            ``None`` default.

    Returns:
        The output of ``reduce_global_stage``.

    Raises:
        TypeError: if ``op_type_name`` or ``name`` is ``None``.
        AssertionError: if ``check_x_dictribute(x, axis)`` is falsy.
    """
    # Both values are concatenated with str suffixes below. Reject None up
    # front so the failure is explicit and happens before any op is created,
    # instead of surfacing as "NoneType + str" in the middle of the per-device
    # loop.
    if op_type_name is None or name is None:
        raise TypeError(
            "two_stage_reduce() requires `op_type_name` and `name`; "
            "got op_type_name={!r}, name={!r}".format(op_type_name, name)
        )

    assert check_x_dictribute(x, axis)
    axis = _check_axis(axis, x.shape)

    distribute_axis = x.distribute.axis
    x_list = flow.advanced.distribute_split(x, axis=distribute_axis)

    parallel_desc_symbol = flow.current_scope().device_parallel_desc_symbol
    device_tag = parallel_desc_symbol.device_tag
    device_stage_type_name = op_type_name + "_device_stage"

    device_stage_out_list = []
    device_stage_count_list = []
    # parallel_id indexes x_list in the order devices are visited.
    parallel_id = 0
    for machine_id, device_ids in parallel_desc_symbol.machine_id2device_id_list.items():
        for device_id in device_ids:
            single_device = "@" + str(machine_id) + ":" + str(device_id)
            with flow.scope.placement(device_tag, single_device):
                device_stage_out, device_stage_count = reduce_device_stage(
                    x_list[parallel_id],
                    axis,
                    device_stage_type_name,
                    name + "_device_stage" + str(parallel_id),
                )
                device_stage_out_list.append(device_stage_out)
                device_stage_count_list.append(device_stage_count)
                parallel_id += 1

    device_stage_out = flow.advanced.distribute_concat(
        device_stage_out_list, axis=distribute_axis
    )
    device_stage_count = flow.advanced.distribute_concat(
        device_stage_count_list, axis=distribute_axis
    )
    device_stage_out = device_stage_out.with_distribute(flow.distribute.broadcast())
    device_stage_count = device_stage_count.with_distribute(flow.distribute.broadcast())

    return reduce_global_stage(
        device_stage_out,
        device_stage_count,
        axis,
        keepdims,
        op_type_name + "_global_stage",
        name + "_global_stage",
    )
def distribute_clone(x, name=None):
    """Create a ``distribute_clone`` op for ``x`` with one output per parallel device.

    Args:
        x: input blob; its ``unique_name`` becomes the op's "in" field.
        name: op name; a unique ``DistributeClone_`` name is generated when
            omitted.

    Returns:
        A tuple of remote blobs, one for each output ``out_0`` ...
        ``out_{parallel_num - 1}`` of the created op.
    """
    op_conf = op_conf_util.OperatorConf()
    op_conf.name = id_util.UniqueStr("DistributeClone_") if name is None else name

    clone_conf = op_conf.distribute_clone_conf
    # "in" is a Python keyword, so the field has to be set via setattr.
    setattr(clone_conf, "in", x.unique_name)

    parallel_size = flow.current_scope().device_parallel_desc_symbol.parallel_num
    out_names = ["out_%d" % i for i in range(parallel_size)]
    clone_conf.out.extend(out_names)
    interpret_util.ConsistentForward(op_conf)

    def _remote_blob_for(blob_name):
        # Build the logical blob id that addresses one output of this op.
        lbi = logical_blob_id_util.LogicalBlobId()
        lbi.op_name = op_conf.name
        lbi.blob_name = blob_name
        return remote_blob_util.RemoteBlob(lbi)

    return tuple(_remote_blob_for(blob_name) for blob_name in out_names)
def Foo():
    """Assert the current scope's device tag is "cpu" and return variable "w".

    ``test_case`` is resolved from the scope this function is defined in.
    """
    scope_placement = flow.current_scope().device_parallel_desc_symbol
    test_case.assertEqual("cpu", scope_placement.device_tag)
    ones_initializer = flow.constant_initializer(1)
    return flow.get_variable("w", (10,), initializer=ones_initializer)
def __init__(self, *args, **kwargs):
    """Run ``UserOpConfBuilder.__init__`` and tag the op conf with the current scope.

    Args:
        *args: forwarded unchanged to ``UserOpConfBuilder.__init__``.
        **kwargs: forwarded unchanged to ``UserOpConfBuilder.__init__``.
    """
    UserOpConfBuilder.__init__(self, *args, **kwargs)
    # The scope id is captured once, at construction time.
    active_scope = flow.current_scope()
    self.user_op_module.op_conf.scope_symbol_id = active_scope.symbol_id
def Infer(op_conf, ibn2blob_object, scope_symbol_id=None):
    """Infer ``op_conf`` against the upstream signature built from ``ibn2blob_object``.

    Args:
        op_conf: operator conf; its ``scope_symbol_id`` is overwritten.
        ibn2blob_object: passed to ``MakeUpstreamSignature``.
        scope_symbol_id: scope id to record on ``op_conf``; defaults to the
            current scope's symbol id.

    Returns:
        The result of ``c_api_util.InferOpConf``.
    """
    op_conf.scope_symbol_id = (
        flow.current_scope().symbol_id if scope_symbol_id is None else scope_symbol_id
    )
    return c_api_util.InferOpConf(op_conf, MakeUpstreamSignature(ibn2blob_object))
def ConsistentForward(op_conf, scope_symbol=None):
    """Forward ``op_conf`` as a consistent op through ``LazyInfer`` or ``EagerForward``.

    ``enable_if.unique`` chooses between the two candidates.

    Args:
        op_conf: operator conf to forward.
        scope_symbol: scope to use; defaults to the current scope.

    Returns:
        Whatever the selected function returns.
    """
    scope = scope_symbol if scope_symbol is not None else flow.current_scope()
    forward = enable_if.unique([LazyInfer, EagerForward])
    return forward(compile_ctx.CurJobAddConsistentOp, op_conf, scope)