Code Example #1
def _batch_norm(inputs, name, trainable=True, training=True):
    params_shape = [inputs.shape[1]]
    # Float32 required to avoid precision-loss when using fp16 input/output
    params_dtype = flow.float32 if inputs.dtype == flow.float16 else inputs.dtype

    if not flow.current_global_function_desc().IsTrainable() or not trainable:
        training = False

    with flow.scope.namespace(name):
        beta = flow.get_variable(
            name="beta",
            shape=params_shape,
            dtype=params_dtype,
            initializer=flow.zeros_initializer(),
            trainable=trainable,
            distribute=distribute_util.broadcast(),
        )

        gamma = flow.get_variable(
            name="gamma",
            shape=params_shape,
            dtype=params_dtype,
            initializer=flow.ones_initializer(),
            trainable=trainable,
            distribute=distribute_util.broadcast(),
        )

        moving_mean = flow.get_variable(
            name="moving_mean",
            shape=params_shape,
            dtype=params_dtype,
            initializer=flow.zeros_initializer(),
            trainable=False,
            distribute=distribute_util.broadcast(),
        )

        moving_variance = flow.get_variable(
            name="moving_variance",
            shape=params_shape,
            dtype=params_dtype,
            initializer=flow.ones_initializer(),
            trainable=False,
            distribute=distribute_util.broadcast(),
        )
    builder = (flow.user_op_builder(
        id_util.UniqueStr(name)).Op("normalization").Input(
            "x", [inputs]).Input("moving_mean", [moving_mean]).Input(
                "moving_variance",
                [moving_variance]).Input("gamma", [gamma]).Input(
                    "beta", [beta]).Output("y").Attr("axis", 1).Attr(
                        "epsilon",
                        1.001e-5).Attr("training",
                                       training).Attr("momentum", 0.997))
    if trainable and training:
        builder = builder.Output("mean").Output("inv_variance")
    return builder.Build().InferAndTryRun().RemoteBlobList()[0]
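
A minimal call-site sketch for the helper above, assuming an NCHW feature map coming out of a convolution (the `conv_bn` wrapper, its arguments, and the conv call are illustrative, not part of this file):

def conv_bn(inputs, weight):
    # Hypothetical wrapper: convolve, then normalize the channel axis,
    # matching the hard-coded Attr("axis", 1) in _batch_norm above.
    conv = flow.nn.conv2d(inputs, weight, strides=1, padding="SAME", name="conv")
    return _batch_norm(conv, name="bn", trainable=True, training=True)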
Code Example #2
def prelu(
    inputs: remote_blob_util.BlobDef,
    alpha_initializer: Optional[op_conf_util.InitializerConf] = None,
    alpha_regularizer: Optional[op_conf_util.RegularizerConf] = None,
    shared_axes: Optional[Sequence[int]] = None,
    trainable: bool = True,
    name: str = "PRelu",
    model_distribute: distribute_util.Distribute = distribute_util.broadcast(),
) -> remote_blob_util.BlobDef:
    alpha_shape = list(inputs.shape[1:])
    if shared_axes is not None:
        for i in shared_axes:
            assert i >= 1 and i < len(inputs.shape)
            alpha_shape[i - 1] = 1

    if alpha_initializer is None:
        alpha_initializer = flow.constant_initializer(0)

    with flow.scope.namespace(name):
        alpha = flow.get_variable(
            name="alpha",
            shape=alpha_shape,
            dtype=inputs.dtype,
            initializer=alpha_initializer,
            regularizer=alpha_regularizer,
            trainable=trainable,
            distribute=model_distribute,
            reuse=False,
        )

    op = (flow.user_op_builder(name).Op("prelu").Input("x", [inputs]).Input(
        "alpha", [alpha]).Output("y").Build())
    return op.InferAndTryRun().SoleOutputBlob()
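
Example #13 below shows this wrapper used inside a LeNet-style model; a minimal sketch of the call, with alpha shared across the spatial axes of an NCHW blob (the `conv_out` blob is illustrative):

# Sketch: one alpha per channel, shared across axes 2 and 3 of an NCHW feature map.
prelu_out = flow.layers.prelu(
    conv_out,
    alpha_initializer=flow.truncated_normal(0.1),
    shared_axes=[2, 3],
    name="Prelu1",
)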
Code Example #3
File: get_variable.py  Project: zhcute/oneflow
def get_eager_variable(
    name,
    shape=None,
    dtype=None,
    initializer=None,
    regularizer=None,
    trainable=None,
    model_name=None,
    random_seed=None,
    distribute=distribute_util.broadcast(),
    reuse=True,
):
    assert isinstance(name, str)
    assert isinstance(
        shape, (list, tuple)
    ), "param shape should be a list or tuple of dimension"

    job_name = c_api_util.JobBuildAndInferCtx_GetCurrentJobName()
    name = name_scope.GetJobNameScopePrefix(job_name) + name
    sess = session_ctx.GetDefaultSession()
    var_blob, job_var_blob = sess.TryGetVariableBlobOfJobFromStash(job_name, name)

    if reuse is False:
        assert job_var_blob is None, (
            "varaible '{}' already exists, "
            "getting the same variable is not allowed "
            "when reuse is False".format(name)
        )

    if job_var_blob is None:
        op_conf = _GenerateVariableOpConf(
            name=name,
            shape=shape,
            dtype=dtype,
            initializer=initializer,
            regularizer=regularizer,
            trainable=trainable,
            model_name=model_name,
            random_seed=random_seed,
            distribute=distribute,
        )
        op_attribute = compile_context.CurJobAddConsistentOp(op_conf)
        if var_blob is None:
            var_blob = _CreateEagerVariableBlob(op_attribute)
            op_executor.EagerInitVariableBlob(sess, op_conf, var_blob)

        assert isinstance(var_blob, remote_blob_util.EagerConsistentBlob)
        sess.StashVariableBlob4Job(job_name, op_conf.name, var_blob)
    else:
        assert isinstance(job_var_blob, remote_blob_util.EagerConsistentBlob)
        assert isinstance(var_blob, remote_blob_util.EagerConsistentBlob)
        assert var_blob.IdenticalTo(job_var_blob)

    bw_blob_register = gradient_util.GetDefaultBackwardBlobRegister()
    bw_blob_register.TrySetObject4BlobName(
        var_blob.logical_blob_name, var_blob.blob_object
    )
    return var_blob
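
get_eager_variable is normally reached through flow.get_variable (api_get_variable in Examples #6 and #7), which picks the eager or lazy implementation via enable_if. A minimal sketch of a call that can end up here, adapted from Example #7's first example (whether it dispatches to the eager or lazy path depends on the session configuration):

def watch_handler(y: tp.Numpy):
    print("out", y)


@flow.global_function()
def variable_job() -> None:
    variable = flow.get_variable(
        "variable-weight",
        shape=(1, 3, 2, 2),
        dtype=flow.float32,
        initializer=flow.constant_initializer(1.25),
        trainable=True,
    )
    flow.watch(variable, watch_handler)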
Code Example #4
def GenerateVariableOpConf(
    name,
    shape,
    dtype=None,
    initializer=None,
    regularizer=None,
    trainable=None,
    model_name=None,
    random_seed=None,
    distribute=distribute_util.broadcast(),
):
    op_conf = op_conf_util.OperatorConf()
    op_conf.name = name
    op_conf.variable_conf.shape.dim.extend(shape)

    assert dtype is not None
    op_conf.variable_conf.data_type = dtype.oneflow_proto_dtype

    if rt_mode.CurrentMode() == rt_mode.NORMAL_MODE:
        root_path = None
    else:
        root_path = (
            compile_context.GetCurJobConfigProto().default_initialize_with_snapshot_path()
        )
        dir_path = os.path.join(root_path, name)
        file_path = os.path.join(dir_path, "out")
    if root_path and os.path.isfile(file_path):
        op_conf.variable_conf.initialize_with_snapshot.path = dir_path
        op_conf.variable_conf.initialize_with_snapshot.key = "out"
    else:
        if root_path:
            print("{} not found, will be initialized".format(file_path))
        if initializer is not None:
            op_conf.variable_conf.initializer.CopyFrom(initializer)

    if regularizer is not None:
        op_conf.variable_conf.regularizer.CopyFrom(regularizer)

    if trainable is not None:
        op_conf.trainable = trainable

    if model_name is not None:
        op_conf.variable_conf.model_name = model_name

    if type(distribute) is distribute_util.SplitDistribute:
        op_conf.variable_conf.split_axis.value = distribute.axis
    else:
        op_conf.variable_conf.split_axis.ClearField("value")

    if random_seed is not None:
        op_conf.variable_conf.random_seed = random_seed

    op_conf.variable_conf.out = "out"
    return op_conf
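
The returned OperatorConf is what the variable getters feed into blob creation; a short sketch of that flow, following get_lazy_variable in Example #5 (_CreateVariableBlob is referenced there but not shown in this listing; the name, shape, and initializer here are illustrative):

op_conf = GenerateVariableOpConf(
    name="weight",
    shape=[128, 64],
    dtype=flow.float32,
    initializer=flow.ones_initializer(),
    trainable=True,
    distribute=distribute_util.broadcast(),
)
var_blob = _CreateVariableBlob(op_conf)  # see Example #5; helper not shown here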
Code Example #5
def get_lazy_variable(
        name,
        shape=None,
        dtype=None,
        initializer=None,
        regularizer=None,
        trainable=None,
        model_name=None,
        random_seed=None,
        distribute=distribute_util.broadcast(),
        reuse=True,
):
    assert isinstance(name, str)
    assert isinstance(
        shape,
        (list, tuple)), "param shape should be a list or tuple of dimensions"

    job_name = c_api_util.JobBuildAndInferCtx_GetCurrentJobName()
    name = name_scope.GetJobNameScopePrefix(job_name) + name
    sess = session_ctx.GetDefaultSession()
    var_blob, job_var_blob = sess.TryGetVariableBlobOfJobFromStash(
        job_name, name)

    if reuse is False:
        assert job_var_blob is None, (
            "varaible '{}' already exists, "
            "getting the same variable is not allowed "
            "when param reuse is False".format(name))

    if job_var_blob is None:
        op_conf = GenerateVariableOpConf(
            name=name,
            shape=shape,
            dtype=dtype,
            initializer=initializer,
            regularizer=regularizer,
            trainable=trainable,
            model_name=model_name,
            random_seed=random_seed,
            distribute=distribute,
        )
        job_var_blob = _CreateVariableBlob(op_conf)
        assert isinstance(job_var_blob, remote_blob_util.LazyConsistentBlob)
        sess.StashVariableBlob4Job(job_name, op_conf.name, job_var_blob)
        if var_blob is not None:
            assert isinstance(var_blob, remote_blob_util.LazyConsistentBlob)
            assert var_blob.IdenticalTo(job_var_blob)
    else:
        assert isinstance(job_var_blob, remote_blob_util.LazyConsistentBlob)
        assert isinstance(var_blob, remote_blob_util.LazyConsistentBlob)
        assert var_blob.IdenticalTo(job_var_blob)

    return job_var_blob
Code Example #6
File: get_variable.py  Project: zhcute/oneflow
def api_get_variable(
    name: str,
    shape: Optional[Sequence[int]] = None,
    dtype: Optional[dtype_util.dtype] = dtype_util.float32,
    initializer: Optional[op_conf_util.InitializerConf] = None,
    regularizer: Optional[op_conf_util.RegularizerConf] = None,
    trainable: Optional[bool] = None,
    model_name: Optional[str] = None,
    random_seed: Optional[int] = None,
    distribute: distribute_util.Distribute = distribute_util.broadcast(),
    reuse: bool = True,
) -> remote_blob_util.BlobDef:
    r"""Create a variable or retrieve an existing one.

    Args:
        name: Name of this variable. One variable can be shared by multiple OneFlow functions. `None` by default
        shape: Shape of the variable. `None` by default
        dtype: Data type of the variable. `None` by default
        initializer: An initializer object. For instance, a :func:`~oneflow.ones_initializer`. `None` by default
        trainable: A `bool` indicating whether this variable is trainable. `True` by default
        model_name: A `string`. `'weight'` or `'bias'`. `None` by default
        random_seed: Random seed for random initializers. `None` by default
    """
    api = enable_if.unique([get_lazy_variable, get_eager_variable])
    return api(
        name,
        shape=shape,
        dtype=dtype,
        initializer=initializer,
        regularizer=regularizer,
        trainable=trainable,
        model_name=model_name,
        random_seed=random_seed,
        distribute=distribute,
        reuse=reuse,
    )
Code Example #7
def api_get_variable(
    name: str,
    shape: Optional[Sequence[int]] = None,
    dtype: Optional[dtype_util.dtype] = dtype_util.float32,
    initializer: Optional[op_conf_util.InitializerConf] = None,
    regularizer: Optional[op_conf_util.RegularizerConf] = None,
    trainable: Optional[bool] = None,
    model_name: Optional[str] = None,
    random_seed: Optional[int] = None,
    distribute: distribute_util.Distribute = distribute_util.broadcast(),
    reuse: bool = True,
) -> remote_blob_util.BlobDef:
    r"""Create a variable or retrieve an existing one.

    Args:
        name: Name of this variable. One variable can be shared by multiple OneFlow functions. `None` by default
        shape: Shape of the variable. `None` by default
        dtype: Data type of the variable. `None` by default
        initializer: An initializer object. For instance, a :func:`~oneflow.ones_initializer`. `None` by default
        trainable: A `bool` indicating whether this variable is trainable. `True` by default
        model_name: A `string`. `'weight'` or `'bias'`. `None` by default
        random_seed: Random seed for random initializers. `None` by default

    For example: 

    Example 1: 

    .. code-block:: python 

        import oneflow as flow
        import oneflow.typing as tp


        def watch_handler(y: tp.Numpy):
            print("out", y)


        @flow.global_function()
        def variable_Job() -> None:
            init = flow.constant_initializer(1.25)
            variable = flow.get_variable(
                "variable-weight",
                shape=(1, 3, 2, 2),
                initializer=init,
                trainable=True
            )
            flow.watch(variable, watch_handler)


        checkpoint = flow.train.CheckPoint()
        checkpoint.init()
        variable_Job()

        # out [[[[1.25 1.25]
        #        [1.25 1.25]]

        #       [[1.25 1.25]
        #        [1.25 1.25]]

        #       [[1.25 1.25]
        #        [1.25 1.25]]]]
    
    Example 2: 

    .. code-block:: python 

        import oneflow as flow
        import numpy as np
        import oneflow.typing as tp


        def conv2d(input, filters, kernel_size, strides, padding, name):
            input_shape = input.shape
            weight_initializer = flow.truncated_normal(0.1)
            weight_regularizer = flow.regularizers.l2(0.0005)
            weight_shape = (filters,
                            input_shape[1],
                            kernel_size[0],
                            kernel_size[1])

            weight = flow.get_variable(
                name + "-weight",
                shape=weight_shape,
                initializer=weight_initializer,
                regularizer=weight_regularizer,
            )
            return flow.nn.conv2d(input, weight, strides, padding, name=name)


        @flow.global_function()
        def conv2d_Job(x: tp.Numpy.Placeholder((1, 64, 32, 32))
        ) -> tp.Numpy:
            conv = conv2d(x,
                        filters=128,
                        kernel_size=[3, 3],
                        strides=2,
                        padding='SAME',
                        name="Convlayer")
            return conv


        x = np.random.randn(1, 64, 32, 32).astype(np.float32)
        out = conv2d_Job(x)

        # out.shape (1, 128, 16, 16)

    """
    api = enable_if.unique([get_lazy_variable, get_eager_variable])
    return api(
        name,
        shape=shape,
        dtype=dtype,
        initializer=initializer,
        regularizer=regularizer,
        trainable=trainable,
        model_name=model_name,
        random_seed=random_seed,
        distribute=distribute,
        reuse=reuse,
    )
Code Example #8
File: blob_desc.py  Project: Sodu-Qinming/Oneflow
def with_broadcast_distribute(self):
    return self.with_distribute(distribute_util.broadcast())
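
with_broadcast_distribute is a thin convenience over with_distribute; Example #11 below uses the general form directly. A one-line sketch of the equivalence (`weight` is any BlobDef):

# Equivalent ways to mark a blob as broadcast-distributed:
weight = weight.with_distribute(distribute_util.broadcast())
weight = weight.with_broadcast_distribute()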
Code Example #9
File: layers.py  Project: xy548/oneflow
def batch_normalization(
    inputs: remote_blob_util.BlobDef,
    axis: int = -1,
    momentum: float = 0.99,
    epsilon: float = 0.001,
    center: bool = True,
    scale: bool = True,
    beta_initializer: Optional[op_conf_util.InitializerConf] = None,
    gamma_initializer: Optional[op_conf_util.InitializerConf] = None,
    beta_regularizer: Optional[op_conf_util.RegularizerConf] = None,
    gamma_regularizer: Optional[op_conf_util.RegularizerConf] = None,
    moving_mean_initializer: Optional[op_conf_util.InitializerConf] = None,
    moving_variance_initializer: Optional[op_conf_util.InitializerConf] = None,
    trainable: bool = True,
    training: bool = True,
    name: str = "BatchNorm",
) -> remote_blob_util.BlobDef:
    r"""Analogous to `tf.keras.layers.BatchNormalization <https://www.tensorflow.org/api_docs/python/tf/keras/layers/BatchNormalization>`_

    Args:
        inputs (remote_blob_util.BlobDef): Input `Blob`.
        axis (int, optional): An int specifying the axis that should be normalized. Default is -1, which normalizes the last axis.
        momentum (float, optional): A float specifying the momentum for the moving average. Defaults to 0.99.
        epsilon (float, optional): A small float added to avoid division by zero. Defaults to 0.001.
        center (bool, optional): A boolean specifies whether to add offset to normalized `Blob`. Defaults to True.
        scale (bool, optional): A boolean specifies whether to multiply normalized `Blob` by gamma. Defaults to True.
        beta_initializer (Optional[op_conf_util.InitializerConf], optional): Initializer for beta. Defaults to None.
        gamma_initializer (Optional[op_conf_util.InitializerConf], optional): Initializer for gamma. Defaults to None.
        beta_regularizer (Optional[op_conf_util.RegularizerConf], optional): Regularizer for beta. Defaults to None.
        gamma_regularizer (Optional[op_conf_util.RegularizerConf], optional): Regularizer for gamma. Defaults to None.
        moving_mean_initializer (Optional[op_conf_util.InitializerConf], optional): Initializer for moving mean. Defaults to None.
        moving_variance_initializer (Optional[op_conf_util.InitializerConf], optional): Initializer for moving variance. Defaults to None.
        trainable (bool, optional): A boolean specifying whether to train the variables. Defaults to True.
        training (bool, optional): A boolean specifying whether the model is currently being trained. Defaults to True.
        name (str, optional): This layer's name. Defaults to "BatchNorm".

    Returns:
        remote_blob_util.BlobDef: A `Blob` with the same shape as the input.

    Raises:
        ValueError: If axis is out of range of the input's dimensions.
    """
    if axis < 0:
        axis += len(inputs.shape)
    assert axis >= 0 and axis < len(inputs.shape)

    params_shape = [inputs.shape[axis]]
    # Float32 required to avoid precision-loss when using fp16 input/output
    params_dtype = flow.float32 if inputs.dtype == flow.float16 else inputs.dtype

    if not flow.current_global_function_desc().IsTrainable() or not trainable:
        training = False

    with flow.scope.namespace(name):
        if center:
            beta = flow.get_variable(
                name="beta",
                shape=params_shape,
                dtype=params_dtype,
                initializer=beta_initializer or flow.zeros_initializer(),
                regularizer=beta_regularizer,
                trainable=trainable,
                distribute=distribute_util.broadcast(),
                reuse=False,
            )
        else:
            beta = flow.constant(0,
                                 dtype=params_dtype,
                                 shape=params_shape,
                                 name="beta")

        if scale:
            gamma = flow.get_variable(
                name="gamma",
                shape=params_shape,
                dtype=params_dtype,
                initializer=gamma_initializer or flow.ones_initializer(),
                regularizer=gamma_regularizer,
                trainable=trainable,
                distribute=distribute_util.broadcast(),
                reuse=False,
            )
        else:
            gamma = flow.constant(1,
                                  dtype=params_dtype,
                                  shape=params_shape,
                                  name="gamma")

        moving_mean = flow.get_variable(
            name="moving_mean",
            shape=params_shape,
            dtype=params_dtype,
            initializer=moving_mean_initializer or flow.zeros_initializer(),
            trainable=False,
            distribute=distribute_util.broadcast(),
            reuse=False,
        )

        moving_variance = flow.get_variable(
            name="moving_variance",
            shape=params_shape,
            dtype=params_dtype,
            initializer=moving_variance_initializer or flow.ones_initializer(),
            trainable=False,
            distribute=distribute_util.broadcast(),
            reuse=False,
        )

    if flow.current_scope().device_parallel_desc_symbol.device_tag == "cpu":
        if training:
            reduce_axis = []
            for dim in range(len(inputs.shape)):
                if dim != axis:
                    reduce_axis.append(dim)
            mean, variance = flow.nn.moments(inputs,
                                             reduce_axis,
                                             keepdims=False)

            def update_moving(moving, this_batch):
                moving_identity = flow.identity(moving)
                flow.assign(
                    moving,
                    momentum * moving_identity + (1 - momentum) * this_batch)

            update_moving(moving_mean, mean)
            update_moving(moving_variance, variance)

            return flow.nn.batch_normalization(
                x=inputs,
                mean=mean,
                variance=variance,
                offset=beta,
                scale=gamma,
                variance_epsilon=epsilon,
                axis=axis,
                name=name,
            )
        else:
            mean = moving_mean
            variance = moving_variance
            return flow.nn.batch_normalization(
                x=inputs,
                mean=mean,
                variance=variance,
                offset=beta,
                scale=gamma,
                variance_epsilon=epsilon,
                axis=axis,
                name=name,
            )
    else:
        builder = (flow.user_op_builder(name).Op("normalization").Input(
            "x", [inputs]).Input("moving_mean", [moving_mean]).Input(
                "moving_variance",
                [moving_variance]).Input("gamma", [gamma]).Input(
                    "beta", [beta]).Output("y").Attr("axis", axis).Attr(
                        "epsilon",
                        epsilon).Attr("training",
                                      training).Attr("momentum", momentum))
        if trainable and training:
            builder = builder.Output("mean").Output("inv_variance")

        return builder.Build().InferAndTryRun().RemoteBlobList()[0]
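
A minimal usage sketch under a train-type global function, following the pattern of Examples #7 and #13 (the input shape, learning rate, and the reduce_mean stand-in loss are illustrative assumptions, not taken from this file):

@flow.global_function(type="train")
def bn_job(x: tp.Numpy.Placeholder((4, 64, 32, 32), dtype=flow.float)) -> tp.Numpy:
    # Normalize the channel axis (axis=1) of an NCHW blob.
    y = flow.layers.batch_normalization(x, axis=1, momentum=0.997, epsilon=1.001e-5, name="bn1")
    loss = flow.math.reduce_mean(y)  # assumption: reduce_mean used here only as a dummy loss
    lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.1])
    flow.optimizer.SGD(lr_scheduler, momentum=0.9).minimize(loss)
    return loss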
Code Example #10
File: layers.py  Project: xy548/oneflow
def layer_norm(
    inputs: remote_blob_util.BlobDef,
    center: bool = True,
    scale: bool = True,
    trainable: bool = True,
    begin_norm_axis: int = 1,
    begin_params_axis: int = -1,
    epsilon: float = 1e-5,
    name: str = "LayerNorm",
) -> remote_blob_util.BlobDef:
    r"""Analogous to `tf.keras.layers.LayerNormalization <https://www.tensorflow.org/api_docs/python/tf/keras/layers/LayerNormalization>`_

    Args:
        inputs (remote_blob_util.BlobDef): Input `Blob`.
        center (bool, optional): A boolean specifying whether to shift the input `Blob`. Defaults to True.
        scale (bool, optional): A boolean specifying whether to scale the input `Blob`. Defaults to True.
        trainable (bool, optional): A boolean specifying whether to train the variables. Defaults to True.
        begin_norm_axis (int, optional): An integer specifying the first axis to normalize. Defaults to 1.
        begin_params_axis (int, optional): An integer specifying the first axis of the affine parameters (beta, gamma). Defaults to -1.
        epsilon (float, optional): A small float added to avoid division by zero. Defaults to 1e-5.
        name (str, optional): This layer's name. Defaults to "LayerNorm".

    Returns:
        remote_blob_util.BlobDef: A normalized `Blob` with the same shape as the input.
    """
    if center is False and scale is False:
        trainable = False

    beta = None
    gamma = None

    param_shape = inputs.shape[begin_params_axis:]
    if center:
        with flow.scope.namespace(name):
            beta = flow.get_variable(
                name="beta",
                shape=param_shape,
                dtype=inputs.dtype,
                initializer=flow.constant_initializer(0.0),
                trainable=trainable,
                model_name="beta",
                distribute=distribute_util.broadcast(),
                reuse=False,
            )

    if scale:
        with flow.scope.namespace(name):
            gamma = flow.get_variable(
                name="gamma",
                shape=param_shape,
                dtype=inputs.dtype,
                initializer=flow.constant_initializer(1.0),
                trainable=trainable,
                model_name="gamma",
                distribute=distribute_util.broadcast(),
                reuse=False,
            )

    if flow.current_scope().device_parallel_desc_symbol.device_tag == "cpu":
        if begin_norm_axis < 0:
            begin_norm_axis = begin_norm_axis + len(inputs.shape)

        reduce_axis = []
        for dim in range(len(inputs.shape)):
            if dim >= begin_norm_axis:
                reduce_axis.append(dim)
        mean, variance = flow.nn.moments(inputs, reduce_axis, keepdims=True)

        axis = begin_norm_axis
        normalized = flow.nn.batch_normalization(
            x=inputs,
            mean=mean,
            variance=variance,
            variance_epsilon=epsilon,
            axis=axis,
            name=name,
        )
        nd_params_shape = [1] * (len(inputs.shape) -
                                 len(param_shape)) + list(param_shape)
        affined = normalized
        if gamma:
            gamma = flow.reshape(gamma, nd_params_shape)
            affined *= gamma
        if beta:
            beta = flow.reshape(beta, nd_params_shape)
            affined += beta
        return affined
    elif flow.current_scope().device_parallel_desc_symbol.device_tag == "gpu":
        op_builder = (flow.user_op_builder(name).Op("layer_norm").Input(
            "x", [inputs]).Output("y").Output("mean").Output("inv_variance"))

        if beta is not None:
            op_builder.Input("beta", [beta])
        if gamma is not None:
            op_builder.Input("gamma", [gamma])
            op_builder.Output("normalized")
        op_builder.Attr("center", center)
        op_builder.Attr("scale", scale)
        op_builder.Attr("begin_norm_axis", begin_norm_axis)
        op_builder.Attr("begin_params_axis", begin_params_axis)
        op_builder.Attr("epsilon", epsilon)

        return op_builder.Build().InferAndTryRun().RemoteBlobList()[0]
    else:
        raise NotImplementedError
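
A minimal usage sketch (the shape, the chosen axes, and the layer name are illustrative; the call form simply mirrors the signature above):

@flow.global_function()
def ln_job(x: tp.Numpy.Placeholder((8, 16, 768), dtype=flow.float)) -> tp.Numpy:
    # Normalize the last axis; the affine params (beta/gamma) also live on the last axis.
    return flow.layers.layer_norm(
        x, begin_norm_axis=2, begin_params_axis=-1, epsilon=1e-5, name="ln"
    )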
Code Example #11
File: layers.py  Project: xy548/oneflow
def dense(
    inputs: remote_blob_util.BlobDef,
    units: int,
    activation: Optional[Callable[[remote_blob_util.BlobDef, str],
                                  remote_blob_util.BlobDef]] = None,
    use_bias: bool = True,
    kernel_initializer: Optional[op_conf_util.InitializerConf] = None,
    bias_initializer: Optional[op_conf_util.InitializerConf] = None,
    kernel_regularizer: Optional[op_conf_util.RegularizerConf] = None,
    bias_regularizer: Optional[op_conf_util.RegularizerConf] = None,
    trainable: bool = True,
    name: str = "Dense",
    model_distribute: distribute_util.Distribute = distribute_util.broadcast(),
) -> remote_blob_util.BlobDef:
    r"""Analogous to `tf.keras.layers.Dense <https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense>`_

    Args:
        inputs (remote_blob_util.BlobDef): A 2D input `Blob`.
        units (int): A positive integer for the dimensionality of the output space.
        activation (Optional[Callable], optional): Activation function. Defaults to None.
        use_bias (bool, optional): A boolean specifies whether to use a bias vector. Defaults to True.
        kernel_initializer (Optional[op_conf_util.InitializerConf], optional): Initializer for the kernel weights matrix. Defaults to None.
        bias_initializer (Optional[op_conf_util.InitializerConf], optional): Initializer for the bias vector. Defaults to None.
        kernel_regularizer (Optional[op_conf_util.RegularizerConf], optional): Regularizer for the kernel weights matrix. Defaults to None.
        bias_regularizer (Optional[op_conf_util.RegularizerConf], optional): Regularizer for the bias vector. Defaults to None.
        trainable (bool, optional): A boolean specifies whether to train the variables. Defaults to True.
        name (str, optional): This layer's name. Defaults to "Dense".
        model_distribute (distribute_util.Distribute, optional): Defines the way to distribute the model. Defaults to distribute_util.broadcast().

    Returns:
        remote_blob_util.BlobDef: An N-D `Blob` with the shape of (batch_size, units).

    Raises:
        ValueError: If the dimension of the input `Blob` is less than 2.
        ValueError: If model distribute is not one of auto, broadcast, or split.
        ValueError: If the input is not a 2D `Blob` when model distribute is split.
    """
    in_shape = inputs.shape
    in_num_axes = len(in_shape)
    assert in_num_axes >= 2

    assert (model_distribute is distribute_util.auto()
            or model_distribute is distribute_util.broadcast()
            or model_distribute is distribute_util.split(0))

    if model_distribute is distribute_util.split(0):
        assert in_num_axes == 2  # model distribute is hard for reshape split dim 1

    if in_num_axes > 2:
        inputs = flow.reshape(inputs, (-1, in_shape[-1]))

    with flow.scope.namespace(name):
        if kernel_initializer is None:
            kernel_initializer = flow.constant_initializer(0)

        weight = flow.get_variable(
            name="weight",
            shape=(units, inputs.shape[1]),
            dtype=inputs.dtype,
            initializer=kernel_initializer,
            regularizer=kernel_regularizer,
            trainable=trainable,
            model_name="weight",
            distribute=model_distribute,
            reuse=False,
        )
        weight = weight.with_distribute(model_distribute)

        out = flow.matmul(a=inputs, b=weight, transpose_b=True, name="matmul")

        if use_bias:
            if bias_initializer is None:
                bias_initializer = flow.constant_initializer(0)

            bias = flow.get_variable(
                name="bias",
                shape=(units, ),
                dtype=inputs.dtype,
                initializer=bias_initializer,
                regularizer=bias_regularizer,
                trainable=trainable,
                model_name="bias",
                distribute=model_distribute,
                reuse=False,
            )
            bias = bias.with_distribute(model_distribute)
            out = flow.nn.bias_add(out, bias, name="bias_add")

        if callable(activation):
            out = activation(out, name="activation")

    if in_num_axes > 2:
        out = flow.reshape(out, in_shape[:-1] + (units, ))

    return out
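
A minimal usage sketch, taken almost verbatim from Example #13's lenet() (the `reshape` blob is assumed to be a 2D blob such as flow.reshape(pool2, [pool2.shape[0], -1])):

hidden = flow.layers.dense(
    reshape,
    512,
    activation=flow.nn.relu,
    kernel_initializer=flow.truncated_normal(0.1),
    name="dense1",
)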
Code Example #12
def layer_norm(
    inputs: remote_blob_util.BlobDef,
    center: bool = True,
    scale: bool = True,
    trainable: bool = True,
    begin_norm_axis: int = 1,
    begin_params_axis: int = -1,
    epsilon: float = 1e-5,
    name: str = "LayerNorm",
) -> remote_blob_util.BlobDef:
    r"""Analogous to `tf.keras.layers.LayerNormalization <https://www.tensorflow.org/api_docs/python/tf/keras/layers/LayerNormalization>`_

    Args:
        inputs (remote_blob_util.BlobDef): Input `Blob`.
        center (bool, optional): A boolean specifying whether to shift the input `Blob`. Defaults to True.
        scale (bool, optional): A boolean specifying whether to scale the input `Blob`. Defaults to True.
        trainable (bool, optional): A boolean specifying whether to train the variables. Defaults to True.
        begin_norm_axis (int, optional): An integer specifying the first axis to normalize. Defaults to 1.
        begin_params_axis (int, optional): An integer specifying the first axis of the affine parameters (beta, gamma). Defaults to -1.
        epsilon (float, optional): A small float added to avoid division by zero. Defaults to 1e-5.
        name (str, optional): This layer's name. Defaults to "LayerNorm".

    Returns:
        remote_blob_util.BlobDef: A normalized `Blob` with the same shape as the input.
    """
    op_builder = (
        flow.user_op_builder(name)
        .Op("layer_norm")
        .Input("x", [inputs])
        .Output("y")
        .Output("mean")
        .Output("inv_variance")
    )

    if center is False and scale is False:
        trainable = False

    param_shape = inputs.shape[begin_params_axis:]
    if center:
        with flow.scope.namespace(name):
            beta = flow.get_variable(
                name="beta",
                shape=param_shape,
                dtype=inputs.dtype,
                initializer=flow.constant_initializer(0.0),
                trainable=trainable,
                model_name="beta",
                distribute=distribute_util.broadcast(),
                reuse=False,
            )

        op_builder.Input("beta", [beta])

    if scale:
        with flow.scope.namespace(name):
            gamma = flow.get_variable(
                name="gamma",
                shape=param_shape,
                dtype=inputs.dtype,
                initializer=flow.constant_initializer(1.0),
                trainable=trainable,
                model_name="gamma",
                distribute=distribute_util.broadcast(),
                reuse=False,
            )

        op_builder.Input("gamma", [gamma])
        op_builder.Output("normalized")

    op_builder.Attr("center", center)
    op_builder.Attr("scale", scale)
    op_builder.Attr("begin_norm_axis", begin_norm_axis)
    op_builder.Attr("begin_params_axis", begin_params_axis)
    op_builder.Attr("epsilon", epsilon)

    return op_builder.Build().InferAndTryRun().RemoteBlobList()[0]
Code Example #13
File: prelu.py  Project: zhouyuegit/oneflow
def prelu(
    inputs: remote_blob_util.BlobDef,
    alpha_initializer: Optional[op_conf_util.InitializerConf] = None,
    alpha_regularizer: Optional[op_conf_util.RegularizerConf] = None,
    shared_axes: Optional[Sequence[int]] = None,
    trainable: bool = True,
    name: str = "PRelu",
    model_distribute: distribute_util.Distribute = distribute_util.broadcast(),
) -> remote_blob_util.BlobDef:
    r"""The Prelu(Parametric Rectified Linear Unit) activation. 
    
    The :math:`\alpha` is a parameter that can be trained in network

    The equation is

    .. math:: 

        out = max(0, x) + \alpha*min(0, x)

    Args:
        inputs (remote_blob_util.BlobDef): The input Blob. 
        alpha_initializer (Optional[op_conf_util.InitializerConf], optional): The initializer of alpha. Defaults to None.
        alpha_regularizer (Optional[op_conf_util.RegularizerConf], optional): The regularizer of alpha. Defaults to None.
        shared_axes (Optional[Sequence[int]], optional): The axes along which to share the learnable parameters of the PReLU activation function. Defaults to None.
        trainable (bool, optional): Whether to train the parameter :math:`\alpha`. Defaults to True.
        name (str, optional): The name for the operation. Defaults to "PRelu".
        model_distribute (distribute_util.Distribute, optional): Defines the way to distribute the model. Defaults to distribute_util.broadcast().

    Returns:
        remote_blob_util.BlobDef: The activated Blob

    For example: 

    .. code-block:: python 

        import oneflow as flow
        import oneflow.typing as tp

        BATCH_SIZE = 100


        def lenet(data, train=False):
            initializer = flow.truncated_normal(0.1)
            conv1 = flow.layers.conv2d(
                data,
                32,
                5,
                padding="SAME",
                name="conv1",
                kernel_initializer=initializer,
            )
            prelu1 = flow.layers.prelu(conv1,
                                    alpha_initializer=initializer,
                                    shared_axes=[2, 3],
                                    name="Prelu1")
            pool1 = flow.nn.max_pool2d(
                prelu1, ksize=2, strides=2, padding="SAME", name="pool1", data_format="NCHW"
            )
            conv2 = flow.layers.conv2d(
                pool1,
                64,
                5,
                padding="SAME",
                name="conv2",
                kernel_initializer=initializer,
            )
            prelu2 = flow.layers.prelu(conv2,
                                    alpha_initializer=initializer,
                                    shared_axes=[2, 3],
                                    name="Prelu2")
            pool2 = flow.nn.max_pool2d(
                prelu2, ksize=2, strides=2, padding="SAME", name="pool2", data_format="NCHW"
            )
            reshape = flow.reshape(pool2, [pool2.shape[0], -1])
            hidden = flow.layers.dense(
                reshape,
                512,
                activation=flow.nn.relu,
                kernel_initializer=initializer,
                name="dense1",
            )
            if train:
                hidden = flow.nn.dropout(hidden, rate=0.5, name="dropout")
            return flow.layers.dense(hidden, 10, kernel_initializer=initializer, name="dense2")


        @flow.global_function(type="train")
        def train_job(
                images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float),
                labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32),
        ) -> tp.Numpy:
            with flow.scope.placement("gpu", "0:0"):
                logits = lenet(images, train=True)
                loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
                    labels, logits, name="softmax_loss"
                )

            lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.1])
            flow.optimizer.SGD(lr_scheduler, momentum=0.9).minimize(loss)
            return loss

    """
    alpha_shape = list(inputs.shape[1:])
    if shared_axes is not None:
        for i in shared_axes:
            assert i >= 1 and i < len(inputs.shape)
            alpha_shape[i - 1] = 1

    if alpha_initializer is None:
        alpha_initializer = flow.constant_initializer(0)

    with flow.scope.namespace(name):
        alpha = flow.get_variable(
            name="alpha",
            shape=alpha_shape,
            dtype=inputs.dtype,
            initializer=alpha_initializer,
            regularizer=alpha_regularizer,
            trainable=trainable,
            distribute=model_distribute,
            reuse=False,
        )

    op = (flow.user_op_builder(name).Op("prelu").Input("x", [inputs]).Input(
        "alpha", [alpha]).Output("y").Build())
    return op.InferAndTryRun().SoleOutputBlob()